jfr.im git - yt-dlp.git/blame_incremental

Commit	Line	Data
	1	#!/usr/bin/env python3
	2	# coding: utf-8
	3
	4	from __future__ import absolute_import, unicode_literals
	5
	6	import collections
	7	import contextlib
	8	import copy
	9	import datetime
	10	import errno
	11	import fileinput
	12	import io
	13	import itertools
	14	import json
	15	import locale
	16	import operator
	17	import os
	18	import platform
	19	import re
	20	import shutil
	21	import subprocess
	22	import sys
	23	import tempfile
	24	import time
	25	import tokenize
	26	import traceback
	27	import random
	28
	29	from string import ascii_letters
	30	from zipimport import zipimporter
	31
	32	from .compat import (
	33	compat_basestring,
	34	compat_cookiejar,
	35	compat_get_terminal_size,
	36	compat_kwargs,
	37	compat_numeric_types,
	38	compat_os_name,
	39	compat_str,
	40	compat_tokenize_tokenize,
	41	compat_urllib_error,
	42	compat_urllib_request,
	43	compat_urllib_request_DataHandler,
	44	)
	45	from .utils import (
	46	age_restricted,
	47	args_to_str,
	48	ContentTooShortError,
	49	date_from_str,
	50	DateRange,
	51	DEFAULT_OUTTMPL,
	52	determine_ext,
	53	determine_protocol,
	54	DOT_DESKTOP_LINK_TEMPLATE,
	55	DOT_URL_LINK_TEMPLATE,
	56	DOT_WEBLOC_LINK_TEMPLATE,
	57	DownloadError,
	58	encode_compat_str,
	59	encodeFilename,
	60	EntryNotInPlaylist,
	61	error_to_compat_str,
	62	ExistingVideoReached,
	63	expand_path,
	64	ExtractorError,
	65	float_or_none,
	66	format_bytes,
	67	format_field,
	68	STR_FORMAT_RE,
	69	formatSeconds,
	70	GeoRestrictedError,
	71	HEADRequest,
	72	int_or_none,
	73	iri_to_uri,
	74	ISO3166Utils,
	75	LazyList,
	76	locked_file,
	77	make_dir,
	78	make_HTTPS_handler,
	79	MaxDownloadsReached,
	80	network_exceptions,
	81	orderedSet,
	82	OUTTMPL_TYPES,
	83	PagedList,
	84	parse_filesize,
	85	PerRequestProxyHandler,
	86	platform_name,
	87	PostProcessingError,
	88	preferredencoding,
	89	prepend_extension,
	90	process_communicate_or_kill,
	91	register_socks_protocols,
	92	RejectedVideoReached,
	93	render_table,
	94	replace_extension,
	95	SameFileError,
	96	sanitize_filename,
	97	sanitize_path,
	98	sanitize_url,
	99	sanitized_Request,
	100	std_headers,
	101	str_or_none,
	102	strftime_or_none,
	103	subtitles_filename,
	104	ThrottledDownload,
	105	to_high_limit_path,
	106	traverse_obj,
	107	try_get,
	108	UnavailableVideoError,
	109	url_basename,
	110	version_tuple,
	111	write_json_file,
	112	write_string,
	113	YoutubeDLCookieJar,
	114	YoutubeDLCookieProcessor,
	115	YoutubeDLHandler,
	116	YoutubeDLRedirectHandler,
	117	)
	118	from .cache import Cache
	119	from .extractor import (
	120	gen_extractor_classes,
	121	get_info_extractor,
	122	_LAZY_LOADER,
	123	_PLUGIN_CLASSES
	124	)
	125	from .extractor.openload import PhantomJSwrapper
	126	from .downloader import (
	127	get_suitable_downloader,
	128	shorten_protocol_name
	129	)
	130	from .downloader.rtmp import rtmpdump_version
	131	from .postprocessor import (
	132	get_postprocessor,
	133	FFmpegFixupDurationPP,
	134	FFmpegFixupM3u8PP,
	135	FFmpegFixupM4aPP,
	136	FFmpegFixupStretchedPP,
	137	FFmpegFixupTimestampPP,
	138	FFmpegMergerPP,
	139	FFmpegPostProcessor,
	140	MoveFilesAfterDownloadPP,
	141	)
	142	from .version import __version__
	143
	144	if compat_os_name == 'nt':
	145	import ctypes
	146
	147
	148	class YoutubeDL(object):
	149	"""YoutubeDL class.
	150
	151	YoutubeDL objects are the ones responsible for downloading the
	152	actual video file and writing it to disk if the user has requested
	153	it, among some other tasks. In most cases there should be one per
	154	program. As, given a video URL, the downloader doesn't know how to
	155	extract all the needed information, task that InfoExtractors do, it
	156	has to pass the URL to one of them.
	157
	158	For this, YoutubeDL objects have a method that allows
	159	InfoExtractors to be registered in a given order. When it is passed
	160	a URL, the YoutubeDL object hands it to the first InfoExtractor it
	161	finds that reports being able to handle it. The InfoExtractor extracts
	162	all the information about the video or videos the URL refers to, and
	163	YoutubeDL processes the extracted information, possibly using a File
	164	Downloader to download the video.
	165
	166	YoutubeDL objects accept a lot of parameters. In order not to saturate
	167	the object constructor with arguments, it receives a dictionary of
	168	options instead. These options are available through the params
	169	attribute for the InfoExtractors to use. The YoutubeDL also
	170	registers itself as the downloader in charge for the InfoExtractors
	171	that are added to it, so this is a "mutual registration".
	172
	173	Available options:
	174
	175	username: Username for authentication purposes.
	176	password: Password for authentication purposes.
	177	videopassword: Password for accessing a video.
	178	ap_mso: Adobe Pass multiple-system operator identifier.
	179	ap_username: Multiple-system operator account username.
	180	ap_password: Multiple-system operator account password.
	181	usenetrc: Use netrc for authentication instead.
	182	verbose: Print additional info to stdout.
	183	quiet: Do not print messages to stdout.
	184	no_warnings: Do not print out anything for warnings.
	185	forceprint: A list of templates to force print
	186	forceurl: Force printing final URL. (Deprecated)
	187	forcetitle: Force printing title. (Deprecated)
	188	forceid: Force printing ID. (Deprecated)
	189	forcethumbnail: Force printing thumbnail URL. (Deprecated)
	190	forcedescription: Force printing description. (Deprecated)
	191	forcefilename: Force printing final filename. (Deprecated)
	192	forceduration: Force printing duration. (Deprecated)
	193	forcejson: Force printing info_dict as JSON.
	194	dump_single_json: Force printing the info_dict of the whole playlist
	195	(or video) as a single JSON line.
	196	force_write_download_archive: Force writing download archive regardless
	197	of 'skip_download' or 'simulate'.
	198	simulate: Do not download the video files.
	199	format: Video format code. see "FORMAT SELECTION" for more details.
	200	allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
	201	ignore_no_formats_error: Ignore "No video formats" error. Useful for
	202	extracting metadata even if the video is not actually
	203	available for download (experimental)
	204	format_sort: How to sort the video formats. see "Sorting Formats"
	205	for more details.
	206	format_sort_force: Force the given format_sort. see "Sorting Formats"
	207	for more details.
	208	allow_multiple_video_streams: Allow multiple video streams to be merged
	209	into a single file
	210	allow_multiple_audio_streams: Allow multiple audio streams to be merged
	211	into a single file
	212	paths: Dictionary of output paths. The allowed keys are 'home'
	213	'temp' and the keys of OUTTMPL_TYPES (in utils.py)
	214	outtmpl: Dictionary of templates for output names. Allowed keys
	215	are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
	216	A string is also accepted for backward compatibility
	217	outtmpl_na_placeholder: Placeholder for unavailable meta fields.
	218	restrictfilenames: Do not allow "&" and spaces in file names
	219	trim_file_name: Limit length of filename (extension excluded)
	220	windowsfilenames: Force the filenames to be windows compatible
	221	ignoreerrors: Do not stop on download errors
	222	(Default True when running yt-dlp,
	223	but False when directly accessing YoutubeDL class)
	224	skip_playlist_after_errors: Number of allowed failures until the rest of
	225	the playlist is skipped
	226	force_generic_extractor: Force downloader to use the generic extractor
	227	overwrites: Overwrite all video and metadata files if True,
	228	overwrite only non-video files if None
	229	and don't overwrite any file if False
	230	playliststart: Playlist item to start at.
	231	playlistend: Playlist item to end at.
	232	playlist_items: Specific indices of playlist to download.
	233	playlistreverse: Download playlist items in reverse order.
	234	playlistrandom: Download playlist items in random order.
	235	matchtitle: Download only matching titles.
	236	rejecttitle: Reject downloads for matching titles.
	237	logger: Log messages to a logging.Logger instance.
	238	logtostderr: Log messages to stderr instead of stdout.
	239	writedescription: Write the video description to a .description file
	240	writeinfojson: Write the video description to a .info.json file
	241	clean_infojson: Remove private fields from the infojson
	242	writecomments: Extract video comments. This will not be written to disk
	243	unless writeinfojson is also given
	244	writeannotations: Write the video annotations to a .annotations.xml file
	245	writethumbnail: Write the thumbnail image to a file
	246	allow_playlist_files: Whether to write playlists' description, infojson etc
	247	also to disk when using the 'write*' options
	248	write_all_thumbnails: Write all thumbnail formats to files
	249	writelink: Write an internet shortcut file, depending on the
	250	current platform (.url/.webloc/.desktop)
	251	writeurllink: Write a Windows internet shortcut file (.url)
	252	writewebloclink: Write a macOS internet shortcut file (.webloc)
	253	writedesktoplink: Write a Linux internet shortcut file (.desktop)
	254	writesubtitles: Write the video subtitles to a file
	255	writeautomaticsub: Write the automatically generated subtitles to a file
	256	allsubtitles: Deprecated - Use subtitleslangs = ['all']
	257	Downloads all the subtitles of the video
	258	(requires writesubtitles or writeautomaticsub)
	259	listsubtitles: Lists all available subtitles for the video
	260	subtitlesformat: The format code for subtitles
	261	subtitleslangs: List of languages of the subtitles to download (can be regex).
	262	The list may contain "all" to refer to all the available
	263	subtitles. The language can be prefixed with a "-" to
	264	exclude it from the requested languages. Eg: ['all', '-live_chat']
	265	keepvideo: Keep the video file after post-processing
	266	daterange: A DateRange object, download only if the upload_date is in the range.
	267	skip_download: Skip the actual download of the video file
	268	cachedir: Location of the cache files in the filesystem.
	269	False to disable filesystem cache.
	270	noplaylist: Download single video instead of a playlist if in doubt.
	271	age_limit: An integer representing the user's age in years.
	272	Unsuitable videos for the given age are skipped.
	273	min_views: An integer representing the minimum view count the video
	274	must have in order to not be skipped.
	275	Videos without view count information are always
	276	downloaded. None for no limit.
	277	max_views: An integer representing the maximum view count.
	278	Videos that are more popular than that are not
	279	downloaded.
	280	Videos without view count information are always
	281	downloaded. None for no limit.
	282	download_archive: File name of a file where all downloads are recorded.
	283	Videos already present in the file are not downloaded
	284	again.
	285	break_on_existing: Stop the download process after attempting to download a
	286	file that is in the archive.
	287	break_on_reject: Stop the download process when encountering a video that
	288	has been filtered out.
	289	cookiefile: File name where cookies should be read from and dumped to
	290	nocheckcertificate:Do not verify SSL certificates
	291	prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
	292	At the moment, this is only supported by YouTube.
	293	proxy: URL of the proxy server to use
	294	geo_verification_proxy: URL of the proxy to use for IP address verification
	295	on geo-restricted sites.
	296	socket_timeout: Time to wait for unresponsive hosts, in seconds
	297	bidi_workaround: Work around buggy terminals without bidirectional text
	298	support, using fribidi
	299	debug_printtraffic:Print out sent and received HTTP traffic
	300	include_ads: Download ads as well
	301	default_search: Prepend this string if an input url is not valid.
	302	'auto' for elaborate guessing
	303	encoding: Use this encoding instead of the system-specified.
	304	extract_flat: Do not resolve URLs, return the immediate result.
	305	Pass in 'in_playlist' to only show this behavior for
	306	playlist items.
	307	postprocessors: A list of dictionaries, each with an entry
	308	* key: The name of the postprocessor. See
	309	yt_dlp/postprocessor/__init__.py for a list.
	310	* when: When to run the postprocessor. Can be one of
	311	pre_process|before_dl|post_process|after_move.
	312	Assumed to be 'post_process' if not given
	313	post_hooks: A list of functions that get called as the final step
	314	for each video file, after all postprocessors have been
	315	called. The filename will be passed as the only argument.
	316	progress_hooks: A list of functions that get called on download
	317	progress, with a dictionary with the entries
	318	* status: One of "downloading", "error", or "finished".
	319	Check this first and ignore unknown values.
	320
	321	If status is one of "downloading", or "finished", the
	322	following properties may also be present:
	323	* filename: The final filename (always present)
	324	* tmpfilename: The filename we're currently writing to
	325	* downloaded_bytes: Bytes on disk
	326	* total_bytes: Size of the whole file, None if unknown
	327	* total_bytes_estimate: Guess of the eventual file size,
	328	None if unavailable.
	329	* elapsed: The number of seconds since download started.
	330	* eta: The estimated time in seconds, None if unknown
	331	* speed: The download speed in bytes/second, None if
	332	unknown
	333	* fragment_index: The counter of the currently
	334	downloaded video fragment.
	335	* fragment_count: The number of fragments (= individual
	336	files that will be merged)
	337
	338	Progress hooks are guaranteed to be called at least once
	339	(with status "finished") if the download is successful.
	340	merge_output_format: Extension to use when merging formats.
	341	final_ext: Expected final extension; used to detect when the file was
	342	already downloaded and converted. "merge_output_format" is
	343	replaced by this extension when given
	344	fixup: Automatically correct known faults of the file.
	345	One of:
	346	- "never": do nothing
	347	- "warn": only emit a warning
	348	- "detect_or_warn": check whether we can do anything
	349	about it, warn otherwise (default)
	350	source_address: Client-side IP address to bind to.
	351	call_home: Boolean, true iff we are allowed to contact the
	352	yt-dlp servers for debugging. (BROKEN)
	353	sleep_interval_requests: Number of seconds to sleep between requests
	354	during extraction
	355	sleep_interval: Number of seconds to sleep before each download when
	356	used alone or a lower bound of a range for randomized
	357	sleep before each download (minimum possible number
	358	of seconds to sleep) when used along with
	359	max_sleep_interval.
	360	max_sleep_interval:Upper bound of a range for randomized sleep before each
	361	download (maximum possible number of seconds to sleep).
	362	Must only be used along with sleep_interval.
	363	Actual sleep time will be a random float from range
	364	[sleep_interval; max_sleep_interval].
	365	sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
	366	listformats: Print an overview of available video formats and exit.
	367	list_thumbnails: Print a table of all thumbnails and exit.
	368	match_filter: A function that gets called with the info_dict of
	369	every video.
	370	If it returns a message, the video is ignored.
	371	If it returns None, the video is downloaded.
	372	match_filter_func in utils.py is one example for this.
	373	no_color: Do not emit color codes in output.
	374	geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
	375	HTTP header
	376	geo_bypass_country:
	377	Two-letter ISO 3166-1 alpha-2 country code that will be used for
	378	explicit geographic restriction bypassing via faking
	379	X-Forwarded-For HTTP header
	380	geo_bypass_ip_block:
	381	IP range in CIDR notation that will be used similarly to
	382	geo_bypass_country
	383
	384	The following options determine which downloader is picked:
	385	external_downloader: A dictionary of protocol keys and the executable of the
	386	external downloader to use for it. The allowed protocols
	387	are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
	388	Set the value to 'native' to use the native downloader
	389	hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
	390	or {'m3u8': 'ffmpeg'} instead.
	391	Use the native HLS downloader instead of ffmpeg/avconv
	392	if True, otherwise use ffmpeg/avconv if False, otherwise
	393	use downloader suggested by extractor if None.
	394	compat_opts: Compatibility options. See "Differences in default behavior".
	395	The following options do not work when used through the API:
	396	filename, abort-on-error, multistreams, no-live-chat,
	397	no-playlist-metafiles. Refer to __init__.py for their implementation
	398
	399	The following parameters are not used by YoutubeDL itself, they are used by
	400	the downloader (see yt_dlp/downloader/common.py):
	401	nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
	402	max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
	403	xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
	404
	405	The following options are used by the post processors:
	406	prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
	407	otherwise prefer ffmpeg. (avconv support is deprecated)
	408	ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
	409	to the binary or its containing directory.
	410	postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
	411	and a list of additional command-line arguments for the
	412	postprocessor/executable. The dict can also have "PP+EXE" keys
	413	which are used when the given exe is used by the given PP.
	414	Use 'default' as the name for arguments to be passed to all PP
	415
	416	The following options are used by the extractors:
	417	extractor_retries: Number of times to retry for known errors
	418	dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
	419	hls_split_discontinuity: Split HLS playlists to different formats at
	420	discontinuities such as ad breaks (default: False)
	421	extractor_args: A dictionary of arguments to be passed to the extractors.
	422	See "EXTRACTOR ARGUMENTS" for details.
	423	Eg: {'youtube': {'skip': ['dash', 'hls']}}
	424	youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
	425	If True (default), DASH manifests and related
	426	data will be downloaded and processed by extractor.
	427	You can reduce network I/O by disabling it if you don't
	428	care about DASH. (only for youtube)
	429	youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
	430	If True (default), HLS manifests and related
	431	data will be downloaded and processed by extractor.
	432	You can reduce network I/O by disabling it if you don't
	433	care about HLS. (only for youtube)
	434	"""
	435
	436	_NUMERIC_FIELDS = set((
	437	'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
	438	'timestamp', 'upload_year', 'upload_month', 'upload_day',
	439	'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
	440	'average_rating', 'comment_count', 'age_limit',
	441	'start_time', 'end_time',
	442	'chapter_number', 'season_number', 'episode_number',
	443	'track_number', 'disc_number', 'release_year',
	444	'playlist_index',
	445	))
	446
	447	params = None
	448	_ies = []
	449	_pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
	450	__prepare_filename_warned = False
	451	_first_webpage_request = True
	452	_download_retcode = None
	453	_num_downloads = None
	454	_playlist_level = 0
	455	_playlist_urls = set()
	456	_screen_file = None
	457
	458	def __init__(self, params=None, auto_init=True):
	459	"""Create a YoutubeDL object with the given options."""
	460	if params is None:
	461	params = {}
	462	self._ies = []
	463	self._ies_instances = {}
	464	self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
	465	self.__prepare_filename_warned = False
	466	self._first_webpage_request = True
	467	self._post_hooks = []
	468	self._progress_hooks = []
	469	self._download_retcode = 0
	470	self._num_downloads = 0
	471	self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
	472	self._err_file = sys.stderr
	473	self.params = {
	474	# Default parameters
	475	'nocheckcertificate': False,
	476	}
	477	self.params.update(params)
	478	self.cache = Cache(self)
	479
	480	if sys.version_info < (3, 6):
	481	self.report_warning(
	482	'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
	483
	484	def check_deprecated(param, option, suggestion):
	485	if self.params.get(param) is not None:
	486	self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
	487	return True
	488	return False
	489
	490	if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
	491	if self.params.get('geo_verification_proxy') is None:
	492	self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
	493
	494	check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
	495	check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
	496	check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
	497
	498	for msg in self.params.get('warnings', []):
	499	self.report_warning(msg)
	500

1

#!/usr/bin/env python3

2

# coding: utf-8

3

4

from __future__ import absolute_import, unicode_literals

import collections

import contextlib

import copy

import datetime

import errno

import fileinput

import io

import itertools

import json

import locale

import operator

import os

import platform

import re

import shutil

import subprocess

import sys

import tempfile

import time

import tokenize

import traceback

import random

from string import ascii_letters

30

from zipimport import zipimporter

31

32

from .compat import (

33

compat_basestring,

34

compat_cookiejar,

35

compat_get_terminal_size,

36

compat_kwargs,

37

compat_numeric_types,

38

compat_os_name,

39

compat_str,

40

compat_tokenize_tokenize,

41

compat_urllib_error,

42

compat_urllib_request,

43

compat_urllib_request_DataHandler,

)

from .utils import (

age_restricted,

args_to_str,

ContentTooShortError,

date_from_str,

DateRange,

DEFAULT_OUTTMPL,

determine_ext,

determine_protocol,

DOT_DESKTOP_LINK_TEMPLATE,

55

DOT_URL_LINK_TEMPLATE,

56

DOT_WEBLOC_LINK_TEMPLATE,

DownloadError,

encode_compat_str,

encodeFilename,

EntryNotInPlaylist,

error_to_compat_str,

ExistingVideoReached,

expand_path,

ExtractorError,

float_or_none,

format_bytes,

format_field,

STR_FORMAT_RE,

formatSeconds,

GeoRestrictedError,

HEADRequest,

int_or_none,

iri_to_uri,

ISO3166Utils,

LazyList,

locked_file,

make_dir,

make_HTTPS_handler,

MaxDownloadsReached,

network_exceptions,

orderedSet,

OUTTMPL_TYPES,

PagedList,

parse_filesize,

PerRequestProxyHandler,

platform_name,

PostProcessingError,

preferredencoding,

prepend_extension,

process_communicate_or_kill,

91

register_socks_protocols,

92

RejectedVideoReached,

render_table,

replace_extension,

SameFileError,

sanitize_filename,

sanitize_path,

sanitize_url,

sanitized_Request,

std_headers,

str_or_none,

strftime_or_none,

subtitles_filename,

ThrottledDownload,

to_high_limit_path,

traverse_obj,

try_get,

UnavailableVideoError,

url_basename,

version_tuple,

write_json_file,

write_string,

YoutubeDLCookieJar,

YoutubeDLCookieProcessor,

115

YoutubeDLHandler,

116

YoutubeDLRedirectHandler,

117

)

118

from .cache import Cache

119

from .extractor import (

120

gen_extractor_classes,

get_info_extractor,

_LAZY_LOADER,

_PLUGIN_CLASSES

)

from .extractor.openload import PhantomJSwrapper

126

from .downloader import (

127

get_suitable_downloader,

128

shorten_protocol_name

129

)

130

from .downloader.rtmp import rtmpdump_version

131

from .postprocessor import (

132

get_postprocessor,

133

FFmpegFixupDurationPP,

134

FFmpegFixupM3u8PP,

135

FFmpegFixupM4aPP,

136

FFmpegFixupStretchedPP,

137

FFmpegFixupTimestampPP,

138

FFmpegMergerPP,

139

FFmpegPostProcessor,

140

MoveFilesAfterDownloadPP,

141

)

142

from .version import __version__

143

144

if compat_os_name == 'nt':

import ctypes

class YoutubeDL(object):

149

"""YoutubeDL class.

150

151

YoutubeDL objects are the ones responsible for downloading the

152

actual video file and writing it to disk if the user has requested

153

it, among some other tasks. In most cases there should be one per

154

program. As, given a video URL, the downloader doesn't know how to

155

extract all the needed information, task that InfoExtractors do, it

156

has to pass the URL to one of them.

157

158

For this, YoutubeDL objects have a method that allows

159

InfoExtractors to be registered in a given order. When it is passed

160

a URL, the YoutubeDL object hands it to the first InfoExtractor it

161

finds that reports being able to handle it. The InfoExtractor extracts

162

all the information about the video or videos the URL refers to, and

163

YoutubeDL processes the extracted information, possibly using a File

164

Downloader to download the video.

165

166

YoutubeDL objects accept a lot of parameters. In order not to saturate

167

the object constructor with arguments, it receives a dictionary of

168

options instead. These options are available through the params

169

attribute for the InfoExtractors to use. The YoutubeDL also

170

registers itself as the downloader in charge for the InfoExtractors

171

that are added to it, so this is a "mutual registration".

Available options:

username: Username for authentication purposes.

176

password: Password for authentication purposes.

177

videopassword: Password for accessing a video.

178

ap_mso: Adobe Pass multiple-system operator identifier.

179

ap_username: Multiple-system operator account username.

180

ap_password: Multiple-system operator account password.

181

usenetrc: Use netrc for authentication instead.

182

verbose: Print additional info to stdout.

183

quiet: Do not print messages to stdout.

184

no_warnings: Do not print out anything for warnings.

185

forceprint: A list of templates to force print

186

forceurl: Force printing final URL. (Deprecated)

187

forcetitle: Force printing title. (Deprecated)

188

forceid: Force printing ID. (Deprecated)

189

forcethumbnail: Force printing thumbnail URL. (Deprecated)

190

forcedescription: Force printing description. (Deprecated)

191

forcefilename: Force printing final filename. (Deprecated)

192

forceduration: Force printing duration. (Deprecated)

193

forcejson: Force printing info_dict as JSON.

194

dump_single_json: Force printing the info_dict of the whole playlist

195

(or video) as a single JSON line.

196

force_write_download_archive: Force writing download archive regardless

197

of 'skip_download' or 'simulate'.

198

simulate: Do not download the video files.

199

format: Video format code. see "FORMAT SELECTION" for more details.

200

allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.

201

ignore_no_formats_error: Ignore "No video formats" error. Useful for

202

extracting metadata even if the video is not actually

203

available for download (experimental)

204

format_sort: How to sort the video formats. see "Sorting Formats"

205

for more details.

206

format_sort_force: Force the given format_sort. see "Sorting Formats"

207

for more details.

208

allow_multiple_video_streams: Allow multiple video streams to be merged

209

into a single file

210

allow_multiple_audio_streams: Allow multiple audio streams to be merged

211

into a single file

212

paths: Dictionary of output paths. The allowed keys are 'home'

213

'temp' and the keys of OUTTMPL_TYPES (in utils.py)

214

outtmpl: Dictionary of templates for output names. Allowed keys

215

are 'default' and the keys of OUTTMPL_TYPES (in utils.py).

216

A string is also accepted for backward compatibility

217

outtmpl_na_placeholder: Placeholder for unavailable meta fields.

218

restrictfilenames: Do not allow "&" and spaces in file names

219

trim_file_name: Limit length of filename (extension excluded)

220

windowsfilenames: Force the filenames to be windows compatible

221

ignoreerrors: Do not stop on download errors

222

(Default True when running yt-dlp,

223

but False when directly accessing YoutubeDL class)

224

skip_playlist_after_errors: Number of allowed failures until the rest of

225

the playlist is skipped

226

force_generic_extractor: Force downloader to use the generic extractor

227

overwrites: Overwrite all video and metadata files if True,

228

overwrite only non-video files if None

229

and don't overwrite any file if False

230

playliststart: Playlist item to start at.

231

playlistend: Playlist item to end at.

232

playlist_items: Specific indices of playlist to download.

233

playlistreverse: Download playlist items in reverse order.

234

playlistrandom: Download playlist items in random order.

235

matchtitle: Download only matching titles.

236

rejecttitle: Reject downloads for matching titles.

237

logger: Log messages to a logging.Logger instance.

238

logtostderr: Log messages to stderr instead of stdout.

239

writedescription: Write the video description to a .description file

240

writeinfojson: Write the video description to a .info.json file

241

clean_infojson: Remove private fields from the infojson

242

writecomments: Extract video comments. This will not be written to disk

243

unless writeinfojson is also given

244

writeannotations: Write the video annotations to a .annotations.xml file

245

writethumbnail: Write the thumbnail image to a file

246

allow_playlist_files: Whether to write playlists' description, infojson etc

247

also to disk when using the 'write*' options

248

write_all_thumbnails: Write all thumbnail formats to files

249

writelink: Write an internet shortcut file, depending on the

250

current platform (.url/.webloc/.desktop)

251

writeurllink: Write a Windows internet shortcut file (.url)

252

writewebloclink: Write a macOS internet shortcut file (.webloc)

253

writedesktoplink: Write a Linux internet shortcut file (.desktop)

254

writesubtitles: Write the video subtitles to a file

255

writeautomaticsub: Write the automatically generated subtitles to a file

256

allsubtitles: Deprecated - Use subtitleslangs = ['all']

257

Downloads all the subtitles of the video

258

(requires writesubtitles or writeautomaticsub)

259

listsubtitles: Lists all available subtitles for the video

260

subtitlesformat: The format code for subtitles

261

subtitleslangs: List of languages of the subtitles to download (can be regex).

262

The list may contain "all" to refer to all the available

263

subtitles. The language can be prefixed with a "-" to

264

exclude it from the requested languages. Eg: ['all', '-live_chat']

265

keepvideo: Keep the video file after post-processing

266

daterange: A DateRange object, download only if the upload_date is in the range.

267

skip_download: Skip the actual download of the video file

268

cachedir: Location of the cache files in the filesystem.

269

False to disable filesystem cache.

270

noplaylist: Download single video instead of a playlist if in doubt.

271

age_limit: An integer representing the user's age in years.

272

Unsuitable videos for the given age are skipped.

273

min_views: An integer representing the minimum view count the video

274

must have in order to not be skipped.

275

Videos without view count information are always

276

downloaded. None for no limit.

277

max_views: An integer representing the maximum view count.

278

Videos that are more popular than that are not

279

downloaded.

280

Videos without view count information are always

281

downloaded. None for no limit.

282

download_archive: File name of a file where all downloads are recorded.

283

Videos already present in the file are not downloaded

284

again.

285

break_on_existing: Stop the download process after attempting to download a

286

file that is in the archive.

287

break_on_reject: Stop the download process when encountering a video that

288

has been filtered out.

289

cookiefile: File name where cookies should be read from and dumped to

290

nocheckcertificate:Do not verify SSL certificates

291

prefer_insecure: Use HTTP instead of HTTPS to retrieve information.

292

At the moment, this is only supported by YouTube.

293

proxy: URL of the proxy server to use

294

geo_verification_proxy: URL of the proxy to use for IP address verification

295

on geo-restricted sites.

296

socket_timeout: Time to wait for unresponsive hosts, in seconds

297

bidi_workaround: Work around buggy terminals without bidirectional text

298

support, using fribidi

299

debug_printtraffic:Print out sent and received HTTP traffic

300

include_ads: Download ads as well

301

default_search: Prepend this string if an input url is not valid.

302

'auto' for elaborate guessing

303

encoding: Use this encoding instead of the system-specified.

304

extract_flat: Do not resolve URLs, return the immediate result.

305

Pass in 'in_playlist' to only show this behavior for

306

playlist items.

307

postprocessors: A list of dictionaries, each with an entry

308

* key: The name of the postprocessor. See

309

yt_dlp/postprocessor/__init__.py for a list.

310

* when: When to run the postprocessor. Can be one of

311

pre_process|before_dl|post_process|after_move.

312

Assumed to be 'post_process' if not given

313

post_hooks: A list of functions that get called as the final step

314

for each video file, after all postprocessors have been

315

called. The filename will be passed as the only argument.

316

progress_hooks: A list of functions that get called on download

317

progress, with a dictionary with the entries

318

* status: One of "downloading", "error", or "finished".

319

Check this first and ignore unknown values.

320

321

If status is one of "downloading" or "finished", the

322

following properties may also be present:

323

* filename: The final filename (always present)

324

* tmpfilename: The filename we're currently writing to

325

* downloaded_bytes: Bytes on disk

326

* total_bytes: Size of the whole file, None if unknown

327

* total_bytes_estimate: Guess of the eventual file size,

328

None if unavailable.

329

* elapsed: The number of seconds since download started.

330

* eta: The estimated time in seconds, None if unknown

331

* speed: The download speed in bytes/second, None if

332

unknown

333

* fragment_index: The counter of the currently

334

downloaded video fragment.

335

* fragment_count: The number of fragments (= individual

336

files that will be merged)

337

338

Progress hooks are guaranteed to be called at least once

339

(with status "finished") if the download is successful.

340

merge_output_format: Extension to use when merging formats.

341

final_ext: Expected final extension; used to detect when the file was

342

already downloaded and converted. "merge_output_format" is

343

replaced by this extension when given

344

fixup: Automatically correct known faults of the file.

345

One of:

346

- "never": do nothing

347

- "warn": only emit a warning

348

- "detect_or_warn": check whether we can do anything

349

about it, warn otherwise (default)

350

source_address: Client-side IP address to bind to.

351

call_home: Boolean, true iff we are allowed to contact the

352

yt-dlp servers for debugging. (BROKEN)

353

sleep_interval_requests: Number of seconds to sleep between requests

354

during extraction

355

sleep_interval: Number of seconds to sleep before each download when

356

used alone or a lower bound of a range for randomized

357

sleep before each download (minimum possible number

358

of seconds to sleep) when used along with

359

max_sleep_interval.

360

max_sleep_interval:Upper bound of a range for randomized sleep before each

361

download (maximum possible number of seconds to sleep).

362

Must only be used along with sleep_interval.

363

Actual sleep time will be a random float from range

364

[sleep_interval; max_sleep_interval].

365

sleep_interval_subtitles: Number of seconds to sleep before each subtitle download

366

listformats: Print an overview of available video formats and exit.

367

list_thumbnails: Print a table of all thumbnails and exit.

368

match_filter: A function that gets called with the info_dict of

369

every video.

370

If it returns a message, the video is ignored.

371

If it returns None, the video is downloaded.

372

match_filter_func in utils.py is one example for this.

373

no_color: Do not emit color codes in output.

374

geo_bypass: Bypass geographic restriction via faking X-Forwarded-For

375

HTTP header

376

geo_bypass_country:

377

Two-letter ISO 3166-1 alpha-2 country code that will be used for

378

explicit geographic restriction bypassing via faking

379

X-Forwarded-For HTTP header

380

geo_bypass_ip_block:

381

IP range in CIDR notation that will be used similarly to

382

geo_bypass_country

383

384

The following options determine which downloader is picked:

385

external_downloader: A dictionary of protocol keys and the executable of the

386

external downloader to use for it. The allowed protocols

387

are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.

388

Set the value to 'native' to use the native downloader

389

hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}

390

or {'m3u8': 'ffmpeg'} instead.

391

Use the native HLS downloader instead of ffmpeg/avconv

392

if True, otherwise use ffmpeg/avconv if False, otherwise

393

use downloader suggested by extractor if None.

394

compat_opts: Compatibility options. See "Differences in default behavior".

395

The following options do not work when used through the API:

396

filename, abort-on-error, multistreams, no-live-chat,

397

no-playlist-metafiles. Refer to __init__.py for their implementation

398

399

The following parameters are not used by YoutubeDL itself, they are used by

400

the downloader (see yt_dlp/downloader/common.py):

401

nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,

402

max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,

403

xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.

404

405

The following options are used by the post processors:

406

prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,

407

otherwise prefer ffmpeg. (avconv support is deprecated)

408

ffmpeg_location: Location of the ffmpeg/avconv binary; either the path

409

to the binary or its containing directory.

410

postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)

411

and a list of additional command-line arguments for the

412

postprocessor/executable. The dict can also have "PP+EXE" keys

413

which are used when the given exe is used by the given PP.

414

Use 'default' as the name for arguments to be passed to all PP

415

416

The following options are used by the extractors:

417

extractor_retries: Number of times to retry for known errors

418

dynamic_mpd: Whether to process dynamic DASH manifests (default: True)

419

hls_split_discontinuity: Split HLS playlists to different formats at

420

discontinuities such as ad breaks (default: False)

421

extractor_args: A dictionary of arguments to be passed to the extractors.

422

See "EXTRACTOR ARGUMENTS" for details.

423

Eg: {'youtube': {'skip': ['dash', 'hls']}}

424

youtube_include_dash_manifest: Deprecated - Use extractor_args instead.

425

If True (default), DASH manifests and related

426

data will be downloaded and processed by extractor.

427

You can reduce network I/O by disabling it if you don't

428

care about DASH. (only for youtube)

429

youtube_include_hls_manifest: Deprecated - Use extractor_args instead.

430

If True (default), HLS manifests and related

431

data will be downloaded and processed by extractor.

432

You can reduce network I/O by disabling it if you don't

433

care about HLS. (only for youtube)

434

"""

435

436

# Names of info-dict fields grouped as numeric
# (NOTE(review): the consumers of this set are outside this view)
_NUMERIC_FIELDS = {
    'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
    'timestamp', 'upload_year', 'upload_month', 'upload_day',
    'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
    'average_rating', 'comment_count', 'age_limit',
    'start_time', 'end_time',
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number', 'release_year',
    'playlist_index',
}

# Class-level defaults; __init__ rebinds most of them on every instance
params = None
_ies = []
_pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
__prepare_filename_warned = False
_first_webpage_request = True
_download_retcode = None
_num_downloads = None
_playlist_level = 0
_playlist_urls = set()
_screen_file = None

457

458

def __init__(self, params=None, auto_init=True):
    """Create a downloader object with the given options.

    params:    Dictionary of options (see the class docstring). It is
               copied on top of the built-in defaults into self.params.
    auto_init: If true, print the debug header and register the default
               info extractors.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    self.__prepare_filename_warned = False
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Any truthy 'logtostderr' redirects screen output to stderr
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)

    if sys.version_info < (3, 6):
        self.report_warning(
            'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

    def check_deprecated(param, option, suggestion):
        """Warn if the deprecated param is set; return whether it was set"""
        if self.params.get(param) is not None:
            self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
            return True
        return False

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the deprecated value over unless the new option is given
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    for msg in self.params.get('warnings', []):
        self.report_warning(msg)

    if self.params.get('final_ext'):
        if self.params.get('merge_output_format'):
            self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
        self.params['merge_output_format'] = self.params['final_ext']

    if 'overwrites' in self.params and self.params['overwrites'] is None:
        del self.params['overwrites']

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            try:
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv could not be started; fall back to fribidi
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError:
                # No helper could be set up: do not leak the pty pair
                os.close(master)
                os.close(slave)
                raise
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    # Creating format selector here allows us to catch syntax errors before the extraction
    self.format_selector = (
        None if self.params.get('format') is None
        else self.build_format_selector(self.params['format']))

    self._setup_opener()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified"""
        if fn is None:
            return False
        # write_debug() terminates the line itself, so no trailing newline here
        self.write_debug('Loading archive file %r' % fn)
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file simply means nothing was recorded yet
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    self.archive = set()
    preload_download_archive(self.params.get('download_archive'))

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_def_raw in self.params.get('postprocessors', []):
        # Work on a copy so the caller's definition is not modified by pop()
        pp_def = dict(pp_def_raw)
        when = pp_def.pop('when', 'post_process')
        pp_class = get_postprocessor(pp_def.pop('key'))
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp, when=when)

    for ph in self.params.get('post_hooks', []):
        self.add_post_hook(ph)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)

    register_socks_protocols()

591

592

def warn_if_short_id(self, argv):
    """Warn about arguments that look like a short YouTube ID starting with a dash.

    Such arguments match '-' followed by ten ID characters. The warning
    shows the command line rewritten with those arguments moved behind '--'.
    """
    short_id_re = re.compile(r'^-[0-9A-Za-z_-]{10}$')
    dashed = [pos for pos, arg in enumerate(argv) if short_id_re.match(arg)]
    if not dashed:
        return
    remaining = [arg for pos, arg in enumerate(argv) if pos not in dashed]
    correct_argv = ['yt-dlp'] + remaining + ['--'] + [argv[pos] for pos in dashed]
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))

607

608

def add_info_extractor(self, ie):
    """Append an InfoExtractor (class or instance) to the extractor list.

    Instances are additionally indexed by their ie_key() and bound to
    this downloader; classes are only listed.
    """
    self._ies.append(ie)
    if isinstance(ie, type):
        return
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)

614

615

def get_info_extractor(self, ie_key):
    """
    Return the extractor instance registered under ie_key.

    If no instance is known yet, one is created through the module-level
    get_info_extractor() lookup and added to the extractor list first.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created

626

627

def add_default_info_extractors(self):
    """
    Add every extractor yielded by gen_extractor_classes() to the end of the list
    """
    for extractor_cls in gen_extractor_classes():
        self.add_info_extractor(extractor_cls)

633

634

def add_post_processor(self, pp, when='post_process'):
    """Append a PostProcessor to the chain for stage `when` and bind it to this downloader."""
    stage_chain = self._pps[when]
    stage_chain.append(pp)
    pp.set_downloader(self)

638

639

def add_post_hook(self, ph):
    """Register ph at the end of the list of post hooks"""
    hooks = self._post_hooks
    hooks.append(ph)

642

643

def add_progress_hook(self, ph):
    """Register ph at the end of the list of progress hooks (currently only for the file downloader)"""
    hooks = self._progress_hooks
    hooks.append(ph)

646

647

def _bidi_workaround(self, message):
    """Pass message through the bidi helper process, if one was started.

    Without an _output_channel the message is returned unchanged.
    Otherwise the message is written to the helper's stdin and as many
    lines as the message contains are read back from the output channel.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)
    expected_lines = message.count('\n') + 1
    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + '\n').encode('utf-8'))
    helper_stdin.flush()
    pieces = []
    for _ in range(expected_lines):
        pieces.append(self._output_channel.readline().decode('utf-8'))
    # Drop the newline that was appended before sending
    return ''.join(pieces)[:-len('\n')]

659

660

def _write_string(self, s, out=None):
    """Write s to out via write_string(), using the 'encoding' param"""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)

662

663

def to_stdout(self, message, skip_eol=False, quiet=False):
    """Print message to stdout

    A configured logger receives the message at debug level instead.
    Quiet messages are dropped unless 'verbose' is set, in which case
    they are written to the error stream.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if quiet and not self.params.get('verbose'):
        return
    line_end = '' if skip_eol else '\n'
    text = '%s%s' % (self._bidi_workaround(message), line_end)
    target = self._err_file if quiet else self._screen_file
    self._write_string(text, target)

671

672

def to_stderr(self, message):
    """Print message to stderr, or hand it to the logger as an error if one is set"""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if not logger:
        self._write_string('%s\n' % self._bidi_workaround(message), self._err_file)
        return
    logger.error(message)

679

680

def to_console_title(self, message):
    """Set the console title to message when the 'consoletitle' param is enabled"""
    if not self.params.get('consoletitle', False):
        return
    if compat_os_name != 'nt':
        if 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)
        return
    if ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))

690

691

def save_console_title(self):
    """Push the current console title on the terminal's title stack.

    Does nothing unless 'consoletitle' is enabled, 'simulate' is off,
    the OS is not 'nt' and TERM is set in the environment.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Save the title on stack
    self._write_string('\033[22;0t', self._screen_file)

699

700

def restore_console_title(self):
    """Pop the console title from the terminal's title stack.

    Does nothing unless 'consoletitle' is enabled, 'simulate' is off,
    the OS is not 'nt' and TERM is set in the environment.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Restore the title from stack
    self._write_string('\033[23;0t', self._screen_file)

708

709

def __enter__(self):
    """Enter the context: save the console title and return this object"""
    self.save_console_title()
    return self

712

713

def __exit__(self, *args):
    """Leave the context: restore the console title and save cookies if a cookiefile is set"""
    self.restore_console_title()

    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save(ignore_discard=True, ignore_expires=True)

718

719

def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message (if any) is printed to stderr first. In verbose mode a
    traceback is printed as well; when tb is not given it is built from
    the exception being handled, or from the current stack otherwise.

    Unless the 'ignoreerrors' param is set, DownloadError is raised;
    otherwise the download return code is set to 1.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)
    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                current = sys.exc_info()[1]
                tb = ''
                # Some exceptions carry the exc_info of their original cause
                if hasattr(current, 'exc_info') and current.exc_info[0]:
                    tb += ''.join(traceback.format_exception(*current.exc_info))
                tb += encode_compat_str(traceback.format_exc())
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)
    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return
    exc_type, exc_value = sys.exc_info()[:2]
    if exc_type and hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
        exc_info = exc_value.exc_info
    else:
        exc_info = sys.exc_info()
    raise DownloadError(message, exc_info)

749

750

def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode"""
    be_quiet = self.params.get('quiet', False)
    self.to_stdout(message, skip_eol, quiet=be_quiet)

754

755

def report_warning(self, message):
    '''
    Print the message to stderr, prefixed with 'WARNING:'.
    The prefix is colored when colors are allowed, stderr is a tty and
    the OS is not 'nt'. A configured logger receives the bare message
    instead; the 'no_warnings' param suppresses the output otherwise.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    use_color = not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt'
    _msg_header = '\033[0;33mWARNING:\033[0m' if use_color else 'WARNING:'
    self.to_stderr('%s %s' % (_msg_header, message))

771

772

def report_error(self, message, tb=None):
    '''
    Prefix the message with 'ERROR:' and pass it on to trouble().
    The prefix is colored in red when colors are allowed, stderr is a
    tty and the OS is not 'nt'.
    '''
    use_color = not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt'
    _msg_header = '\033[0;31mERROR:\033[0m' if use_color else 'ERROR:'
    self.trouble('%s %s' % (_msg_header, message), tb)

783

784

def write_debug(self, message):
    '''Log a '[debug]'-prefixed message (verbose mode only), via the logger if one is set'''
    if not self.params.get('verbose', False):
        return
    line = '[debug] %s' % message
    logger = self.params.get('logger')
    if logger:
        logger.debug(line)
        return
    self._write_string('%s\n' % line)

793

794

def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    generic = '[download] The file has already been downloaded'
    try:
        detailed = '[download] %s has already been downloaded' % file_name
        self.to_screen(detailed)
    except UnicodeEncodeError:
        # Fall back to a message without the file name
        self.to_screen(generic)

800

801

def report_file_delete(self, file_name):
    """Report that existing file will be deleted."""
    generic = 'Deleting existing file'
    try:
        detailed = 'Deleting existing file %s' % file_name
        self.to_screen(detailed)
    except UnicodeEncodeError:
        # Fall back to a message without the file name
        self.to_screen(generic)

807

808

def parse_outtmpl(self):
    """Return the 'outtmpl' param as a dict of templates with defaults filled in.

    A non-dict value becomes the 'default' template. Every key of
    DEFAULT_OUTTMPL that is missing or empty is set to its default.
    A warning is issued for each template given as bytes.
    """
    templates = self.params.get('outtmpl', {})
    if not isinstance(templates, dict):
        templates = {'default': templates}
    for tmpl_type, fallback in DEFAULT_OUTTMPL.items():
        if not templates.get(tmpl_type):
            templates[tmpl_type] = fallback
    for template in templates.values():
        if isinstance(template, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
    return templates

821

822

def get_output_path(self, dir_type='', filename=None):
    """Join the 'home' path, the path for dir_type (if given) and filename.

    Both directories come from the 'paths' param and are passed through
    expand_path(); the joined result is passed through sanitize_path().
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    path = os.path.join(home_dir, type_dir, filename or '')

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    if sys.version_info < (3, 0) and sys.platform == 'win32':
        path = encodeFilename(path, True).decode(preferredencoding())
    return sanitize_path(path, force=self.params.get('windowsfilenames'))

836

837

@staticmethod
def validate_outtmpl(tmpl):
    ''' @return None or Exception object '''
    def escape_keyless(mobj):
        # Specifiers without a key get an extra '%' so they are taken literally
        prefix = '' if mobj.group('has_key') else '%'
        return prefix + mobj.group(0)

    try:
        # Trial substitution; a defaultdict supplies a value for any key
        re.sub(STR_FORMAT_RE.format(''), escape_keyless, tmpl) % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None

849

850

def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
    """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)

    outtmpl:   The output template
    info_dict: The info dict; it is copied and not modified
    sanitize:  Optional function called as sanitize(field_name, value) on
               values formatted with %c, %s or %r

    Returns a tuple (template, dict). Each keyed specifier in the template
    is rewritten to a unique key ("<original key>\\0<format>") whose
    evaluated value is stored in the dict.

    Keys have the form [-]FIELD[(+|-)FIELD_OR_NUMBER]...[>STRFTIME][|DEFAULT].
    Keys that cannot be parsed or evaluated yield the placeholder from
    the 'outtmpl_na_placeholder' param ('NA' if unset).
    """
    info_dict = dict(info_dict)
    na = self.params.get('outtmpl_na_placeholder', 'NA')

    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['epoch'] = int(time.time())
    info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE.format('[^)]*'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w+(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
    # Grouped so the alternation stays local when embedded in a larger
    # pattern; the decimal point is escaped so it matches only a literal '.'
    MATH_FIELD_RE = r'(?:{field}|{num})'.format(field=FIELD_RE, num=r'-?\d+(?:\.\d+)?')
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{field})
        (?P<maths>(?:{math_op}{math_field})*)
        (?:>(?P<strf_format>.+?))?
        (?:\|(?P<default>.*?))?
        $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

    get_key = lambda k: traverse_obj(
        info_dict, k.split('.'), is_user_input=True, traverse_string=True)

    def get_value(mdict):
        """Evaluate one parsed key; None means "not available" """
        # Object traversal
        value = get_key(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            pending_op = None  # operator waiting for its right-hand operand
            while offset_key:
                token = re.match(
                    MATH_FIELD_RE if pending_op else MATH_OPERATORS_RE,
                    offset_key)
                if token is None:
                    # Cannot tokenize the rest; treat the value as unavailable
                    return None
                item = token.group(0)
                offset_key = offset_key[len(item):]
                if pending_op is None:
                    pending_op = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                offset = float_or_none(item)
                if offset is None:
                    # Not a number, so it names another field
                    offset = float_or_none(get_key(item))
                try:
                    value = pending_op(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                pending_op = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'])

        return value

    def create_key(outer_mobj):
        """Replacement callback: rewrite one specifier and record its value"""
        if not outer_mobj.group('has_key'):
            return '%{}'.format(outer_mobj.group(0))

        key = outer_mobj.group('key')
        fmt = outer_mobj.group('format')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        if mobj is None:
            value, default, mobj = None, na, {'fields': ''}
        else:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else na
            value = get_value(mobj)

        if fmt == 's' and value is not None and key in field_size_compat_map:
            fmt = '0{:d}d'.format(field_size_compat_map[key])

        value = default if value is None else value

        if fmt == 'c':
            value = compat_str(value)
            if value:
                value = value[0]
            else:
                # There is no first character; print the empty string as is
                fmt = 's'
        elif fmt[-1] not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'
        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), '%ss' % fmt[:-1]
            if fmt[-1] in 'csr':
                value = sanitize(mobj['fields'].split('.')[-1], value)
        key += '\0%s' % fmt
        TMPL_DICT[key] = value
        return '%({key}){fmt}'.format(key=key, fmt=fmt)

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

970

971

def _prepare_filename(self, info_dict, tmpl_type='default'):
    """Evaluate the output template of type tmpl_type for info_dict.

    Returns the resulting filename (without the output directories), or
    None after reporting an error if the substitution raises ValueError.
    """
    try:
        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id' or k.endswith('_id')))
        outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
        outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)

        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        filename = expand_path(outtmpl).replace(sep, '') % template_dict

        force_ext = OUTTMPL_TYPES.get(tmpl_type)
        if force_ext is not None:
            filename = replace_extension(filename, force_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # Split off at most the last two dot-separated parts (sub-extension
            # and extension) and trim only what precedes them. Splitting on
            # every dot would drop the middle parts of the name and would
            # duplicate a name that has no dot at all.
            parts = filename.rsplit('.', 2)
            filename = '.'.join(filter(None, [parts[0][:trim_file_name]] + parts[1:]))

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None

1011

1012

def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename.

    info_dict: The info dict the output template is evaluated against
    dir_type:  Template/path type; '' selects the 'default' template
    warn:      If true, warn (at most once per object) when the 'paths'
               param cannot be honoured for the generated filename

    Returns '-' or a false value unchanged (the latter when the template
    could not be evaluated); otherwise the filename placed below the
    configured output paths.
    """
    filename = self._prepare_filename(info_dict, dir_type or 'default')

    if warn and not self.__prepare_filename_warned:
        if not self.params.get('paths'):
            pass
        elif filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout')
        # filename is None when the template failed; isabs() rejects None
        elif filename and os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template')
        self.__prepare_filename_warned = True
    if filename == '-' or not filename:
        return filename

    return self.get_output_path(dir_type, filename)

1029

1030

def _match_entry(self, info_dict, incomplete=False, silent=False):
    """Return the reason an entry must be skipped, or None to download it.

    Arguments:
    info_dict  -- metadata of the entry being considered
    incomplete -- when true the user supplied "match_filter" is not run
    silent     -- when true the skip reason is not printed

    Raises ExistingVideoReached / RejectedVideoReached when the entry is
    skipped and the matching "break_on_existing" / "break_on_reject"
    parameter is enabled.
    """
    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def check_filter():
        # 'title' may be absent when only the playlist is being evaluated
        if 'title' in info_dict:
            title = info_dict['title']
            wanted = self.params.get('matchtitle', False)
            if wanted and not re.search(wanted, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + wanted + '"'
            unwanted = self.params.get('rejecttitle', False)
            if unwanted and re.search(unwanted, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + unwanted + '"'

        upload_date = info_dict.get('upload_date')
        if upload_date is not None:
            allowed_dates = self.params.get('daterange', DateRange())
            if upload_date not in allowed_dates:
                return '%s upload date is not in range %s' % (
                    date_from_str(upload_date).isoformat(), allowed_dates)

        views = info_dict.get('view_count')
        if views is not None:
            lower_bound = self.params.get('min_views')
            if lower_bound is not None and views < lower_bound:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (
                    video_title, views, lower_bound)
            upper_bound = self.params.get('max_views')
            if upper_bound is not None and views > upper_bound:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (
                    video_title, views, upper_bound)

        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        # The user filter only runs on complete metadata
        if incomplete:
            return None
        match_filter = self.params.get('match_filter')
        if match_filter is None:
            return None
        return match_filter(info_dict)

    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return None
    if not silent:
        self.to_screen('[download] ' + reason)
    if self.params.get(break_opt, False):
        raise break_err()
    return reason

@staticmethod
def add_extra_info(info_dict, extra_info):
    """Copy into info_dict every key of extra_info that it does not have yet.

    Keys already present in info_dict keep their current value, even when
    that value is None. info_dict is modified in place.
    """
    for key in extra_info:
        if key not in info_dict:
            info_dict[key] = extra_info[key]

1089

1090

def extract_info(self, url, download=True, ie_key=None, extra_info={},
                 process=True, force_generic_extractor=False):
    """
    Extract the information for a URL with the first suitable extractor.

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
    process -- whether to resolve all unresolved references (URLs, playlist items),
        must be True for download to work.
    force_generic_extractor -- force using the generic extractor

    Returns whatever the extraction step returns, or None when the URL is
    already recorded in the download archive or no extractor is suitable
    (an error is reported in the latter case).
    """
    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    candidates = [self.get_info_extractor(ie_key)] if ie_key else self._ies

    for candidate in candidates:
        if not candidate.suitable(url):
            continue

        ie_key = candidate.ie_key()
        ie = self.get_info_extractor(ie_key)
        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Try to learn the video id up front so that archived videos can
        # be skipped without running the extractor
        try:
            if callable(getattr(ie, 'extract_id', None)):
                raw_id = ie.extract_id(url)
            else:
                raw_id = ie._match_id(url)
            temp_id = str_or_none(raw_id)
        except (AssertionError, IndexError, AttributeError):
            temp_id = None

        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                ie_key, temp_id))
            return None
        return self.__extract_info(url, ie, download, extra_info, process)

    # Only reached when no extractor accepted the URL
    self.report_error('no suitable InfoExtractor for URL %s' % url)

1138

1139

def __handle_extraction_exceptions(func):
    """Decorator that turns extraction failures of a method into reports.

    The wrapped method is called unchanged. Geo restriction and extractor
    errors are reported through report_error(); a throttled download
    triggers a warning and a retry of the whole call; the
    MaxDownloadsReached / ExistingVideoReached / RejectedVideoReached
    exceptions always propagate; any other exception is reported only when
    the "ignoreerrors" parameter is set and re-raised otherwise.
    When an error is reported the wrapper returns None.
    """
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except GeoRestrictedError as geo_err:
            parts = [geo_err.msg]
            if geo_err.countries:
                country_names = ', '.join(
                    ISO3166Utils.short2full(code) for code in geo_err.countries)
                parts.append('\nThis video is available in %s.' % country_names)
            parts.append('\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.')
            self.report_error(''.join(parts))
        except ExtractorError as ie_err:  # An error we somewhat expected
            self.report_error(compat_str(ie_err), ie_err.format_traceback())
        except ThrottledDownload:
            self.to_stderr('\r')
            self.report_warning('The download speed is below throttle limit. Re-extracting data')
            return wrapper(self, *args, **kwargs)
        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
            raise
        except Exception as err:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(error_to_compat_str(err), tb=encode_compat_str(traceback.format_exc()))
    return wrapper

@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run extractor `ie` on `url` and optionally process its result.

    Returns None when the extractor returned None, the raw result when
    `process` is false, and the value of process_ie_result() otherwise.
    """
    extracted = ie.extract(url)
    # Finished already (backwards compatibility; listformats and friends should be moved here)
    if extracted is None:
        return None

    # Backwards compatibility: old IE result format was a bare list
    if isinstance(extracted, list):
        extracted = {'_type': 'compat_list', 'entries': extracted}

    self.add_default_extra_info(extracted, ie, url)
    if not process:
        return extracted
    return self.process_ie_result(extracted, download, extra_info)

def add_default_extra_info(self, ie_result, ie, url):
    """Fill in URL and extractor fields that ie_result does not have yet.

    When `url` is given it supplies 'webpage_url', 'original_url' and
    'webpage_url_basename'; when `ie` is given it supplies 'extractor' and
    'extractor_key'. Values are added through add_extra_info().
    """
    if url is not None:
        url_fields = {
            'webpage_url': url,
            'original_url': url,
            'webpage_url_basename': url_basename(url),
        }
        self.add_extra_info(ie_result, url_fields)

    if ie is not None:
        extractor_fields = {
            'extractor': ie.IE_NAME,
            'extractor_key': ie.ie_key(),
        }
        self.add_extra_info(ie_result, extractor_fields)

1194

1195

def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie (may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    Arguments:
    ie_result  -- extractor result; its '_type' ('video' when missing)
                  selects how it is resolved
    download   -- forwarded to the video/playlist processing steps
    extra_info -- values added to results that lack them; it is only read
                  here, never modified

    Returns None for a playlist whose URL is already being processed, and
    may return a falsy value when a nested extraction failed.
    Raises Exception for an unknown '_type'.
    """
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: print what is known and do not follow the URL
            info_copy = ie_result.copy()
            self.add_extra_info(info_copy, extra_info)
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataFromFieldPP to allow setting a list
            if isinstance(additional_urls, compat_str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            # extra_info must be passed by keyword: the third positional
            # parameter of extract_info() is ie_key, not extra_info
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        # Outer metadata overrides the inner result, except for the fields
        # that identify the inner result itself
        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # Parenthesised on purpose: '%' binds tighter than 'or', so
            # without the parentheses the id fallback could never be used
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Forget the visited URLs once the outermost playlist is done
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Give each legacy entry the fields of the enclosing result
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)

1315

1316

def _ensure_dir_exists(self, path):
    """Call make_dir() for `path`, handing it report_error as the error callback.

    Returns whatever make_dir() returns.
    """
    error_callback = self.report_error
    return make_dir(path, error_callback)

1318

1319

def __process_playlist(self, ie_result, download):
    """Resolve (and optionally download) every selected entry of a playlist.

    Arguments:
    ie_result -- playlist result; must contain 'entries', 'extractor',
                 'extractor_key' and 'webpage_url'
    download  -- forwarded to the processing of each entry

    The selection honours the "playliststart", "playlistend",
    "playlist_items", "playlistreverse" and "playlistrandom" parameters.
    Playlist level metadata files are written when enabled.

    Returns ie_result with 'entries' replaced by the processed entries, or
    None when a playlist file or its directory could not be prepared.
    Raises EntryNotInPlaylist when 'entries' is missing or a requested
    entry of an incomplete playlist is unavailable.
    """
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if 'entries' not in ie_result:
        raise EntryNotInPlaylist()
    incomplete_entries = bool(ie_result.get('requested_entries'))
    if incomplete_entries:
        def fill_missing_entries(entries, indexes):
            # max(indexes), not max(*indexes): the starred form raises
            # TypeError when exactly one entry was requested
            ret = [None] * max(indexes)
            for i, entry in zip(indexes, entries):
                ret[i - 1] = entry
            return ret
        ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

    playlist_results = []

    playliststart = self.params.get('playliststart', 1)
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(spec):
            # 'a-b' segments expand to the inclusive range, others are single items
            for string_segment in spec.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    for item in range(int(start), int(end) + 1):
                        yield int(item)
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']
    msg = (
        'Downloading %d videos' if not isinstance(ie_entries, list)
        else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
    if not isinstance(ie_entries, (list, PagedList)):
        ie_entries = LazyList(ie_entries)

    entries = []
    for i in playlistitems or itertools.count(playliststart):
        if playlistitems is None and playlistend is not None and playlistend < i:
            break
        entry = None
        try:
            entry = ie_entries[i - 1]
            if entry is None:
                raise EntryNotInPlaylist()
        except (IndexError, EntryNotInPlaylist):
            if incomplete_entries:
                raise EntryNotInPlaylist()
            elif not playlistitems:
                break
        entries.append(entry)
        # Stop collecting early when a break_on_* condition is hit
        try:
            if entry is not None:
                self._match_entry(entry, incomplete=True, silent=True)
        except (ExistingVideoReached, RejectedVideoReached):
            break
    ie_result['entries'] = entries

    # Save playlist_index before re-ordering. Without explicit items the
    # collected entries start at playliststart, so the index is offset by it.
    entries = [
        ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
        for i, entry in enumerate(entries, 1)
        if entry is not None]
    n_entries = len(entries)

    if not playlistitems and (playliststart or playlistend):
        playlistitems = list(range(playliststart, playliststart + n_entries))
    ie_result['requested_entries'] = playlistitems

    if self.params.get('allow_playlist_files', True):
        ie_copy = {
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'playlist_index': 0,
        }
        ie_copy.update(dict(ie_result))

        if self.params.get('writeinfojson', False):
            infofn = self.prepare_filename(ie_copy, 'pl_infojson')
            if not self._ensure_dir_exists(encodeFilename(infofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Playlist metadata is already present')
            else:
                self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write playlist metadata to JSON file ' + infofn)

        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('writedescription', False):
            descfn = self.prepare_filename(ie_copy, 'pl_description')
            if not self._ensure_dir_exists(encodeFilename(descfn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Playlist description is already present')
            elif ie_result.get('description') is None:
                self.report_warning('There\'s no playlist description to write.')
            else:
                try:
                    self.to_screen('[info] Writing playlist description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(ie_result['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write playlist description file ' + descfn)
                    return

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]
    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, entry_tuple in enumerate(entries, 1):
        playlist_index, entry = entry_tuple
        # Compatibility options live under 'compat_opts' and use hyphenated
        # names; with this option the index follows the download order
        if 'playlist-index' in self.params.get('compat_opts', []):
            playlist_index = playlistitems[i - 1] if playlistitems else i
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': i,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
            break
        # TODO: skip failed (empty) entries?
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results
    self.to_screen('[download] Finished downloading playlist: %s' % playlist)
    return ie_result

1490

1491

@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process one playlist entry under the extraction exception handler."""
    processed = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return processed

1495

1496

def _build_format_filter(self, filter_spec):
    """Return a predicate implementing the format filter `filter_spec`.

    Two forms are understood: a numeric comparison on one of the known
    numeric fields (the value may carry a file size suffix), and a string
    comparison (=, ^=, $=, *=, each optionally negated with "!") on any
    field. A "?" after the operator makes formats lacking the field pass.

    The predicate takes a format dict. When the field is missing it
    returns the matched "?" group (None if absent), otherwise the result
    of the comparison.

    Raises SyntaxError for an unparsable specification and ValueError for
    a numeric value that cannot be interpreted.
    """
    numeric_ops = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    numeric_rex = re.compile(r'''(?x)\s*
        (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
        (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
        ''' % '|'.join(re.escape(symbol) for symbol in numeric_ops))
    m = numeric_rex.fullmatch(filter_spec)
    if m:
        raw_value = m.group('value')
        try:
            comparison_value = int(raw_value)
        except ValueError:
            # Not a plain integer: try it as a file size, with and
            # without an explicit trailing "B"
            comparison_value = parse_filesize(raw_value)
            if comparison_value is None:
                comparison_value = parse_filesize(raw_value + 'B')
            if comparison_value is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        raw_value, filter_spec))
        op = numeric_ops[m.group('op')]
    else:
        string_ops = {
            '=': operator.eq,
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
        }
        string_rex = re.compile(r'''(?x)\s*
            (?P<key>[a-zA-Z0-9._-]+)\s*
            (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[a-zA-Z0-9._-]+)\s*
            ''' % '|'.join(re.escape(symbol) for symbol in string_ops))
        m = string_rex.fullmatch(filter_spec)
        if not m:
            raise SyntaxError('Invalid filter specification %r' % filter_spec)
        comparison_value = m.group('value')
        str_op = string_ops[m.group('op')]
        if m.group('negation'):
            def op(attr, value):
                return not str_op(attr, value)
        else:
            op = str_op

    key = m.group('key')
    none_inclusive = m.group('none_inclusive')

    def _filter(f):
        actual_value = f.get(key)
        if actual_value is None:
            return none_inclusive
        return op(actual_value, comparison_value)
    return _filter

1556

1557

def _default_format_spec(self, info_dict, download=True):
    """Return the format specification used when the user gave none.

    A single pre-merged file is preferred when actually downloading (not
    simulating) and merging is not possible, the video is live, or the
    output goes to stdout. Otherwise separate video and audio are
    preferred; the "allow_multiple_audio_streams" parameter or the
    "format-spec" compat option select the older form of that spec.
    """
    def merging_is_possible():
        merger = FFmpegMergerPP(self)
        return merger.available and merger.can_merge()

    if self.params.get('simulate', False) or not download:
        prefer_best = False
    else:
        prefer_best = (
            not merging_is_possible()
            or info_dict.get('is_live', False)
            or self.outtmpl_dict['default'] == '-')

    if prefer_best:
        return 'best/bestvideo+bestaudio'

    use_compat_spec = (
        self.params.get('allow_multiple_audio_streams', False)
        or 'format-spec' in self.params.get('compat_opts', []))
    if use_compat_spec:
        return 'bestvideo+bestaudio/best'
    return 'bestvideo*+bestaudio/best'

1579

1580

def build_format_selector(self, format_spec):
    """Compile a format specification into a selector function.

    The specification is tokenized with the Python tokenizer and parsed
    into a tree of selectors: "," (several selections), "/" (first
    alternative that yields something), "+" (merge), "(...)" (grouping)
    and "[...]" (filters, see _build_format_filter).

    The returned function takes a context dict with the keys 'formats'
    and 'incomplete_formats' and returns/yields the selected format dicts.

    Raises SyntaxError when the specification cannot be parsed.
    """
    def syntax_error(note, start):
        # Builds (does not raise) the error, pointing at column start[1]
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    check_formats = self.params.get('check_formats')

    def _parse_filter(tokens):
        # Consume tokens up to the closing ']' and return the filter text
        filter_parts = []
        for tok_type, string, start, _, _ in tokens:
            if tok_type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for tok_type, string, start, end, line in tokens:
            if tok_type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                # (this inner loop continues consuming the same token iterator)
                for tok_type, string, start, end, line in tokens:
                    yield tok_type, string, start, end, line
                    if tok_type == tokenize.OP and string == ']':
                        break
            elif tok_type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
            elif tok_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        selectors = []
        current_selector = None
        for tok_type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if tok_type == getattr(tokenize, 'ENCODING', None):
                continue
            elif tok_type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif tok_type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif tok_type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        # Combine two (possibly already merged) formats into one merged format
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            # Build a filtered list instead of popping while iterating:
            # popping shifts the remaining items, so some formats were
            # never examined and a format could be popped twice.
            stream_taken = {'video': False, 'audio': False}
            kept_formats = []
            for fmt_info in formats_info:
                if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                    continue
                restricted = [
                    aud_vid for aud_vid in ('audio', 'video')
                    if not allow_multiple_streams[aud_vid]
                    and fmt_info.get(aud_vid[0] + 'codec') != 'none']
                # Drop the format if it carries a stream type that may
                # appear only once and is already provided
                if any(stream_taken[aud_vid] for aud_vid in restricted):
                    continue
                for aud_vid in restricted:
                    stream_taken[aud_vid] = True
                kept_formats.append(fmt_info)
            formats_info = kept_formats

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
            'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
            'ext': output_ext,
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
            })

        return new_dict

    def _check_formats(formats):
        # Yield the formats that pass a test download (all of them when
        # the "check_formats" parameter is off)
        if not check_formats:
            yield from formats
            # Without this return every format was test-downloaded anyway
            return
        for f in formats:
            self.to_screen('[info] Testing format %s' % f['format_id'])
            temp_file = tempfile.NamedTemporaryFile(
                suffix='.tmp', delete=False,
                dir=self.get_output_path('temp') or None)
            temp_file.close()
            try:
                success, _ = self.dl(temp_file.name, f, test=True)
            except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
                success = False
            finally:
                if os.path.exists(temp_file.name):
                    try:
                        os.remove(temp_file.name)
                    except OSError:
                        self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
            if success:
                yield f
            else:
                self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

    def _build_selector_function(selector):
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    yield from f(ctx)
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(
                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                    yield _merge(pair)

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    yield from _check_formats(ctx['formats'])
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(_check_formats(ctx['formats']))
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, format_reverse, format_idx = False, True, 1
                # best/worst [video|audio] [*] [.N]; the named groups are
                # the ones read below (bw, type, mod, n)
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    _filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else lambda f: True)  # b*, w*
                    filter_f = lambda f: _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                else:
                    filter_f = ((lambda f: f.get('ext') == format_spec)
                                if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                else (lambda f: f.get('format_id') == format_spec))  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if format_fallback and ctx['incomplete_formats'] and not matches:
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        matches = formats
                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                    try:
                        yield matches[format_idx - 1]
                    except IndexError:
                        return

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply the "[...]" filters to a private copy of the context
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Iterator over the token list that can step back one token
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)

1908

1909

def _calc_headers(self, info_dict):
    """Return the HTTP headers to send when fetching the URL of info_dict.

    The result starts as a copy of the module-level std_headers and is then
    overlaid with, in this order:
      * the entry's own 'http_headers', when present and non-empty;
      * a 'Cookie' header, when self._calc_cookies() yields a value;
      * an 'X-Forwarded-For' header taken from the private
        '__x_forwarded_for_ip' key, unless such a header is already set.
    """
    headers = std_headers.copy()
    headers.update(info_dict.get('http_headers') or {})

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip and 'X-Forwarded-For' not in headers:
        headers['X-Forwarded-For'] = forwarded_ip

    return headers

def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header value for a request to info_dict['url'].

    A throwaway request object is built for the URL only so that
    self.cookiejar can attach its cookie header to it; whatever
    get_header('Cookie') then reports is returned unchanged.
    """
    probe_request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    return probe_request.get_header('Cookie')

1931

1932

def _sanitize_thumbnails(self, info_dict):
    """Normalise the thumbnail list of info_dict in place.

    * When 'thumbnails' is missing (None) but a single 'thumbnail' URL is
      given, a one-element 'thumbnails' list is created from it.
    * The list is sorted in place, ascending, by
      (preference, width, height, id, url); missing values count as -1
      (or '' for the id).
    * Every entry gets an 'id' (its position after sorting, if it had
      none), a 'resolution' string when both width and height are set,
      and a sanitized 'url'.
    * With the 'check_formats' param set, 'thumbnails' is replaced by a
      LazyList that keeps only the entries whose URL answers a HEAD
      request (tested starting from the last entry).

    Nothing is done when there are no thumbnails at all.
    """
    thumbs = info_dict.get('thumbnails')
    if thumbs is None:
        single_url = info_dict.get('thumbnail')
        if single_url:
            thumbs = [{'url': single_url}]
            info_dict['thumbnails'] = thumbs
    if not thumbs:
        return

    def _value_or(value, fallback):
        return value if value is not None else fallback

    def _sort_key(thumb):
        return (
            _value_or(thumb.get('preference'), -1),
            _value_or(thumb.get('width'), -1),
            _value_or(thumb.get('height'), -1),
            _value_or(thumb.get('id'), ''),
            thumb.get('url'))

    thumbs.sort(key=_sort_key)

    def _is_reachable(thumb):
        # Probe the thumbnail URL; unreachable ones are reported and dropped
        self.to_screen('[info] Testing thumbnail %s' % thumb['id'])
        try:
            self.urlopen(HEADRequest(thumb['url']))
        except network_exceptions as err:
            self.to_screen('[info] Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
                thumb['id'], thumb['url'], error_to_compat_str(err)))
            return False
        return True

    for position, thumb in enumerate(thumbs):
        if thumb.get('id') is None:
            thumb['id'] = '%d' % position
        if thumb.get('width') and thumb.get('height'):
            thumb['resolution'] = '%dx%d' % (thumb['width'], thumb['height'])
        thumb['url'] = sanitize_url(thumb['url'])

    if self.params.get('check_formats'):
        # Test from the end of the sorted list, then restore the original order
        info_dict['thumbnails'] = LazyList(filter(_is_reachable, thumbs[::-1])).reverse()

1964

1965

def process_video_result(self, info_dict, download=True):
    """Sanitise a single-video result, select its formats and process them.

    The info dict is validated and normalised in place (mandatory fields,
    string/numeric coercion, thumbnails, dates, numbered titles, subtitles,
    per-format ids/ext/protocol/headers).  Then, depending on the params:

      * with any of the list_thumbnails / listformats / listsubtitles
        params set, the requested listings are printed and None is returned;
      * otherwise the format selector is run and, when `download` is true,
        every selected format is merged into a copy of info_dict and passed
        to self.process_info().

    Returns the updated info_dict (except in the listing case above).
    Raises ExtractorError when 'id' or 'title' is missing, when there are
    no formats, or when the requested format is unavailable (the last two
    become warnings under the 'ignore_no_formats_error' param).
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    def _warn_forced_conversion(field, field_not, conversion):
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def _force_string(info, key):
        value = info.get(key)
        if value is not None and not isinstance(value, compat_str):
            _warn_forced_conversion(key, 'a string', 'string')
            info[key] = compat_str(value)

    def _force_numeric(info):
        for key in self._NUMERIC_FIELDS:
            value = info.get(key)
            if value is not None and not isinstance(value, compat_numeric_types):
                _warn_forced_conversion(key, 'numeric', 'int')
                info[key] = int_or_none(value)

    _force_string(info_dict, 'id')
    _force_numeric(info_dict)

    if 'playlist' not in info_dict:
        # Standalone video: make the playlist fields explicit
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    self._sanitize_thumbnails(info_dict)

    thumbnail = info_dict.get('thumbnail')
    thumbnails = info_dict.get('thumbnails')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        # Fall back to the last entry of the (sorted) thumbnail list
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'id' in info_dict and 'display_id' not in info_dict:
        info_dict['display_id'] = info_dict['id']

    # Derive the YYYYMMDD date fields from their timestamps when missing
    for ts_key, date_key in (
            ('timestamp', 'upload_date'),
            ('release_timestamp', 'release_date')):
        if info_dict.get(date_key) is not None or info_dict.get(ts_key) is None:
            continue
        # Out-of-range timestamps (e.g. negative ones on Windows, see
        # http://bugs.python.org/issue1646728) are silently ignored
        try:
            moment = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
            info_dict[date_key] = moment.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            pass

    # Synthesise "Chapter N" / "Season N" / "Episode N" titles when only
    # the corresponding *_number field is known (common for TV series)
    for field in ('chapter', 'season', 'episode'):
        number = info_dict.get('%s_number' % field)
        if number is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), number)

    for cc_kind in ('subtitles', 'automatic_captions'):
        for sub_formats in (info_dict.get(cc_kind) or {}).values():
            for sub_format in sub_formats:
                if sub_format.get('url'):
                    sub_format['url'] = sanitize_url(sub_format['url'])
                if sub_format.get('ext') is None:
                    sub_format['ext'] = determine_ext(sub_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    # Work out the list of candidate formats; without a 'formats' list the
    # info dict itself is the one and only format
    formats = info_dict.get('formats')
    if formats is None:
        formats = [info_dict]

    if not formats:
        if self.params.get('ignore_no_formats_error'):
            self.report_warning('No video formats found!')
        else:
            raise ExtractorError('No video formats found!')

    def _has_usable_url(candidate):
        url = candidate.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            _force_string(candidate, 'url')
        return True

    # Drop malformed formats so one bad entry does not break extraction
    formats = list(filter(_has_usable_url, formats))

    # Normalise every format and group them by (sanitized) format_id
    by_format_id = {}
    for index, fmt in enumerate(formats):
        _force_string(fmt, 'format_id')
        _force_numeric(fmt)
        fmt['url'] = sanitize_url(fmt['url'])
        if fmt.get('format_id'):
            # Replace characters that are meaningful in format selector expressions
            fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id'])
        else:
            fmt['format_id'] = compat_str(index)
        by_format_id.setdefault(fmt['format_id'], []).append(fmt)

    # Disambiguate formats that share a format_id by appending an index
    for format_id, duplicates in by_format_id.items():
        if len(duplicates) > 1:
            for index, fmt in enumerate(duplicates):
                fmt['format_id'] = '%s-%d' % (format_id, index)

    for fmt in formats:
        if fmt.get('format') is None:
            note = fmt.get('format_note')
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(note) if note is not None else '',
            )
        # Guess the file extension from the URL when not given
        if fmt.get('ext') is None:
            fmt['ext'] = determine_ext(fmt['url']).lower()
        # Guess the protocol when not given (used by format selection)
        if fmt.get('protocol') is None:
            fmt['protocol'] = determine_protocol(fmt)
        # Attach the HTTP headers so external programs can read them from
        # the json output
        full_format_info = info_dict.copy()
        full_format_info.update(fmt)
        fmt['http_headers'] = self._calc_headers(full_format_info)
    # The private forwarding IP has served its purpose (header calculation)
    info_dict.pop('__x_forwarded_for_ip', None)

    # TODO Central sorting goes here

    if formats and formats[0] is not info_dict:
        # Only store 'formats' when the extractor listed them; otherwise
        # info_dict would contain itself as its sole format, a circular
        # reference that cannot be exported to json
        info_dict['formats'] = formats

    info_dict, _ = self.pre_process(info_dict)

    list_only = (
        self.params.get('list_thumbnails')
        or self.params.get('listformats')
        or self.params.get('listsubtitles'))
    if list_only:
        self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listformats'):
            if not info_dict.get('formats'):
                raise ExtractorError('No video formats found', expected=True)
            self.list_formats(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        return

    format_selector = self.format_selector
    if format_selector is None:
        req_format = self._default_format_spec(info_dict, download=download)
        self.write_debug('Default format spec: %s' % req_format)
        format_selector = self.build_format_selector(req_format)

    # Format selection gets a context object rather than the bare format
    # list: selectors may filter the list, yet still need facts about the
    # original set.  For now that is whether the extractor supplied only
    # video-only or only audio-only formats (see
    # https://github.com/ytdl-org/youtube-dl/pull/5556 and
    # https://github.com/ytdl-org/youtube-dl/issues/10083).
    def _present(fmt, codec_key):
        return fmt.get(codec_key) != 'none'

    incomplete_formats = (
        # every format is video-only ...
        all(_present(f, 'vcodec') and not _present(f, 'acodec') for f in formats)
        # ... or every format is audio-only
        or all(not _present(f, 'vcodec') and _present(f, 'acodec') for f in formats))

    ctx = {
        'formats': formats,
        'incomplete_formats': incomplete_formats,
    }

    formats_to_download = list(format_selector(ctx))
    if formats_to_download:
        if download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for selected in formats_to_download:
                new_info = dict(info_dict)
                # Keep a reference to the original info_dict so that
                # process_info can modify it if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(selected)
                self.process_info(new_info)
        # Merge the last selected format into the info dict (backwards compatibility)
        info_dict.update(formats_to_download[-1])
    elif self.params.get('ignore_no_formats_error'):
        self.report_warning('Requested format is not available')
        # Process what we can, even without any available formats.
        self.process_info(dict(info_dict))
    else:
        raise ExtractorError('Requested format is not available', expected=True)
    return info_dict

2203

2204

def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format.

    Parameters:
      video_id            -- only used in warning messages
      normal_subtitles    -- dict mapping language -> list of format dicts
                             (each with an 'ext' key), or a falsy value
      automatic_captions  -- same shape; only used for languages that have
                             no normal subtitles

    Relevant params: 'writesubtitles', 'writeautomaticsub', 'allsubtitles',
    'subtitleslangs' (list of regexes; 'all' selects everything, a leading
    '-' removes the matching languages) and 'subtitlesformat' (a
    '/'-separated preference list, 'best' meaning the last format listed).

    Returns a dict mapping each selected language to the chosen format
    dict, in the order the languages were requested, or None when no
    subtitles were asked for or none are available.
    """
    params = self.params

    available_subs = {}
    if normal_subtitles and params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            # Normal subtitles take precedence over automatic captions
            available_subs.setdefault(lang, cap_info)

    # Empty both when nothing was requested and when nothing is on offer
    if not available_subs:
        return None

    all_sub_langs = list(available_subs)
    if params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif params.get('subtitleslangs', False):
        # A list rather than a set, so that the selected languages keep the
        # order in which they were requested instead of an arbitrary
        # hash-dependent order
        requested_langs = []
        for lang_re in params.get('subtitleslangs'):
            if lang_re == 'all':
                matched = all_sub_langs
                discard = False
            else:
                # startswith() also copes with an empty entry
                discard = lang_re.startswith('-')
                if discard:
                    lang_re = lang_re[1:]
                matcher = re.compile(lang_re + '$').match
                matched = [lang for lang in all_sub_langs if matcher(lang)]
            if discard:
                requested_langs = [lang for lang in requested_langs if lang not in matched]
            else:
                for lang in matched:
                    if lang not in requested_langs:
                        requested_langs.append(lang)
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [all_sub_langs[0]]
    self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        if not formats:
            # None or an empty list: there is nothing to choose from
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            matches = [candidate for candidate in formats if candidate['ext'] == ext]
            if matches:
                chosen = matches[-1]
                break
        else:
            # No preference matched: fall back to the last format listed
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs

def __forced_printings(self, info_dict, filename, incomplete):
    """Write to stdout the fields requested through the 'force*' params.

    Works on a shallow copy of info_dict, extended with 'filename' (when
    given) and a 'urls' string built from the requested formats or from
    'url' (each with its 'play_path' appended, if any).

    Output order: every 'forceprint' template, then title, id, url(s),
    thumbnail, description, filename, duration, format and finally the
    whole dict as JSON ('forcejson').  When `incomplete` is true, the
    otherwise mandatory fields (title, id, urls, format) are skipped if
    they are missing instead of raising KeyError.
    """
    info_dict = info_dict.copy()
    if filename is not None:
        info_dict['filename'] = filename
    if info_dict.get('requested_formats') is not None:
        # For RTMP URLs the playpath is part of what must be printed
        info_dict['urls'] = '\n'.join(
            f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
    elif 'url' in info_dict:
        info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

    def _print_field(option, key, required):
        # `option` names the force<option> param, `key` the info_dict entry
        if not self.params.get('force%s' % option, False):
            return
        if (required and not incomplete) or info_dict.get(key) is not None:
            self.to_stdout(info_dict[key])

    for tmpl in self.params.get('forceprint', []):
        if re.match(r'\w+$', tmpl):
            # A bare field name is shorthand for the template %(name)s
            tmpl = '%({})s'.format(tmpl)
        tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
        self.to_stdout(tmpl % info_copy)

    _print_field('title', 'title', True)
    _print_field('id', 'id', True)
    _print_field('url', 'urls', True)
    _print_field('thumbnail', 'thumbnail', False)
    _print_field('description', 'description', False)
    _print_field('filename', 'filename', False)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    _print_field('format', 'format', True)

    if self.params.get('forcejson', False):
        self.post_extract(info_dict)
        self.to_stdout(json.dumps(info_dict, default=repr))

2308

2309

def dl(self, name, info, subtitle=False, test=False):
    """Download `info` to the file `name` using a suitable downloader.

    Parameters:
      name      -- destination filename handed to the downloader
      info      -- info dict of the thing to download; it is copied, and
                   'http_headers' is filled in on the copy when missing
      subtitle  -- passed through to the downloader's download()
      test      -- when true, the downloader is created with a fixed set
                   of test parameters instead of self.params, and neither
                   progress hooks nor the debug message are used

    Returns whatever the downloader's download() returns.
    """
    if test:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }
    else:
        params = self.params
    fd = get_suitable_downloader(info, params)(self, params)
    if not test:
        for ph in self._progress_hooks:
            fd.add_progress_hook(ph)
        # 'requested_formats' may be present with the value None (other
        # code checks it with "is not None"), in which case a .get()
        # default would not apply; treat that like a missing key
        requested_formats = info.get('requested_formats') or []
        urls = '", "'.join([f['url'] for f in requested_formats] or [info['url']])
        self.write_debug('Invoking downloader on "%s"' % urls)
    new_info = dict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)

2336

2337

def process_info(self, info_dict):

2338

"""Process a single resolved IE result."""

2339

2340

assert info_dict.get('_type', 'video') == 'video'

2341

2342

info_dict.setdefault('__postprocessors', [])

2343

2344

max_downloads = self.params.get('max_downloads')

2345

if max_downloads is not None:

2346

if self._num_downloads >= int(max_downloads):

2347

raise MaxDownloadsReached()

2348

2349

# TODO: backward compatibility, to be removed

2350

info_dict['fulltitle'] = info_dict['title']

2351

2352

if 'format' not in info_dict and 'ext' in info_dict:

2353

info_dict['format'] = info_dict['ext']

2354

2355

if self._match_entry(info_dict) is not None:

2356

return

2357

2358

self.post_extract(info_dict)

2359

self._num_downloads += 1

2360

2361

# info_dict['_filename'] needs to be set for backward compatibility

2362

info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)

2363

temp_filename = self.prepare_filename(info_dict, 'temp')

files_to_move = {}

# Forced printings

self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

2368

2369

if self.params.get('simulate', False):

2370

if self.params.get('force_write_download_archive', False):

2371

self.record_download_archive(info_dict)

2372

2373

# Do nothing else if in simulate mode

2374

return

2375

2376

if full_filename is None:

2377

return

2378

2379

if not self._ensure_dir_exists(encodeFilename(full_filename)):

2380

return

2381

if not self._ensure_dir_exists(encodeFilename(temp_filename)):

2382

return

2383

2384

if self.params.get('writedescription', False):

2385

descfn = self.prepare_filename(info_dict, 'description')

2386

if not self._ensure_dir_exists(encodeFilename(descfn)):

2387

return

2388

if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):

2389

self.to_screen('[info] Video description is already present')

2390

elif info_dict.get('description') is None:

2391

self.report_warning('There\'s no description to write.')

2392

else:

2393

try:

2394

self.to_screen('[info] Writing video description to: ' + descfn)

2395

with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:

2396

descfile.write(info_dict['description'])

2397

except (OSError, IOError):

2398

self.report_error('Cannot write description file ' + descfn)

2399

return

2400

2401

if self.params.get('writeannotations', False):

2402

annofn = self.prepare_filename(info_dict, 'annotation')

2403

if not self._ensure_dir_exists(encodeFilename(annofn)):

2404

return

2405

if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):

2406

self.to_screen('[info] Video annotations are already present')

2407

elif not info_dict.get('annotations'):

2408

self.report_warning('There are no annotations to write.')

2409

else:

2410

try:

2411

self.to_screen('[info] Writing video annotations to: ' + annofn)

2412

with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:

2413

annofile.write(info_dict['annotations'])

2414

except (KeyError, TypeError):

2415

self.report_warning('There are no annotations to write.')

2416

except (OSError, IOError):

2417

self.report_error('Cannot write annotations file: ' + annofn)

2418

return

2419

2420

subtitles_are_requested = any([self.params.get('writesubtitles', False),

2421

self.params.get('writeautomaticsub')])

2422

2423

if subtitles_are_requested and info_dict.get('requested_subtitles'):

2424

# subtitles download errors are already managed as troubles in relevant IE

2425

# that way it will silently go on when used with unsupporting IE

2426

subtitles = info_dict['requested_subtitles']

2427

# ie = self.get_info_extractor(info_dict['extractor_key'])

2428

for sub_lang, sub_info in subtitles.items():

2429

sub_format = sub_info['ext']

2430

sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))

2431

sub_filename_final = subtitles_filename(

2432

self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))

2433

if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):

2434

self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))

2435

sub_info['filepath'] = sub_filename

2436

files_to_move[sub_filename] = sub_filename_final

2437

else:

2438

self.to_screen('[info] Writing video subtitles to: ' + sub_filename)

2439

if sub_info.get('data') is not None:

2440

try:

2441

# Use newline='' to prevent conversion of newline characters

2442

# See https://github.com/ytdl-org/youtube-dl/issues/10268

2443

with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:

2444

subfile.write(sub_info['data'])

2445

sub_info['filepath'] = sub_filename

2446

files_to_move[sub_filename] = sub_filename_final

2447

except (OSError, IOError):

2448

self.report_error('Cannot write subtitles file ' + sub_filename)

return

else:

try:

self.dl(sub_filename, sub_info.copy(), subtitle=True)

2453

sub_info['filepath'] = sub_filename

2454

files_to_move[sub_filename] = sub_filename_final

2455

except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:

2456

self.report_warning('Unable to download subtitle for "%s": %s' %

2457

(sub_lang, error_to_compat_str(err)))

2458

continue

2459

2460

if self.params.get('writeinfojson', False):

2461

infofn = self.prepare_filename(info_dict, 'infojson')

2462

if not self._ensure_dir_exists(encodeFilename(infofn)):

2463

return

2464

if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):

2465

self.to_screen('[info] Video metadata is already present')

2466

else:

2467

self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)

2468

try:

2469

write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)

2470

except (OSError, IOError):

2471

self.report_error('Cannot write video metadata to JSON file ' + infofn)

2472

return

2473

info_dict['__infojson_filename'] = infofn

2474

2475

for thumb_ext in self._write_thumbnails(info_dict, temp_filename):

2476

thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))

2477

thumb_filename = replace_extension(

2478

self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))

2479

files_to_move[thumb_filename_temp] = thumb_filename

2480

2481

# Write internet shortcut files

2482

url_link = webloc_link = desktop_link = False

2483

if self.params.get('writelink', False):

2484

if sys.platform == "darwin": # macOS.

2485

webloc_link = True

2486

elif sys.platform.startswith("linux"):

2487

desktop_link = True

2488

else: # if sys.platform in ['win32', 'cygwin']:

2489

url_link = True

2490

if self.params.get('writeurllink', False):

2491

url_link = True

2492

if self.params.get('writewebloclink', False):

2493

webloc_link = True

2494

if self.params.get('writedesktoplink', False):

2495

desktop_link = True

2496

2497

if url_link or webloc_link or desktop_link:

2498

if 'webpage_url' not in info_dict:

2499

self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')

2500

return

2501

ascii_url = iri_to_uri(info_dict['webpage_url'])

2502

2503

def _write_link_file(extension, template, newline, embed_filename):

2504

linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))

2505

if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):

2506

self.to_screen('[info] Internet shortcut is already present')

2507

else:

2508

try:

2509

self.to_screen('[info] Writing internet shortcut to: ' + linkfn)

2510

with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:

2511

template_vars = {'url': ascii_url}

2512

if embed_filename:

2513

template_vars['filename'] = linkfn[:-(len(extension) + 1)]

2514

linkfile.write(template % template_vars)

2515

except (OSError, IOError):

2516

self.report_error('Cannot write internet shortcut ' + linkfn)

return False

return True

if url_link:

if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):

2522

return

2523

if webloc_link:

2524

if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):

2525

return

2526

if desktop_link:

2527

if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):

return

try:

info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)

2532

except PostProcessingError as err:

2533

self.report_error('Preprocessing: %s' % str(err))

2534

return

2535

2536

must_record_download_archive = False

2537

if self.params.get('skip_download', False):

2538

info_dict['filepath'] = temp_filename

2539

info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

2540

info_dict['__files_to_move'] = files_to_move

2541

info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)

else:

# Download

try:

def existing_file(*filepaths):

2547

ext = info_dict.get('ext')

2548

final_ext = self.params.get('final_ext', ext)

2549

existing_files = []

2550

for file in orderedSet(filepaths):

2551

if final_ext != ext:

2552

converted = replace_extension(file, final_ext, ext)

2553

if os.path.exists(encodeFilename(converted)):

2554

existing_files.append(converted)

2555

if os.path.exists(encodeFilename(file)):

2556

existing_files.append(file)

2557

2558

if not existing_files or self.params.get('overwrites', False):

2559

for file in orderedSet(existing_files):

2560

self.report_file_delete(file)

2561

os.remove(encodeFilename(file))

2562

return None

2563

2564

self.report_file_already_downloaded(existing_files[0])

2565

info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]

2566

return existing_files[0]

2567

2568

success = True

2569

if info_dict.get('requested_formats') is not None:

2570

2571

def compatible_formats(formats):

2572

# TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.

2573

video_formats = [format for format in formats if format.get('vcodec') != 'none']

2574

audio_formats = [format for format in formats if format.get('acodec') != 'none']

2575

if len(video_formats) > 2 or len(audio_formats) > 2:

return False

# Check extension

exts = set(format.get('ext') for format in formats)

2580

COMPATIBLE_EXTS = (

2581

set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),

2582

set(('webm',)),

2583

)

2584

for ext_sets in COMPATIBLE_EXTS:

2585

if ext_sets.issuperset(exts):

2586

return True

2587

# TODO: Check acodec/vcodec

2588

return False

2589

2590

requested_formats = info_dict['requested_formats']

2591

old_ext = info_dict['ext']

2592

if self.params.get('merge_output_format') is None:

2593

if not compatible_formats(requested_formats):

2594

info_dict['ext'] = 'mkv'

2595

self.report_warning(

2596

'Requested formats are incompatible for merge and will be merged into mkv.')

2597

if (info_dict['ext'] == 'webm'

2598

and self.params.get('writethumbnail', False)

2599

and info_dict.get('thumbnails')):

2600

info_dict['ext'] = 'mkv'

2601

self.report_warning(

2602

'webm doesn\'t support embedding a thumbnail, mkv will be used.')

2603

2604

def correct_ext(filename):

2605

filename_real_ext = os.path.splitext(filename)[1][1:]

2606

filename_wo_ext = (

2607

os.path.splitext(filename)[0]

2608

if filename_real_ext == old_ext

2609

else filename)

2610

return '%s.%s' % (filename_wo_ext, info_dict['ext'])

2611

2612

# Ensure filename always has a correct extension for successful merge

2613

full_filename = correct_ext(full_filename)

2614

temp_filename = correct_ext(temp_filename)

2615

dl_filename = existing_file(full_filename, temp_filename)

2616

info_dict['__real_download'] = False

2617

2618

_protocols = set(determine_protocol(f) for f in requested_formats)

2619

if len(_protocols) == 1:

2620

info_dict['protocol'] = _protocols.pop()

2621

directly_mergable = (

2622

'no-direct-merge' not in self.params.get('compat_opts', [])

2623

and info_dict.get('protocol') is not None # All requested formats have same protocol

2624

and not self.params.get('allow_unplayable_formats')

2625

and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')

2626

if directly_mergable:

2627

info_dict['url'] = requested_formats[0]['url']

2628

# Treat it as a single download

2629

dl_filename = existing_file(full_filename, temp_filename)

2630

if dl_filename is None:

2631

success, real_download = self.dl(temp_filename, info_dict)

2632

info_dict['__real_download'] = real_download

2633

else:

2634

downloaded = []

2635

merger = FFmpegMergerPP(self)

2636

if self.params.get('allow_unplayable_formats'):

2637

self.report_warning(

2638

'You have requested merging of multiple formats '

2639

'while also allowing unplayable formats to be downloaded. '

2640

'The formats won\'t be merged to prevent data corruption.')

2641

elif not merger.available:

2642

self.report_warning(

2643

'You have requested merging of multiple formats but ffmpeg is not installed. '

2644

'The formats won\'t be merged.')

2645

2646

if dl_filename is None:

2647

for f in requested_formats:

2648

new_info = dict(info_dict)

2649

del new_info['requested_formats']

2650

new_info.update(f)

2651

fname = prepend_extension(

2652

self.prepare_filename(new_info, 'temp'),

2653

'f%s' % f['format_id'], new_info['ext'])

2654

if not self._ensure_dir_exists(fname):

2655

return

2656

downloaded.append(fname)

2657

partial_success, real_download = self.dl(fname, new_info)

2658

info_dict['__real_download'] = info_dict['__real_download'] or real_download

2659

success = success and partial_success

2660

if merger.available and not self.params.get('allow_unplayable_formats'):

2661

info_dict['__postprocessors'].append(merger)

2662

info_dict['__files_to_merge'] = downloaded

2663

# Even if there were no downloads, it is being merged only now

2664

info_dict['__real_download'] = True

2665

else:

2666

for file in downloaded:

2667

files_to_move[file] = None

2668

else:

2669

# Just a single file

2670

dl_filename = existing_file(full_filename, temp_filename)

2671

if dl_filename is None:

2672

success, real_download = self.dl(temp_filename, info_dict)

2673

info_dict['__real_download'] = real_download

2674

2675

dl_filename = dl_filename or temp_filename

2676

info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

2677

2678

except network_exceptions as err:

2679

self.report_error('unable to download video data: %s' % error_to_compat_str(err))

2680

return

2681

except (OSError, IOError) as err:

2682

raise UnavailableVideoError(err)

2683

except (ContentTooShortError, ) as err:

2684

self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))

2685

return

2686

2687

if success and full_filename != '-':

def fixup():

do_fixup = True

fixup_policy = self.params.get('fixup')

2692

vid = info_dict['id']

2693

2694

if fixup_policy in ('ignore', 'never'):

2695

return

2696

elif fixup_policy == 'warn':

2697

do_fixup = False

2698

elif fixup_policy != 'force':

2699

assert fixup_policy in ('detect_or_warn', None)

2700

if not info_dict.get('__real_download'):

2701

do_fixup = False

2702

2703

def ffmpeg_fixup(cndn, msg, cls):

if not cndn:

return

if not do_fixup:

self.report_warning(f'{vid}: {msg}')

return

pp = cls(self)

if pp.available:

info_dict['__postprocessors'].append(pp)

2712

else:

2713

self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

2714

2715

stretched_ratio = info_dict.get('stretched_ratio')

2716

ffmpeg_fixup(

2717

stretched_ratio not in (1, None),

2718

f'Non-uniform pixel ratio {stretched_ratio}',

2719

FFmpegFixupStretchedPP)

2720

2721

ffmpeg_fixup(

2722

(info_dict.get('requested_formats') is None

2723

and info_dict.get('container') == 'm4a_dash'

2724

and info_dict.get('ext') == 'm4a'),

2725

'writing DASH m4a. Only some players support this container',

2726

FFmpegFixupM4aPP)

2727

2728

downloader = (get_suitable_downloader(info_dict, self.params).__name__

2729

if 'protocol' in info_dict else None)

2730

ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)

2731

ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)

2732

ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)

fixup()

try:

info_dict = self.post_process(dl_filename, info_dict, files_to_move)

2737

except PostProcessingError as err:

2738

self.report_error('Postprocessing: %s' % str(err))

2739

return

2740

try:

2741

for ph in self._post_hooks:

2742

ph(info_dict['filepath'])

2743

except Exception as err:

2744

self.report_error('post hooks: %s' % str(err))

2745

return

2746

must_record_download_archive = True

2747

2748

if must_record_download_archive or self.params.get('force_write_download_archive', False):

2749

self.record_download_archive(info_dict)

2750

max_downloads = self.params.get('max_downloads')

2751

if max_downloads is not None and self._num_downloads >= int(max_downloads):

2752

raise MaxDownloadsReached()

2753

2754

def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when more than one URL is given but the default
    output template is a fixed name (not '-' and without any '%' field),
    unless 'max_downloads' is 1. Returns self._download_retcode.
    """
    default_tmpl = self.outtmpl_dict['default']
    if len(url_list) > 1 and default_tmpl != '-' and '%' not in default_tmpl:
        if self.params.get('max_downloads') != 1:
            raise SameFileError(default_tmpl)

    for url in url_list:
        try:
            # extract_info also performs the actual download
            result = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            # Reported, then the remaining URLs are still processed
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached')
            raise
        except ExistingVideoReached:
            self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
            raise
        except RejectedVideoReached:
            self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
            raise

        if self.params.get('dump_single_json', False):
            self.post_extract(result)
            self.to_stdout(json.dumps(result, default=repr))

    return self._download_retcode

2785

2786

def download_with_info_file(self, info_filename):
    """Process a previously written info JSON file as if it had just been extracted.

    The file is read as UTF-8, passed through filter_requested_info and
    handed to process_ie_result with download=True. If that fails with a
    DownloadError, EntryNotInPlaylist or ThrottledDownload and the info has
    a 'webpage_url', that URL is downloaded instead; otherwise the error is
    re-raised. Returns self._download_retcode on the normal path.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput has no read() method, so json.load cannot be used on it
        raw_info = json.loads('\n'.join(lines))
        info = self.filter_requested_info(raw_info, self.params.get('clean_infojson', True))

    try:
        self.process_ie_result(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])
    return self._download_retcode

2802

2803

@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    """Return a cleaned-up copy of info_dict.

    '__original_infodict' is always removed. With actually_filter set, keys
    starting with '_' (except '_type'), a fixed list of request/runtime
    keys, and entries whose value is None or an empty container are removed
    too. Without it, info_dict['epoch'] is set to the current time (this
    mutates the argument) and only '__original_infodict' is dropped.
    Nested dicts are filtered recursively; lists, tuples, sets and LazyList
    objects are returned as plain lists.
    """
    remove_keys = ['__original_infodict']  # Always remove this since this may contain a copy of the entire dict
    # Always keep this to facilitate load-info-json.
    # NOTE: this must be a plain list; a trailing comma here would turn it
    # into a tuple wrapping the list and '_type' would never match.
    keep_keys = ['_type']
    if actually_filter:
        remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
        empty_values = (None, {}, [], set(), tuple())

        def reject(k, v):
            return k not in keep_keys and (
                k.startswith('_') or k in remove_keys or v in empty_values)
    else:
        info_dict['epoch'] = int(time.time())

        def reject(k, v):
            return k in remove_keys

    def filter_fn(obj):
        if isinstance(obj, (LazyList, list, tuple, set)):
            return list(map(filter_fn, obj))
        if not isinstance(obj, dict):
            return obj
        return dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v))

    return filter_fn(info_dict)

2820

2821

def run_pp(self, pp, infodict):
    """Run a single postprocessor and deal with the files it no longer needs.

    pp.run(infodict) returns (files_to_delete, infodict). With the
    'keepvideo' param set, those files are registered in
    infodict['__files_to_move'] (with an empty destination) unless already
    present; otherwise they are deleted from disk and dropped from
    '__files_to_move'. Returns the infodict produced by the postprocessor.
    """
    infodict.setdefault('__files_to_move', {})
    obsolete_files, infodict = pp.run(infodict)
    if not obsolete_files:
        return infodict

    if self.params.get('keepvideo', False):
        for path in obsolete_files:
            infodict['__files_to_move'].setdefault(path, '')
        return infodict

    for path in set(obsolete_files):
        self.to_screen('Deleting original file %s (pass -k to keep)' % path)
        try:
            os.remove(encodeFilename(path))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded original file')
        # A deleted file must not be moved afterwards
        infodict['__files_to_move'].pop(path, None)
    return infodict

@staticmethod
def post_extract(info_dict):
    """Run the deferred '__post_extractor' callbacks stored in info_dict.

    For 'playlist' and 'multi_video' results, each entry is processed
    recursively. For any other result the callable stored under
    '__post_extractor' (if any) is called, its items are merged into the
    info dict and into its '__original_infodict' (when present), and the
    '__post_extractor' key is removed from both.
    """
    def apply_deferred(info):
        if info.get('_type') in ('playlist', 'multi_video'):
            for entry in info.get('entries', {}):
                apply_deferred(entry or {})
            return

        deferred = info.get('__post_extractor') or (lambda: {})
        extra_fields = deferred().items()
        info.update(extra_fields)
        info.pop('__post_extractor', None)

        original = info.get('__original_infodict') or {}
        original.update(extra_fields)
        original.pop('__post_extractor', None)

    apply_deferred(info_dict or {})

2861

2862

def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered under `key` on a copy of ie_info.

    Returns a tuple (info, files_to_move): the resulting info dict with its
    '__files_to_move' entry removed, and that removed entry.
    """
    info = dict(ie_info)
    info['__files_to_move'] = files_to_move or {}
    for postprocessor in self._pps[key]:
        info = self.run_pp(postprocessor, info)
    pending_moves = info.pop('__files_to_move', None)
    return info, pending_moves

2868

2869

def post_process(self, filename, ie_info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Order: the postprocessors listed in ie_info['__postprocessors'], then
    those registered as 'post_process', then MoveFilesAfterDownloadPP, and
    finally those registered as 'after_move'. Returns the resulting info
    dict, without its '__files_to_move' entry.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move or {}

    before_move = ie_info.get('__postprocessors', []) + self._pps['post_process']
    for postprocessor in before_move:
        info = self.run_pp(postprocessor, info)

    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    # Everything has been moved by now; the bookkeeping entry is no longer needed
    del info['__files_to_move']

    for postprocessor in self._pps['after_move']:
        info = self.run_pp(postprocessor, info)
    return info

2882

2883

def _make_archive_id(self, info_dict):

2884

video_id = info_dict.get('id')

2885

if not video_id:

2886

return

2887

# Future-proof against any change in case

2888

# and backwards compatibility with prior versions

2889

extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist

2890

if extractor is None:

2891

url = str_or_none(info_dict.get('url'))

2892

if not url:

2893

return

2894

# Try to find matching extractor for the URL and take its ie_key

2895

for ie in self._ies:

2896

if ie.suitable(url):

2897

extractor = ie.ie_key()

break

else:

return

return '%s %s' % (extractor.lower(), video_id)

2902

2903

def in_download_archive(self, info_dict):
    """Return True if the video described by info_dict is recorded in self.archive.

    Always False when no 'download_archive' is configured or when no
    archive id can be built for the video.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    # An empty id means incomplete video information
    return bool(archive_id) and archive_id in self.archive

2913

2914

def record_download_archive(self, info_dict):
    """Record the video in the download archive file and in self.archive.

    Does nothing when no 'download_archive' is configured. The archive id
    is appended as one line to the archive file, opened through locked_file.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return

    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    entry = archive_id + '\n'
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(entry)
    self.archive.add(archive_id)

2923

2924

@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short resolution label for a format dict.

    'images' / 'audio only' for formats without video, an explicit
    'resolution' value when set, otherwise a string derived from 'width'
    and/or 'height', and `default` when nothing is known.
    """
    if format.get('vcodec') == 'none':
        return 'images' if format.get('acodec') == 'none' else 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width, height = format.get('width'), format.get('height')
    if width and height:
        return '%dx%d' % (width, height)
    if height:
        return '%sp' % height
    if width:
        return '%dx?' % width
    return default

def _format_note(self, fdict):

2943

res = ''

2944

if fdict.get('ext') in ['f4f', 'f4m']:

2945

res += '(unsupported) '

2946

if fdict.get('language'):

2947

if res:

2948

res += ' '

2949

res += '[%s] ' % fdict['language']

2950

if fdict.get('format_note') is not None:

2951

res += fdict['format_note'] + ' '

2952

if fdict.get('tbr') is not None:

2953

res += '%4dk ' % fdict['tbr']

2954

if fdict.get('container') is not None:

2955

if res:

2956

res += ', '

2957

res += '%s container' % fdict['container']

2958

if (fdict.get('vcodec') is not None

2959

and fdict.get('vcodec') != 'none'):

2960

if res:

2961

res += ', '

2962

res += fdict['vcodec']

2963

if fdict.get('vbr') is not None:

2964

res += '@'

2965

elif fdict.get('vbr') is not None and fdict.get('abr') is not None:

2966

res += 'video@'

2967

if fdict.get('vbr') is not None:

2968

res += '%4dk' % fdict['vbr']

2969

if fdict.get('fps') is not None:

2970

if res:

2971

res += ', '

2972

res += '%sfps' % fdict['fps']

2973

if fdict.get('acodec') is not None:

2974

if res:

2975

res += ', '

2976

if fdict['acodec'] == 'none':

2977

res += 'video only'

2978

else:

2979

res += '%-5s' % fdict['acodec']

2980

elif fdict.get('abr') is not None:

if res:

res += ', '

res += 'audio'

if fdict.get('abr') is not None:

2985

res += '@%3dk' % fdict['abr']

2986

if fdict.get('asr') is not None:

2987

res += ' (%5dHz)' % fdict['asr']

2988

if fdict.get('filesize') is not None:

2989

if res:

2990

res += ', '

2991

res += format_bytes(fdict['filesize'])

2992

elif fdict.get('filesize_approx') is not None:

2993

if res:

2994

res += ', '

2995

res += '~' + format_bytes(fdict['filesize_approx'])

2996

return res

2997

2998

def _format_note_table(self, f):
    """Build the NOTE column for the table-style format listing.

    Joins with ', ' the non-empty values among: an 'UNSUPPORTED' marker for
    f4f/f4m, the language, the format note, the container (ignored when it
    is None or equal to the extension) and the sample rate.
    """
    fields = [
        'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
        format_field(f, 'language', '[%s]'),
        format_field(f, 'format_note'),
        format_field(f, 'container', ignore=(None, f.get('ext'))),
        format_field(f, 'asr', '%5dHz'),
    ]
    return ', '.join(val for val in fields if val != '')

3008

3009

def list_formats(self, info_dict):
    """Print the table of available formats for info_dict.

    Formats with a 'preference' below -1000 are not listed. The detailed
    table layout is used unless the 'list-formats' compat option is set or
    the 'listformats_table' param is False; otherwise the four-column
    layout with a free-form note is printed.
    """
    formats = info_dict.get('formats', [info_dict])
    use_table = (
        'list-formats' not in self.params.get('compat_opts', [])
        and self.params.get('listformats_table', True) is not False)
    listed = [
        f for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]

    if use_table:
        def make_row(f):
            return [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                format_field(f, 'fps', '%d'),
                '|',
                format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                format_field(f, 'tbr', '%4dk'),
                shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
                '|',
                format_field(f, 'vcodec', default='unknown').replace('none', ''),
                format_field(f, 'vbr', '%4dk'),
                format_field(f, 'acodec', default='unknown').replace('none', ''),
                format_field(f, 'abr', '%3dk'),
                format_field(f, 'asr', '%5dHz'),
                self._format_note_table(f)]

        header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
                       '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
    else:
        def make_row(f):
            return [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)]

        header_line = ['format code', 'extension', 'resolution', 'note']

    table = [make_row(f) for f in listed]

    self.to_screen(
        '[info] Available formats for %s:' % info_dict['id'])
    self.to_stdout(render_table(
        header_line, table, delim=use_table, extraGap=(0 if use_table else 1), hideEmpty=use_table))

3051

3052

def list_thumbnails(self, info_dict):
    """Print a table (ID, width, height, URL) of the thumbnails in info_dict.

    When info_dict has no thumbnails - the key is missing, None or empty -
    a "No thumbnails present" message is printed instead.
    """
    # `or []` guards against a missing or None 'thumbnails' entry;
    # list(None) would raise TypeError before the empty check below
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
        return

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    self.to_stdout(render_table(
        ['ID', 'width', 'height', 'URL'],
        [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))

3063

3064

def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table (Language, Name, Formats) of the given subtitles.

    `name` is only used in the messages. When `subtitles` is empty, a
    "<video_id> has no <name>" message is printed instead.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return
    self.to_screen(
        'Available %s for %s:' % (name, video_id))

    def _row(lang, formats):
        pairs = [(f['ext'], f.get('name') or 'unknown') for f in reversed(formats)]
        exts, names = zip(*pairs)
        if len(set(names)) == 1:
            # All formats share one name: show it once, or not at all if unknown
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    rows = [_row(lang, formats) for lang, formats in subtitles.items()]
    self.to_stdout(render_table(
        ['Language', 'Name', 'Formats'],
        rows,
        hideEmpty=True))

3081

3082

def urlopen(self, req):
    """Open `req` with this object's opener and socket timeout.

    `req` may be a URL string, which is first wrapped by sanitized_Request,
    or an already built request object, which is passed through unchanged.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)

3087

3088

def print_debug_header(self):
    """Write the '[debug]' header lines when the 'verbose' param is set.

    Reports, in order: encodings, yt-dlp version and how it is being run,
    lazy loading / plugin extractors / compatibility options when
    applicable, the git HEAD when it can be determined, the Python version,
    versions of external executables and the proxy map. With 'call_home'
    set, the public IP address is additionally fetched from yt-dl.org and
    printed. Does nothing unless 'verbose' is set.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    source = (
        '(exe)' if hasattr(sys, 'frozen')
        else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
        else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
        else '')
    self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled\n')
    if _PLUGIN_CLASSES:
        self._write_string(
            '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
    if self.params.get('compat_opts'):
        self._write_string(
            '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))

    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = process_communicate_or_kill(sp)
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: %s\n' % out)
    except Exception:
        # Best effort only: git may be missing or this may not be a checkout.
        # (The former sys.exc_clear() call only exists on Python 2 and was a
        # swallowed AttributeError on Python 3.)
        pass

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    # Only executables with a detected (truthy) version are listed
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    ) or 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        # The latest-version check that used to follow here sat after an
        # unconditional return and could never run, so it has been removed.

3177

3178

def _setup_opener(self):
    """Create the URL opener and store it as self._opener.

    Also sets self._socket_timeout (600 unless 'socket_timeout' is given)
    and self.cookiejar (loaded from 'cookiefile' when that file is
    readable). Proxies come from the 'proxy' param - an empty string
    disables proxying - or, when it is unset, from the system settings.
    """
    configured_timeout = self.params.get('socket_timeout')
    self._socket_timeout = 600 if configured_timeout is None else float(configured_timeout)

    cookiefile = self.params.get('cookiefile')
    proxy_opt = self.params.get('proxy')

    if cookiefile is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        cookiefile = expand_path(cookiefile)
        self.cookiejar = YoutubeDLCookieJar(cookiefile)
        if os.access(cookiefile, os.R_OK):
            self.cookiejar.load(ignore_discard=True, ignore_expires=True)

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if proxy_opt is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_opt == '':
        proxies = {}
    else:
        proxies = {'http': proxy_opt, 'https': proxy_opt}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')

    file_handler = compat_urllib_request.FileHandler()
    file_handler.file_open = file_open

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

3230

3231

def encode(self, s):
    """Encode `s` with the encoding returned by get_encoding().

    Bytes are returned unchanged. A UnicodeEncodeError is re-raised with a
    hint about the system encoding / --encoding appended to its reason.
    """
    already_encoded = isinstance(s, bytes)
    if already_encoded:
        return s

    target_encoding = self.get_encoding()
    try:
        return s.encode(target_encoding)
    except UnicodeEncodeError as err:
        err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
        raise

3240

3241

def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is not set."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured

3246

3247

def _write_thumbnails(self, info_dict, filename): # return the extensions

3248

write_all = self.params.get('write_all_thumbnails', False)

3249

thumbnails = []

3250

if write_all or self.params.get('writethumbnail', False):

3251

thumbnails = info_dict.get('thumbnails') or []

3252

multiple = write_all and len(thumbnails) > 1

3253

3254

ret = []

3255

for t in thumbnails[::-1]:

3256

thumb_ext = determine_ext(t['url'], 'jpg')

3257

suffix = '%s.' % t['id'] if multiple else ''

3258

thumb_display_id = '%s ' % t['id'] if multiple else ''

3259

thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))

3260

3261

if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):

3262

ret.append(suffix + thumb_ext)

3263

t['filepath'] = thumb_filename

3264

self.to_screen('[%s] %s: Thumbnail %sis already present' %

3265

(info_dict['extractor'], info_dict['id'], thumb_display_id))

3266

else:

3267

self.to_screen('[%s] %s: Downloading thumbnail %s ...' %

3268

(info_dict['extractor'], info_dict['id'], thumb_display_id))

3269

try:

3270

uf = self.urlopen(t['url'])

3271

with open(encodeFilename(thumb_filename), 'wb') as thumbf:

3272

shutil.copyfileobj(uf, thumbf)

3273

ret.append(suffix + thumb_ext)

3274

self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %

3275

(info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))

3276

t['filepath'] = thumb_filename

3277

except network_exceptions as err:

3278

self.report_warning('Unable to download thumbnail "%s": %s' %

3279

(t['url'], error_to_compat_str(err)))

3280

if ret and not write_all:

3281

break

3282

return ret