jfr.im git - yt-dlp.git/blame_incremental

Commit	Line	Data
	1	import collections
	2	import contextlib
	3	import datetime
	4	import errno
	5	import fileinput
	6	import functools
	7	import io
	8	import itertools
	9	import json
	10	import locale
	11	import operator
	12	import os
	13	import random
	14	import re
	15	import shutil
	16	import string
	17	import subprocess
	18	import sys
	19	import tempfile
	20	import time
	21	import tokenize
	22	import traceback
	23	import unicodedata
	24
	25	from .cache import Cache
	26	from .compat import urllib # isort: split
	27	from .compat import compat_os_name, compat_shlex_quote
	28	from .cookies import load_cookies
	29	from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
	30	from .downloader.rtmp import rtmpdump_version
	31	from .extractor import gen_extractor_classes, get_info_extractor
	32	from .extractor.common import UnsupportedURLIE
	33	from .extractor.openload import PhantomJSwrapper
	34	from .minicurses import format_text
	35	from .plugins import directories as plugin_directories
	36	from .postprocessor import _PLUGIN_CLASSES as plugin_pps
	37	from .postprocessor import (
	38	EmbedThumbnailPP,
	39	FFmpegFixupDuplicateMoovPP,
	40	FFmpegFixupDurationPP,
	41	FFmpegFixupM3u8PP,
	42	FFmpegFixupM4aPP,
	43	FFmpegFixupStretchedPP,
	44	FFmpegFixupTimestampPP,
	45	FFmpegMergerPP,
	46	FFmpegPostProcessor,
	47	FFmpegVideoConvertorPP,
	48	MoveFilesAfterDownloadPP,
	49	get_postprocessor,
	50	)
	51	from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
	52	from .update import REPOSITORY, current_git_head, detect_variant
	53	from .utils import (
	54	DEFAULT_OUTTMPL,
	55	IDENTITY,
	56	LINK_TEMPLATES,
	57	MEDIA_EXTENSIONS,
	58	NO_DEFAULT,
	59	NUMBER_RE,
	60	OUTTMPL_TYPES,
	61	POSTPROCESS_WHEN,
	62	STR_FORMAT_RE_TMPL,
	63	STR_FORMAT_TYPES,
	64	ContentTooShortError,
	65	DateRange,
	66	DownloadCancelled,
	67	DownloadError,
	68	EntryNotInPlaylist,
	69	ExistingVideoReached,
	70	ExtractorError,
	71	FormatSorter,
	72	GeoRestrictedError,
	73	HEADRequest,
	74	ISO3166Utils,
	75	LazyList,
	76	MaxDownloadsReached,
	77	Namespace,
	78	PagedList,
	79	PerRequestProxyHandler,
	80	PlaylistEntries,
	81	Popen,
	82	PostProcessingError,
	83	ReExtractInfo,
	84	RejectedVideoReached,
	85	SameFileError,
	86	UnavailableVideoError,
	87	UserNotLive,
	88	YoutubeDLCookieProcessor,
	89	YoutubeDLHandler,
	90	YoutubeDLRedirectHandler,
	91	age_restricted,
	92	args_to_str,
	93	bug_reports_message,
	94	date_from_str,
	95	deprecation_warning,
	96	determine_ext,
	97	determine_protocol,
	98	encode_compat_str,
	99	encodeFilename,
	100	error_to_compat_str,
	101	escapeHTML,
	102	expand_path,
	103	filter_dict,
	104	float_or_none,
	105	format_bytes,
	106	format_decimal_suffix,
	107	format_field,
	108	formatSeconds,
	109	get_compatible_ext,
	110	get_domain,
	111	int_or_none,
	112	iri_to_uri,
	113	is_path_like,
	114	join_nonempty,
	115	locked_file,
	116	make_archive_id,
	117	make_dir,
	118	make_HTTPS_handler,
	119	merge_headers,
	120	network_exceptions,
	121	number_of_digits,
	122	orderedSet,
	123	orderedSet_from_options,
	124	parse_filesize,
	125	preferredencoding,
	126	prepend_extension,
	127	remove_terminal_sequences,
	128	render_table,
	129	replace_extension,
	130	sanitize_filename,
	131	sanitize_path,
	132	sanitize_url,
	133	sanitized_Request,
	134	std_headers,
	135	str_or_none,
	136	strftime_or_none,
	137	subtitles_filename,
	138	supports_terminal_sequences,
	139	system_identifier,
	140	timetuple_from_msec,
	141	to_high_limit_path,
	142	traverse_obj,
	143	try_call,
	144	try_get,
	145	url_basename,
	146	variadic,
	147	version_tuple,
	148	windows_enable_vt_mode,
	149	write_json_file,
	150	write_string,
	151	)
	152	from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__
	153
	154	if compat_os_name == 'nt':
	155	import ctypes
	156
	157
	158	class YoutubeDL:
	159	"""YoutubeDL class.
	160
	161	YoutubeDL objects are the ones responsible for downloading the
	162	actual video file and writing it to disk if the user has requested
	163	it, among some other tasks. In most cases there should be one per
	164	program. As, given a video URL, the downloader doesn't know how to
	165	extract all the needed information, task that InfoExtractors do, it
	166	has to pass the URL to one of them.
	167
	168	For this, YoutubeDL objects have a method that allows
	169	InfoExtractors to be registered in a given order. When it is passed
	170	a URL, the YoutubeDL object hands it to the first InfoExtractor it
	171	finds that reports being able to handle it. The InfoExtractor extracts
	172	all the information about the video or videos the URL refers to, and
	173	YoutubeDL processes the extracted information, possibly using a File
	174	Downloader to download the video.
	175
	176	YoutubeDL objects accept a lot of parameters. In order not to saturate
	177	the object constructor with arguments, it receives a dictionary of
	178	options instead. These options are available through the params
	179	attribute for the InfoExtractors to use. The YoutubeDL also
	180	registers itself as the downloader in charge for the InfoExtractors
	181	that are added to it, so this is a "mutual registration".
	182
	183	Available options:
	184
	185	username: Username for authentication purposes.
	186	password: Password for authentication purposes.
	187	videopassword: Password for accessing a video.
	188	ap_mso: Adobe Pass multiple-system operator identifier.
	189	ap_username: Multiple-system operator account username.
	190	ap_password: Multiple-system operator account password.
	191	usenetrc: Use netrc for authentication instead.
	192	netrc_location: Location of the netrc file. Defaults to ~/.netrc.
	193	netrc_cmd: Use a shell command to get credentials
	194	verbose: Print additional info to stdout.
	195	quiet: Do not print messages to stdout.
	196	no_warnings: Do not print out anything for warnings.
	197	forceprint: A dict with keys WHEN mapped to a list of templates to
	198	print to stdout. The allowed keys are video or any of the
	199	items in utils.POSTPROCESS_WHEN.
	200	For compatibility, a single list is also accepted
	201	print_to_file: A dict with keys WHEN (same as forceprint) mapped to
	202	a list of tuples with (template, filename)
	203	forcejson: Force printing info_dict as JSON.
	204	dump_single_json: Force printing the info_dict of the whole playlist
	205	(or video) as a single JSON line.
	206	force_write_download_archive: Force writing download archive regardless
	207	of 'skip_download' or 'simulate'.
	208	simulate: Do not download the video files. If unset (or None),
	209	simulate only if listsubtitles, listformats or list_thumbnails is used
	210	format: Video format code. see "FORMAT SELECTION" for more details.
	211	You can also pass a function. The function takes 'ctx' as
	212	argument and returns the formats to download.
	213	See "build_format_selector" for an implementation
	214	allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
	215	ignore_no_formats_error: Ignore "No video formats" error. Useful for
	216	extracting metadata even if the video is not actually
	217	available for download (experimental)
	218	format_sort: A list of fields by which to sort the video formats.
	219	See "Sorting Formats" for more details.
	220	format_sort_force: Force the given format_sort. see "Sorting Formats"
	221	for more details.
	222	prefer_free_formats: Whether to prefer video formats with free containers
	223	over non-free ones of same quality.
	224	allow_multiple_video_streams: Allow multiple video streams to be merged
	225	into a single file
	226	allow_multiple_audio_streams: Allow multiple audio streams to be merged
	227	into a single file
	228	check_formats: Whether to test if the formats are downloadable.
	229	Can be True (check all), False (check none),
	230	'selected' (check selected formats),
	231	or None (check only if requested by extractor)
	232	paths: Dictionary of output paths. The allowed keys are 'home'
	233	'temp' and the keys of OUTTMPL_TYPES (in utils.py)
	234	outtmpl: Dictionary of templates for output names. Allowed keys
	235	are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
	236	For compatibility with youtube-dl, a single string can also be used
	237	outtmpl_na_placeholder: Placeholder for unavailable meta fields.
	238	restrictfilenames: Do not allow "&" and spaces in file names
	239	trim_file_name: Limit length of filename (extension excluded)
	240	windowsfilenames: Force the filenames to be windows compatible
	241	ignoreerrors: Do not stop on download/postprocessing errors.
	242	Can be 'only_download' to ignore only download errors.
	243	Default is 'only_download' for CLI, but False for API
	244	skip_playlist_after_errors: Number of allowed failures until the rest of
	245	the playlist is skipped
	246	allowed_extractors: List of regexes to match against extractor names that are allowed
	247	overwrites: Overwrite all video and metadata files if True,
	248	overwrite only non-video files if None
	249	and don't overwrite any file if False
	250	For compatibility with youtube-dl,
	251	"nooverwrites" may also be used instead
	252	playlist_items: Specific indices of playlist to download.
	253	playlistrandom: Download playlist items in random order.
	254	lazy_playlist: Process playlist entries as they are received.
	255	matchtitle: Download only matching titles.
	256	rejecttitle: Reject downloads for matching titles.
	257	logger: Log messages to a logging.Logger instance.
	258	logtostderr: Print everything to stderr instead of stdout.
	259	consoletitle: Display progress in console window's titlebar.
	260	writedescription: Write the video description to a .description file
	261	writeinfojson: Write the video metadata to a .info.json file
	262	clean_infojson: Remove internal metadata from the infojson
	263	getcomments: Extract video comments. This will not be written to disk
	264	unless writeinfojson is also given
	265	writeannotations: Write the video annotations to a .annotations.xml file
	266	writethumbnail: Write the thumbnail image to a file
	267	allow_playlist_files: Whether to write playlists' description, infojson etc
	268	also to disk when using the 'write*' options
	269	write_all_thumbnails: Write all thumbnail formats to files
	270	writelink: Write an internet shortcut file, depending on the
	271	current platform (.url/.webloc/.desktop)
	272	writeurllink: Write a Windows internet shortcut file (.url)
	273	writewebloclink: Write a macOS internet shortcut file (.webloc)
	274	writedesktoplink: Write a Linux internet shortcut file (.desktop)
	275	writesubtitles: Write the video subtitles to a file
	276	writeautomaticsub: Write the automatically generated subtitles to a file
	277	listsubtitles: Lists all available subtitles for the video
	278	subtitlesformat: The format code for subtitles
	279	subtitleslangs: List of languages of the subtitles to download (can be regex).
	280	The list may contain "all" to refer to all the available
	281	subtitles. The language can be prefixed with a "-" to
	282	exclude it from the requested languages, e.g. ['all', '-live_chat']
	283	keepvideo: Keep the video file after post-processing
	284	daterange: A utils.DateRange object, download only if the upload_date is in the range.
	285	skip_download: Skip the actual download of the video file
	286	cachedir: Location of the cache files in the filesystem.
	287	False to disable filesystem cache.
	288	noplaylist: Download single video instead of a playlist if in doubt.
	289	age_limit: An integer representing the user's age in years.
	290	Unsuitable videos for the given age are skipped.
	291	min_views: An integer representing the minimum view count the video
	292	must have in order to not be skipped.
	293	Videos without view count information are always
	294	downloaded. None for no limit.
	295	max_views: An integer representing the maximum view count.
	296	Videos that are more popular than that are not
	297	downloaded.
	298	Videos without view count information are always
	299	downloaded. None for no limit.
	300	download_archive: A set, or the name of a file where all downloads are recorded.
	301	Videos already present in the file are not downloaded again.
	302	break_on_existing: Stop the download process after attempting to download a
	303	file that is in the archive.
	304	break_per_url: Whether break_on_reject and break_on_existing
	305	should act on each input URL as opposed to for the entire queue
	306	cookiefile: File name or text stream from where cookies should be read and dumped to
	307	cookiesfrombrowser: A tuple containing the name of the browser, the profile
	308	name/path from where cookies are loaded, the name of the keyring,
	309	and the container name, e.g. ('chrome', ) or
	310	('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
	311	legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
	312	support RFC 5746 secure renegotiation
	313	nocheckcertificate: Do not verify SSL certificates
	314	client_certificate: Path to client certificate file in PEM format. May include the private key
	315	client_certificate_key: Path to private key file for client certificate
	316	client_certificate_password: Password for client certificate private key, if encrypted.
	317	If not provided and the key is encrypted, yt-dlp will ask interactively
	318	prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
	319	(Only supported by some extractors)
	320	enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons.
	321	http_headers: A dictionary of custom headers to be used for all requests
	322	proxy: URL of the proxy server to use
	323	geo_verification_proxy: URL of the proxy to use for IP address verification
	324	on geo-restricted sites.
	325	socket_timeout: Time to wait for unresponsive hosts, in seconds
	326	bidi_workaround: Work around buggy terminals without bidirectional text
	327	support, using fribidi
	328	debug_printtraffic: Print out sent and received HTTP traffic
	329	default_search: Prepend this string if an input url is not valid.
	330	'auto' for elaborate guessing
	331	encoding: Use this encoding instead of the system-specified.
	332	extract_flat: Whether to resolve and process url_results further
	333	* False: Always process. Default for API
	334	* True: Never process
	335	* 'in_playlist': Do not process inside playlist/multi_video
	336	* 'discard': Always process, but don't return the result
	337	from inside playlist/multi_video
	338	* 'discard_in_playlist': Same as "discard", but only for
	339	playlists (not multi_video). Default for CLI
	340	wait_for_video: If given, wait for scheduled streams to become available.
	341	The value should be a tuple containing the range
	342	(min_secs, max_secs) to wait between retries
	343	postprocessors: A list of dictionaries, each with an entry
	344	* key: The name of the postprocessor. See
	345	yt_dlp/postprocessor/__init__.py for a list.
	346	* when: When to run the postprocessor. Allowed values are
	347	the entries of utils.POSTPROCESS_WHEN
	348	Assumed to be 'post_process' if not given
	349	progress_hooks: A list of functions that get called on download
	350	progress, with a dictionary with the entries
	351	* status: One of "downloading", "error", or "finished".
	352	Check this first and ignore unknown values.
	353	* info_dict: The extracted info_dict
	354
	355	If status is one of "downloading", or "finished", the
	356	following properties may also be present:
	357	* filename: The final filename (always present)
	358	* tmpfilename: The filename we're currently writing to
	359	* downloaded_bytes: Bytes on disk
	360	* total_bytes: Size of the whole file, None if unknown
	361	* total_bytes_estimate: Guess of the eventual file size,
	362	None if unavailable.
	363	* elapsed: The number of seconds since download started.
	364	* eta: The estimated time in seconds, None if unknown
	365	* speed: The download speed in bytes/second, None if
	366	unknown
	367	* fragment_index: The counter of the currently
	368	downloaded video fragment.
	369	* fragment_count: The number of fragments (= individual
	370	files that will be merged)
	371
	372	Progress hooks are guaranteed to be called at least once
	373	(with status "finished") if the download is successful.
	374	postprocessor_hooks: A list of functions that get called on postprocessing
	375	progress, with a dictionary with the entries
	376	* status: One of "started", "processing", or "finished".
	377	Check this first and ignore unknown values.
	378	* postprocessor: Name of the postprocessor
	379	* info_dict: The extracted info_dict
	380
	381	Postprocessor hooks are guaranteed to be called at least twice
	382	(with status "started" and "finished") if the processing is successful.
	383	merge_output_format: "/" separated list of extensions to use when merging formats.
	384	final_ext: Expected final extension; used to detect when the file was
	385	already downloaded and converted
	386	fixup: Automatically correct known faults of the file.
	387	One of:
	388	- "never": do nothing
	389	- "warn": only emit a warning
	390	- "detect_or_warn": check whether we can do anything
	391	about it, warn otherwise (default)
	392	source_address: Client-side IP address to bind to.
	393	sleep_interval_requests: Number of seconds to sleep between requests
	394	during extraction
	395	sleep_interval: Number of seconds to sleep before each download when
	396	used alone or a lower bound of a range for randomized
	397	sleep before each download (minimum possible number
	398	of seconds to sleep) when used along with
	399	max_sleep_interval.
	400	max_sleep_interval: Upper bound of a range for randomized sleep before each
	401	download (maximum possible number of seconds to sleep).
	402	Must only be used along with sleep_interval.
	403	Actual sleep time will be a random float from range
	404	[sleep_interval; max_sleep_interval].
	405	sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
	406	listformats: Print an overview of available video formats and exit.
	407	list_thumbnails: Print a table of all thumbnails and exit.
	408	match_filter: A function that gets called for every video with the signature
	409	(info_dict, *, incomplete: bool) -> Optional[str]
	410	For backward compatibility with youtube-dl, the signature
	411	(info_dict) -> Optional[str] is also allowed.
	412	- If it returns a message, the video is ignored.
	413	- If it returns None, the video is downloaded.
	414	- If it returns utils.NO_DEFAULT, the user is interactively
	415	asked whether to download the video.
	416	- Raise utils.DownloadCancelled(msg) to abort remaining
	417	downloads when a video is rejected.
	418	match_filter_func in utils.py is one example for this.
	419	color: A Dictionary with output stream names as keys
	420	and their respective color policy as values.
	421	Can also just be a single color policy,
	422	in which case it applies to all outputs.
	423	Valid stream names are 'stdout' and 'stderr'.
	424	Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
	425	geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
	426	HTTP header
	427	geo_bypass_country:
	428	Two-letter ISO 3166-1 alpha-2 country code that will be used for
	429	explicit geographic restriction bypassing via faking
	430	X-Forwarded-For HTTP header
	431	geo_bypass_ip_block:
	432	IP range in CIDR notation that will be used similarly to
	433	geo_bypass_country
	434	external_downloader: A dictionary of protocol keys and the executable of the
	435	external downloader to use for it. The allowed protocols
	436	are default\|http\|ftp\|m3u8\|dash\|rtsp\|rtmp\|mms.
	437	Set the value to 'native' to use the native downloader
	438	compat_opts: Compatibility options. See "Differences in default behavior".
	439	The following options do not work when used through the API:
	440	filename, abort-on-error, multistreams, no-live-chat, format-sort
	441	no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
	442	Refer to __init__.py for their implementation
	443	progress_template: Dictionary of templates for progress outputs.
	444	Allowed keys are 'download', 'postprocess',
	445	'download-title' (console title) and 'postprocess-title'.
	446	The template is mapped on a dictionary with keys 'progress' and 'info'
	447	retry_sleep_functions: Dictionary of functions that takes the number of attempts
	448	as argument and returns the time to sleep in seconds.
	449	Allowed keys are 'http', 'fragment', 'file_access'
	450	download_ranges: A callback function that gets called for every video with
	451	the signature (info_dict, ydl) -> Iterable[Section].
	452	Only the returned sections will be downloaded.
	453	Each Section is a dict with the following keys:
	454	* start_time: Start time of the section in seconds
	455	* end_time: End time of the section in seconds
	456	* title: Section title (Optional)
	457	* index: Section number (Optional)
	458	force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
	459	noprogress: Do not print the progress bar
	460	live_from_start: Whether to download livestream videos from the start
	461
	462	The following parameters are not used by YoutubeDL itself, they are used by
	463	the downloader (see yt_dlp/downloader/common.py):
	464	nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
	465	max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
	466	continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
	467	external_downloader_args, concurrent_fragment_downloads.
	468
	469	The following options are used by the post processors:
	470	ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
	471	to the binary or its containing directory.
	472	postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
	473	and a list of additional command-line arguments for the
	474	postprocessor/executable. The dict can also have "PP+EXE" keys
	475	which are used when the given exe is used by the given PP.
	476	Use 'default' as the name for arguments to be passed to all PP
	477	For compatibility with youtube-dl, a single list of args
	478	can also be used
	479
	480	The following options are used by the extractors:
	481	extractor_retries: Number of times to retry for known errors (default: 3)
	482	dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
	483	hls_split_discontinuity: Split HLS playlists to different formats at
	484	discontinuities such as ad breaks (default: False)
	485	extractor_args: A dictionary of arguments to be passed to the extractors.
	486	See "EXTRACTOR ARGUMENTS" for details.
	487	E.g. {'youtube': {'skip': ['dash', 'hls']}}
	488	mark_watched: Mark videos watched (even with --simulate). Only for YouTube
	489
	490	The following options are deprecated and may be removed in the future:
	491
	492	break_on_reject: Stop the download process when encountering a video that
	493	has been filtered out.
	494	- `raise DownloadCancelled(msg)` in match_filter instead
	495	force_generic_extractor: Force downloader to use the generic extractor
	496	- Use allowed_extractors = ['generic', 'default']
	497	playliststart: - Use playlist_items
	498	Playlist item to start at.
	499	playlistend: - Use playlist_items
	500	Playlist item to end at.

1

import collections

import contextlib

import datetime

import errno

import fileinput

import functools

import io

import itertools

import json

import locale

import operator

import os

import random

import re

import shutil

import string

import subprocess

import sys

import tempfile

import time

import tokenize

import traceback

import unicodedata

from .cache import Cache

26

from .compat import urllib # isort: split

27

from .compat import compat_os_name, compat_shlex_quote

28

from .cookies import load_cookies

29

from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name

30

from .downloader.rtmp import rtmpdump_version

31

from .extractor import gen_extractor_classes, get_info_extractor

32

from .extractor.common import UnsupportedURLIE

33

from .extractor.openload import PhantomJSwrapper

34

from .minicurses import format_text

35

from .plugins import directories as plugin_directories

36

from .postprocessor import _PLUGIN_CLASSES as plugin_pps

37

from .postprocessor import (

38

EmbedThumbnailPP,

39

FFmpegFixupDuplicateMoovPP,

40

FFmpegFixupDurationPP,

41

FFmpegFixupM3u8PP,

42

FFmpegFixupM4aPP,

43

FFmpegFixupStretchedPP,

44

FFmpegFixupTimestampPP,

45

FFmpegMergerPP,

46

FFmpegPostProcessor,

47

FFmpegVideoConvertorPP,

48

MoveFilesAfterDownloadPP,

49

get_postprocessor,

50

)

51

from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping

52

from .update import REPOSITORY, current_git_head, detect_variant

from .utils import (

DEFAULT_OUTTMPL,

IDENTITY,

LINK_TEMPLATES,

MEDIA_EXTENSIONS,

NO_DEFAULT,

NUMBER_RE,

OUTTMPL_TYPES,

POSTPROCESS_WHEN,

STR_FORMAT_RE_TMPL,

STR_FORMAT_TYPES,

ContentTooShortError,

DateRange,

DownloadCancelled,

DownloadError,

EntryNotInPlaylist,

ExistingVideoReached,

ExtractorError,

FormatSorter,

GeoRestrictedError,

HEADRequest,

ISO3166Utils,

LazyList,

MaxDownloadsReached,

Namespace,

PagedList,

PerRequestProxyHandler,

PlaylistEntries,

Popen,

PostProcessingError,

ReExtractInfo,

RejectedVideoReached,

85

SameFileError,

86

UnavailableVideoError,

87

UserNotLive,

88

YoutubeDLCookieProcessor,

89

YoutubeDLHandler,

90

YoutubeDLRedirectHandler,

age_restricted,

args_to_str,

bug_reports_message,

date_from_str,

deprecation_warning,

determine_ext,

determine_protocol,

encode_compat_str,

encodeFilename,

error_to_compat_str,

escapeHTML,

expand_path,

filter_dict,

float_or_none,

format_bytes,

format_decimal_suffix,

format_field,

formatSeconds,

get_compatible_ext,

get_domain,

int_or_none,

iri_to_uri,

is_path_like,

join_nonempty,

locked_file,

make_archive_id,

make_dir,

make_HTTPS_handler,

merge_headers,

network_exceptions,

number_of_digits,

orderedSet,

orderedSet_from_options,

parse_filesize,

preferredencoding,

prepend_extension,

remove_terminal_sequences,

render_table,

replace_extension,

sanitize_filename,

sanitize_path,

sanitize_url,

sanitized_Request,

std_headers,

str_or_none,

strftime_or_none,

subtitles_filename,

supports_terminal_sequences,

system_identifier,

timetuple_from_msec,

to_high_limit_path,

traverse_obj,

try_call,

try_get,

url_basename,

variadic,

version_tuple,

windows_enable_vt_mode,

write_json_file,

write_string,

)

from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__

153

154

if compat_os_name == 'nt':

import ctypes

class YoutubeDL:

"""YoutubeDL class.

YoutubeDL objects are the ones responsible for downloading the

162

actual video file and writing it to disk if the user has requested

163

it, among some other tasks. In most cases there should be one per

164

program. As, given a video URL, the downloader doesn't know how to

165

extract all the needed information, task that InfoExtractors do, it

166

has to pass the URL to one of them.

167

168

For this, YoutubeDL objects have a method that allows

169

InfoExtractors to be registered in a given order. When it is passed

170

a URL, the YoutubeDL object hands it to the first InfoExtractor it

171

finds that reports being able to handle it. The InfoExtractor extracts

172

all the information about the video or videos the URL refers to, and

173

YoutubeDL processes the extracted information, possibly using a File

174

Downloader to download the video.

175

176

YoutubeDL objects accept a lot of parameters. In order not to saturate

177

the object constructor with arguments, it receives a dictionary of

178

options instead. These options are available through the params

179

attribute for the InfoExtractors to use. The YoutubeDL also

180

registers itself as the downloader in charge for the InfoExtractors

181

that are added to it, so this is a "mutual registration".

Available options:

username: Username for authentication purposes.

186

password: Password for authentication purposes.

187

videopassword: Password for accessing a video.

188

ap_mso: Adobe Pass multiple-system operator identifier.

189

ap_username: Multiple-system operator account username.

190

ap_password: Multiple-system operator account password.

191

usenetrc: Use netrc for authentication instead.

192

netrc_location: Location of the netrc file. Defaults to ~/.netrc.

193

netrc_cmd: Use a shell command to get credentials

194

verbose: Print additional info to stdout.

195

quiet: Do not print messages to stdout.

196

no_warnings: Do not print out anything for warnings.

197

forceprint: A dict with keys WHEN mapped to a list of templates to

198

print to stdout. The allowed keys are video or any of the

199

items in utils.POSTPROCESS_WHEN.

200

For compatibility, a single list is also accepted

201

print_to_file: A dict with keys WHEN (same as forceprint) mapped to

202

a list of tuples with (template, filename)

203

forcejson: Force printing info_dict as JSON.

204

dump_single_json: Force printing the info_dict of the whole playlist

205

(or video) as a single JSON line.

206

force_write_download_archive: Force writing download archive regardless

207

of 'skip_download' or 'simulate'.

208

simulate: Do not download the video files. If unset (or None),

209

simulate only if listsubtitles, listformats or list_thumbnails is used

210

format: Video format code. see "FORMAT SELECTION" for more details.

211

You can also pass a function. The function takes 'ctx' as

212

argument and returns the formats to download.

213

See "build_format_selector" for an implementation

214

allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.

215

ignore_no_formats_error: Ignore "No video formats" error. Useful for

216

extracting metadata even if the video is not actually

217

available for download (experimental)

218

format_sort: A list of fields by which to sort the video formats.

219

See "Sorting Formats" for more details.

220

format_sort_force: Force the given format_sort. see "Sorting Formats"

221

for more details.

222

prefer_free_formats: Whether to prefer video formats with free containers

223

over non-free ones of same quality.

224

allow_multiple_video_streams: Allow multiple video streams to be merged

225

into a single file

226

allow_multiple_audio_streams: Allow multiple audio streams to be merged

227

into a single file

228

check_formats Whether to test if the formats are downloadable.

229

Can be True (check all), False (check none),

230

'selected' (check selected formats),

231

or None (check only if requested by extractor)

232

paths: Dictionary of output paths. The allowed keys are 'home'

233

'temp' and the keys of OUTTMPL_TYPES (in utils.py)

234

outtmpl: Dictionary of templates for output names. Allowed keys

235

are 'default' and the keys of OUTTMPL_TYPES (in utils.py).

236

For compatibility with youtube-dl, a single string can also be used

237

outtmpl_na_placeholder: Placeholder for unavailable meta fields.

238

restrictfilenames: Do not allow "&" and spaces in file names

239

trim_file_name: Limit length of filename (extension excluded)

240

windowsfilenames: Force the filenames to be windows compatible

241

ignoreerrors: Do not stop on download/postprocessing errors.

242

Can be 'only_download' to ignore only download errors.

243

Default is 'only_download' for CLI, but False for API

244

skip_playlist_after_errors: Number of allowed failures until the rest of

245

the playlist is skipped

246

allowed_extractors: List of regexes to match against extractor names that are allowed

247

overwrites: Overwrite all video and metadata files if True,

248

overwrite only non-video files if None

249

and don't overwrite any file if False

250

For compatibility with youtube-dl,

251

"nooverwrites" may also be used instead

252

playlist_items: Specific indices of playlist to download.

253

playlistrandom: Download playlist items in random order.

254

lazy_playlist: Process playlist entries as they are received.

255

matchtitle: Download only matching titles.

256

rejecttitle: Reject downloads for matching titles.

257

logger: Log messages to a logging.Logger instance.

258

logtostderr: Print everything to stderr instead of stdout.

259

consoletitle: Display progress in console window's titlebar.

260

writedescription: Write the video description to a .description file

261

writeinfojson: Write the video metadata to a .info.json file

262

clean_infojson: Remove internal metadata from the infojson

263

getcomments: Extract video comments. This will not be written to disk

264

unless writeinfojson is also given

265

writeannotations: Write the video annotations to a .annotations.xml file

266

writethumbnail: Write the thumbnail image to a file

267

allow_playlist_files: Whether to write playlists' description, infojson etc

268

also to disk when using the 'write*' options

269

write_all_thumbnails: Write all thumbnail formats to files

270

writelink: Write an internet shortcut file, depending on the

271

current platform (.url/.webloc/.desktop)

272

writeurllink: Write a Windows internet shortcut file (.url)

273

writewebloclink: Write a macOS internet shortcut file (.webloc)

274

writedesktoplink: Write a Linux internet shortcut file (.desktop)

275

writesubtitles: Write the video subtitles to a file

276

writeautomaticsub: Write the automatically generated subtitles to a file

277

listsubtitles: Lists all available subtitles for the video

278

subtitlesformat: The format code for subtitles

279

subtitleslangs: List of languages of the subtitles to download (can be regex).

280

The list may contain "all" to refer to all the available

281

subtitles. The language can be prefixed with a "-" to

282

exclude it from the requested languages, e.g. ['all', '-live_chat']

283

keepvideo: Keep the video file after post-processing

284

daterange: A utils.DateRange object, download only if the upload_date is in the range.

285

skip_download: Skip the actual download of the video file

286

cachedir: Location of the cache files in the filesystem.

287

False to disable filesystem cache.

288

noplaylist: Download single video instead of a playlist if in doubt.

289

age_limit: An integer representing the user's age in years.

290

Unsuitable videos for the given age are skipped.

291

min_views: An integer representing the minimum view count the video

292

must have in order to not be skipped.

293

Videos without view count information are always

294

downloaded. None for no limit.

295

max_views: An integer representing the maximum view count.

296

Videos that are more popular than that are not

297

downloaded.

298

Videos without view count information are always

299

downloaded. None for no limit.

300

download_archive: A set, or the name of a file where all downloads are recorded.

301

Videos already present in the file are not downloaded again.

302

break_on_existing: Stop the download process after attempting to download a

303

file that is in the archive.

304

break_per_url: Whether break_on_reject and break_on_existing

305

should act on each input URL as opposed to for the entire queue

306

cookiefile: File name or text stream from where cookies should be read and dumped to

307

cookiesfrombrowser: A tuple containing the name of the browser, the profile

308

name/path from where cookies are loaded, the name of the keyring,

309

and the container name, e.g. ('chrome', ) or

310

('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')

311

legacyserverconnect: Explicitly allow HTTPS connection to servers that do not

312

support RFC 5746 secure renegotiation

313

nocheckcertificate: Do not verify SSL certificates

314

client_certificate: Path to client certificate file in PEM format. May include the private key

315

client_certificate_key: Path to private key file for client certificate

316

client_certificate_password: Password for client certificate private key, if encrypted.

317

If not provided and the key is encrypted, yt-dlp will ask interactively

318

prefer_insecure: Use HTTP instead of HTTPS to retrieve information.

319

(Only supported by some extractors)

320

enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons.

321

http_headers: A dictionary of custom headers to be used for all requests

322

proxy: URL of the proxy server to use

323

geo_verification_proxy: URL of the proxy to use for IP address verification

324

on geo-restricted sites.

325

socket_timeout: Time to wait for unresponsive hosts, in seconds

326

bidi_workaround: Work around buggy terminals without bidirectional text

327

support, using fribidi

328

debug_printtraffic:Print out sent and received HTTP traffic

329

default_search: Prepend this string if an input url is not valid.

330

'auto' for elaborate guessing

331

encoding: Use this encoding instead of the system-specified.

332

extract_flat: Whether to resolve and process url_results further

333

* False: Always process. Default for API

334

* True: Never process

335

* 'in_playlist': Do not process inside playlist/multi_video

336

* 'discard': Always process, but don't return the result

337

from inside playlist/multi_video

338

* 'discard_in_playlist': Same as "discard", but only for

339

playlists (not multi_video). Default for CLI

340

wait_for_video: If given, wait for scheduled streams to become available.

341

The value should be a tuple containing the range

342

(min_secs, max_secs) to wait between retries

343

postprocessors: A list of dictionaries, each with an entry

344

* key: The name of the postprocessor. See

345

yt_dlp/postprocessor/__init__.py for a list.

346

* when: When to run the postprocessor. Allowed values are

347

the entries of utils.POSTPROCESS_WHEN

348

Assumed to be 'post_process' if not given

349

progress_hooks: A list of functions that get called on download

350

progress, with a dictionary with the entries

351

* status: One of "downloading", "error", or "finished".

352

Check this first and ignore unknown values.

353

* info_dict: The extracted info_dict

354

355

If status is one of "downloading", or "finished", the

356

following properties may also be present:

357

* filename: The final filename (always present)

358

* tmpfilename: The filename we're currently writing to

359

* downloaded_bytes: Bytes on disk

360

* total_bytes: Size of the whole file, None if unknown

361

* total_bytes_estimate: Guess of the eventual file size,

362

None if unavailable.

363

* elapsed: The number of seconds since download started.

364

* eta: The estimated time in seconds, None if unknown

365

* speed: The download speed in bytes/second, None if

366

unknown

367

* fragment_index: The counter of the currently

368

downloaded video fragment.

369

* fragment_count: The number of fragments (= individual

370

files that will be merged)

371

372

Progress hooks are guaranteed to be called at least once

373

(with status "finished") if the download is successful.

374

postprocessor_hooks: A list of functions that get called on postprocessing

375

progress, with a dictionary with the entries

376

* status: One of "started", "processing", or "finished".

377

Check this first and ignore unknown values.

378

* postprocessor: Name of the postprocessor

379

* info_dict: The extracted info_dict

380

381

Progress hooks are guaranteed to be called at least twice

382

(with status "started" and "finished") if the processing is successful.

383

merge_output_format: "/" separated list of extensions to use when merging formats.

384

final_ext: Expected final extension; used to detect when the file was

385

already downloaded and converted

386

fixup: Automatically correct known faults of the file.

387

One of:

388

- "never": do nothing

389

- "warn": only emit a warning

390

- "detect_or_warn": check whether we can do anything

391

about it, warn otherwise (default)

392

source_address: Client-side IP address to bind to.

393

sleep_interval_requests: Number of seconds to sleep between requests

394

during extraction

395

sleep_interval: Number of seconds to sleep before each download when

396

used alone or a lower bound of a range for randomized

397

sleep before each download (minimum possible number

398

of seconds to sleep) when used along with

399

max_sleep_interval.

400

max_sleep_interval:Upper bound of a range for randomized sleep before each

401

download (maximum possible number of seconds to sleep).

402

Must only be used along with sleep_interval.

403

Actual sleep time will be a random float from range

404

[sleep_interval; max_sleep_interval].

405

sleep_interval_subtitles: Number of seconds to sleep before each subtitle download

406

listformats: Print an overview of available video formats and exit.

407

list_thumbnails: Print a table of all thumbnails and exit.

408

match_filter: A function that gets called for every video with the signature

409

(info_dict, *, incomplete: bool) -> Optional[str]

410

For backward compatibility with youtube-dl, the signature

411

(info_dict) -> Optional[str] is also allowed.

412

- If it returns a message, the video is ignored.

413

- If it returns None, the video is downloaded.

414

- If it returns utils.NO_DEFAULT, the user is interactively

415

asked whether to download the video.

416

- Raise utils.DownloadCancelled(msg) to abort remaining

417

downloads when a video is rejected.

418

match_filter_func in utils.py is one example for this.

419

color: A Dictionary with output stream names as keys

420

and their respective color policy as values.

421

Can also just be a single color policy,

422

in which case it applies to all outputs.

423

Valid stream names are 'stdout' and 'stderr'.

424

Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.

425

geo_bypass: Bypass geographic restriction via faking X-Forwarded-For

426

HTTP header

427

geo_bypass_country:

428

Two-letter ISO 3166-1 alpha-2 country code that will be used for

429

explicit geographic restriction bypassing via faking

430

X-Forwarded-For HTTP header

431

geo_bypass_ip_block:

432

IP range in CIDR notation that will be used similarly to

433

geo_bypass_country

434

external_downloader: A dictionary of protocol keys and the executable of the

435

external downloader to use for it. The allowed protocols
are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.

436

437

Set the value to 'native' to use the native downloader

438

compat_opts: Compatibility options. See "Differences in default behavior".

439

The following options do not work when used through the API:

440

filename, abort-on-error, multistreams, no-live-chat, format-sort

441

no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.

442

Refer __init__.py for their implementation

443

progress_template: Dictionary of templates for progress outputs.

444

Allowed keys are 'download', 'postprocess',

445

'download-title' (console title) and 'postprocess-title'.

446

The template is mapped on a dictionary with keys 'progress' and 'info'

447

retry_sleep_functions: Dictionary of functions that takes the number of attempts

448

as argument and returns the time to sleep in seconds.

449

Allowed keys are 'http', 'fragment', 'file_access'

450

download_ranges: A callback function that gets called for every video with

451

the signature (info_dict, ydl) -> Iterable[Section].

452

Only the returned sections will be downloaded.

453

Each Section is a dict with the following keys:

454

* start_time: Start time of the section in seconds

455

* end_time: End time of the section in seconds

456

* title: Section title (Optional)

457

* index: Section number (Optional)

458

force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts

459

noprogress: Do not print the progress bar

460

live_from_start: Whether to download livestream videos from the start

461

462

The following parameters are not used by YoutubeDL itself, they are used by

463

the downloader (see yt_dlp/downloader/common.py):

464

nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,

465

max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,

466

continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,

467

external_downloader_args, concurrent_fragment_downloads.

468

469

The following options are used by the post processors:

470

ffmpeg_location: Location of the ffmpeg/avconv binary; either the path

471

to the binary or its containing directory.

472

postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)

473

and a list of additional command-line arguments for the

474

postprocessor/executable. The dict can also have "PP+EXE" keys

475

which are used when the given exe is used by the given PP.

476

Use 'default' as the name for arguments to be passed to all PP

477

For compatibility with youtube-dl, a single list of args

478

can also be used

479

480

The following options are used by the extractors:

481

extractor_retries: Number of times to retry for known errors (default: 3)

482

dynamic_mpd: Whether to process dynamic DASH manifests (default: True)

483

hls_split_discontinuity: Split HLS playlists to different formats at

484

discontinuities such as ad breaks (default: False)

485

extractor_args: A dictionary of arguments to be passed to the extractors.

486

See "EXTRACTOR ARGUMENTS" for details.

487

E.g. {'youtube': {'skip': ['dash', 'hls']}}

488

mark_watched: Mark videos watched (even with --simulate). Only for YouTube

489

490

The following options are deprecated and may be removed in the future:

491

492

break_on_reject: Stop the download process when encountering a video that

493

has been filtered out.

494

- `raise DownloadCancelled(msg)` in match_filter instead

495

force_generic_extractor: Force downloader to use the generic extractor

496

- Use allowed_extractors = ['generic', 'default']

497

playliststart: - Use playlist_items

498

Playlist item to start at.

499

playlistend: - Use playlist_items

500

Playlist item to end at.

501

playlistreverse: - Use playlist_items

502

Download playlist items in reverse order.

503

forceurl: - Use forceprint

504

Force printing final URL.

505

forcetitle: - Use forceprint

506

Force printing title.

507

forceid: - Use forceprint

508

Force printing ID.

509

forcethumbnail: - Use forceprint

510

Force printing thumbnail URL.

511

forcedescription: - Use forceprint

512

Force printing description.

513

forcefilename: - Use forceprint

514

Force printing final filename.

515

forceduration: - Use forceprint

516

Force printing duration.

517

allsubtitles: - Use subtitleslangs = ['all']

518

Downloads all the subtitles of the video

519

(requires writesubtitles or writeautomaticsub)

520

include_ads: - Doesn't work

521

Download ads as well

522

call_home: - Not implemented

523

Boolean, true iff we are allowed to contact the

524

yt-dlp servers for debugging.

525

post_hooks: - Register a custom postprocessor

526

A list of functions that get called as the final step

527

for each video file, after all postprocessors have been

528

called. The filename will be passed as the only argument.

529

hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.

530

Use the native HLS downloader instead of ffmpeg/avconv

531

if True, otherwise use ffmpeg/avconv if False, otherwise

532

use downloader suggested by extractor if None.

533

prefer_ffmpeg: - avconv support is deprecated

534

If False, use avconv instead of ffmpeg if both are available,

535

otherwise prefer ffmpeg.

536

youtube_include_dash_manifest: - Use extractor_args

537

If True (default), DASH manifests and related

538

data will be downloaded and processed by extractor.

539

You can reduce network I/O by disabling it if you don't

540

care about DASH. (only for youtube)

541

youtube_include_hls_manifest: - Use extractor_args

542

If True (default), HLS manifests and related

543

data will be downloaded and processed by extractor.

544

You can reduce network I/O by disabling it if you don't

545

care about HLS. (only for youtube)

546

no_color: Same as `color='no_color'`

"""

# Names of info-dict fields that are treated as numeric.
# NOTE(review): presumably consulted when rendering output templates — confirm against the users of this set.
_NUMERIC_FIELDS = {
    'width', 'height', 'asr', 'audio_channels', 'fps',
    'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
    'timestamp', 'release_timestamp',
    'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
    'average_rating', 'comment_count', 'age_limit',
    'start_time', 'end_time',
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number', 'release_year',
}

# Keys that belong to a single format entry (as opposed to the video as a whole).
_format_fields = {
    # NB: Keep in sync with the docstring of extractor/common.py
    'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
    'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
    'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
    'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
    'preference', 'language', 'language_preference', 'quality', 'source_preference',
    'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
    'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
}
# File extensions grouped by media kind, built from MEDIA_EXTENSIONS.
# '3gp' is added to the common video extensions here.
_format_selection_exts = {
    'audio': set(MEDIA_EXTENSIONS.common_audio),
    'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
    'storyboards': set(MEDIA_EXTENSIONS.storyboards),
}

575

576

def __init__(self, params=None, auto_init=True):
    """Create a FileDownloader object with the given options.

    @param params       Dictionary of options (see the class docstring). It is stored
                        as-is in self.params and modified in place during initialization.
                        Defaults to an empty dict.
    @param auto_init    Whether to load the default extractors and print header (if verbose).
                        Set to 'no_verbose_header' to not print the header
    """
    if params is None:
        params = {}
    # NB: the caller's dict is kept by reference; every self.params[...] write below mutates it
    self.params = params
    self._ies = {}
    self._ies_instances = {}
    # One (initially empty) postprocessor list per POSTPROCESS_WHEN stage
    self._pps = {k: [] for k in POSTPROCESS_WHEN}
    self._printed_messages = set()
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._postprocessor_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._num_videos = 0
    self._playlist_level = 0
    self._playlist_urls = set()
    self.cache = Cache(self)

    # 'logtostderr' redirects regular output to stderr; 'quiet' sends screen output there
    stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
    self._out_files = Namespace(
        out=stdout,
        error=sys.stderr,
        screen=sys.stderr if self.params.get('quiet') else stdout,
        # No console stream on Windows; otherwise the first of (stderr, stdout)
        # that supports terminal sequences, or None if neither does
        console=None if compat_os_name == 'nt' else next(
            filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
    )

    # Best effort: a failure here is only reported as a debug message
    try:
        windows_enable_vt_mode()
    except Exception as e:
        self.write_debug(f'Failed to enable VT mode: {e}')

    # The 'no_color' option takes precedence over any 'color' policy
    if self.params.get('no_color'):
        if self.params.get('color') is not None:
            self.report_warning('Overwriting params from "color" with "no_color"')
        self.params['color'] = 'no_color'

    term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'

    def process_color_policy(stream):
        # Resolve the color policy for `stream` (must be sys.stdout or sys.stderr).
        # The policy is looked up under the stream's name in params['color'],
        # falling back to params['color'] itself when it is a plain string.
        stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
        policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
        if policy in ('auto', None):
            return term_allow_color and supports_terminal_sequences(stream)
        assert policy in ('always', 'never', 'no_color')
        # 'always' -> True, 'never' -> False; 'no_color' is passed through as the string itself
        return {'always': True, 'never': False}.get(policy, policy)

    self._allow_colors = Namespace(**{
        name: process_color_policy(stream)
        for name, stream in self._out_files.items_ if name != 'console'
    })

    # The code is left like this to be reused for future deprecations
    MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
    current_version = sys.version_info[:2]
    if current_version < MIN_RECOMMENDED:
        msg = ('Support for Python version %d.%d has been deprecated. '
               'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
               '\n You will no longer receive updates on this version')
        if current_version < MIN_SUPPORTED:
            msg = 'Python version %d.%d is no longer supported'
        # `msg` still contains %d placeholders; they are filled together with the ones in the f-string
        self.deprecated_feature(
            f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))

    if self.params.get('allow_unplayable_formats'):
        self.report_warning(
            f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
            'This is a developer option intended for debugging. \n'
            ' If you experience any issues while using this option, '
            f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

    if self.params.get('bidi_workaround', False):
        # Spawn a helper (bidiv, falling back to fribidi) whose stdout is the slave end of a pty;
        # the master end is kept as self._output_channel
        try:
            import pty
            master, slave = pty.openpty()
            width = shutil.get_terminal_size().columns
            # NOTE(review): shutil.get_terminal_size() documents an integer `columns`,
            # so the `width is None` branch looks unreachable — confirm before removing
            width_args = [] if width is None else ['-w', str(width)]
            sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
            try:
                self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
            except OSError:
                self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # NOTE(review): `master`/`slave` are not closed on this path, so both pty
            # descriptors presumably stay open when neither helper can be started — confirm
            if ose.errno == errno.ENOENT:
                self.report_warning(
                    'Could not find fribidi executable, ignoring --bidi-workaround. '
                    'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    # Normalize compat_opts to a set (a copy is made even if one was passed in)
    self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
    if auto_init and auto_init != 'no_verbose_header':
        self.print_debug_header()

    def check_deprecated(param, option, suggestion):
        # Warn if the deprecated `param` is set (not None); returns whether it was set
        if self.params.get(param) is not None:
            self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
            return True
        return False

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the old value over unless the new option was given explicitly
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    # Replay warnings that were collected before this object existed
    for msg in self.params.get('_warnings', []):
        self.report_warning(msg)
    for msg in self.params.get('_deprecation_warnings', []):
        self.deprecated_feature(msg)

    if 'list-formats' in self.params['compat_opts']:
        self.params['listformats_table'] = False

    if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
        # nooverwrites was unnecessarily changed to overwrites
        # in 0c3d0f51778b153f65c21906031c2e091fcfb641
        # This ensures compatibility with both keys
        self.params['overwrites'] = not self.params['nooverwrites']
    elif self.params.get('overwrites') is None:
        # An explicit None is treated the same as the key being absent
        self.params.pop('overwrites', None)
    else:
        self.params['nooverwrites'] = not self.params['overwrites']

    # Any of the list* options implies simulation unless 'simulate' was set explicitly
    if self.params.get('simulate') is None and any((
        self.params.get('list_thumbnails'),
        self.params.get('listformats'),
        self.params.get('listsubtitles'),
    )):
        self.params['simulate'] = 'list_only'

    self.params.setdefault('forceprint', {})
    self.params.setdefault('print_to_file', {})

    # Compatibility with older syntax
    if not isinstance(params['forceprint'], dict):
        self.params['forceprint'] = {'video': params['forceprint']}

    if auto_init:
        self.add_default_info_extractors()

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self._parse_outtmpl()

    # Creating format selector here allows us to catch syntax errors before the extraction
    # None and '-' are stored unchanged, a callable is used directly, anything else is compiled
    self.format_selector = (
        self.params.get('format') if self.params.get('format') in (None, '-')
        else self.params['format'] if callable(self.params['format'])
        else self.build_format_selector(self.params['format']))

    # Set http_headers defaults according to std_headers
    self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

    # Register the hooks passed through params with their respective add_* method
    hooks = {
        'post_hooks': self.add_post_hook,
        'progress_hooks': self.add_progress_hook,
        'postprocessor_hooks': self.add_postprocessor_hook,
    }
    for opt, fn in hooks.items():
        for ph in self.params.get(opt, []):
            fn(ph)

    for pp_def_raw in self.params.get('postprocessors', []):
        # Work on a copy so that popping 'when'/'key' does not alter the caller's definition
        pp_def = dict(pp_def_raw)
        when = pp_def.pop('when', 'post_process')
        self.add_post_processor(
            get_postprocessor(pp_def.pop('key'))(self, **pp_def),
            when=when)

    self._setup_opener()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified"""
        archive = set()
        if fn is None:
            return archive
        elif not is_path_like(fn):
            # Not a path: the given object is used as the archive itself
            return fn

        self.write_debug(f'Loading archive file {fn!r}')
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    archive.add(line.strip())
        except OSError as ioe:
            # A missing archive file is fine (empty archive); other errors propagate
            if ioe.errno != errno.ENOENT:
                raise
        return archive

    self.archive = preload_download_archive(self.params.get('download_archive'))

783

784

def warn_if_short_id(self, argv):
    """Warn when an argument looks like a video ID that begins with a dash.

    An argument consisting of "-" followed by exactly ten ID characters
    would be parsed as an option. When any such argument is present, a
    warning is reported together with a corrected command line in which
    these arguments are moved after a "--" separator.

    @param argv  List of command-line argument strings
    """
    dashed_ids, remaining = [], []
    for arg in argv:
        bucket = dashed_ids if re.match(r'^-[0-9A-Za-z_-]{10}$', arg) else remaining
        bucket.append(arg)

    if not dashed_ids:
        return

    # Same relative order as in argv; only the dash-prefixed IDs move to the end
    suggestion = ['yt-dlp', *remaining, '--', *dashed_ids]
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s' %
        args_to_str(suggestion))

799

800

def add_info_extractor(self, ie):
    """Register an InfoExtractor (class or instance) under its ie_key.

    The extractor is always stored in `self._ies`. Only when `ie` is an
    instance (not a class) is it also cached in `self._ies_instances`
    and bound to this downloader through `set_downloader`.
    """
    key = ie.ie_key()
    self._ies[key] = ie
    if isinstance(ie, type):
        # A bare class: nothing to bind yet
        return
    self._ies_instances[key] = ie
    ie.set_downloader(self)

807

808

def get_info_extractor(self, ie_key):
    """
    Return an extractor instance for ie_key.

    A cached instance from `self._ies_instances` is returned when present.
    Otherwise the class is looked up with the module-level
    `get_info_extractor`, instantiated, registered through
    `add_info_extractor` and returned.
    """
    cached = self._ies_instances.get(ie_key)
    if cached is not None:
        return cached
    # Inside the class body this bare name refers to the imported
    # module-level function, not to this method
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created

819

820

def add_default_info_extractors(self):
    """
    Register the extractors selected by the 'allowed_extractors' parameter.

    Candidates are the classes from gen_extractor_classes(), keyed by
    lower-cased IE_NAME, plus an UnsupportedURLIE instance under the key 'end'.
    The selection (default: ['default']) is resolved with
    orderedSet_from_options, where 'all' means every candidate and
    'default' means those whose `_ENABLED` attribute is truthy.

    Raises ValueError when a selection entry is an invalid regex.
    """
    available = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
    available['end'] = UnsupportedURLIE()

    aliases = {
        'all': list(available),
        'default': [name for name, ie in available.items() if ie._ENABLED],
    }
    requested = self.params.get('allowed_extractors', ['default'])
    try:
        selected = orderedSet_from_options(requested, aliases, use_regex=True)
    except re.error as e:
        raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')

    for name in selected:
        self.add_info_extractor(available[name])
    self.write_debug(f'Loaded {len(selected)} extractors')

837

838

def add_post_processor(self, pp, when='post_process'):
    """Append a PostProcessor to the chain for stage `when` and bind it to this downloader.

    @param pp    PostProcessor object
    @param when  Stage name; must be a member of POSTPROCESS_WHEN
    """
    assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
    stage_chain = self._pps[when]
    stage_chain.append(pp)
    pp.set_downloader(self)

843

844

def add_post_hook(self, ph):
    """Register `ph` at the end of the list of post hooks."""
    post_hooks = self._post_hooks
    post_hooks.append(ph)

847

848

def add_progress_hook(self, ph):
    """Register `ph` at the end of the list of download progress hooks."""
    progress_hooks = self._progress_hooks
    progress_hooks.append(ph)

851

852

def add_postprocessor_hook(self, ph):
    """Register a postprocessing progress hook.

    The hook is stored in `self._postprocessor_hooks` and is also attached
    to every postprocessor currently registered in `self._pps`.
    """
    self._postprocessor_hooks.append(ph)
    # self._pps maps a stage to its list of postprocessors; visit them all
    for pp in itertools.chain.from_iterable(self._pps.values()):
        pp.add_progress_hook(ph)

858

859

def _bidi_workaround(self, message):

860

if not hasattr(self, '_output_channel'):

861

return message

862

863

assert hasattr(self, '_output_process')

864

assert isinstance(message, str)

865

line_count = message.count('\n') + 1

866

self._output_process.stdin.write((message + '\n').encode())

867

self._output_process.stdin.flush()

868

res = ''.join(self._output_channel.readline().decode()

869

for _ in range(line_count))

870

return res[:-len('\n')]

871

872

def _write_string(self, message, out=None, only_once=False):

873

if only_once:

874

if message in self._printed_messages:

875

return

876

self._printed_messages.add(message)

877

write_string(message, out=out, encoding=self.params.get('encoding'))

878

879

def to_stdout(self, message, skip_eol=False, quiet=None):
    """Print message, followed by a newline, to the `out` stream.

    `skip_eol` and `quiet` no longer have any effect; passing either one
    only triggers a deprecation warning.
    """
    quiet_given = quiet is not None
    skip_eol_given = skip_eol is not False
    if quiet_given:
        self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                 'Use "YoutubeDL.to_screen" instead')
    if skip_eol_given:
        self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                 'Use "YoutubeDL.to_screen" instead')
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.out)

888

889

def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
    """Print message to the `screen` stream unless output is silenced.

    With a 'logger' parameter set, the message goes to `logger.debug`
    instead and nothing is written. Otherwise the message is dropped when
    quiet mode applies (`quiet` argument, or the 'quiet' parameter if the
    argument is None) and 'verbose' is not set.

    @param skip_eol   Do not append a newline
    @param only_once  Forwarded to `_write_string`
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return

    silenced = self.params.get('quiet') if quiet is None else quiet
    if silenced and not self.params.get('verbose'):
        return

    line_end = '' if skip_eol else '\n'
    self._write_string(
        '%s%s' % (self._bidi_workaround(message), line_end),
        self._out_files.screen, only_once=only_once)

899

900

def to_stderr(self, message, only_once=False):
    """Print message, followed by a newline, to the `error` stream.

    With a 'logger' parameter set, the message goes to `logger.error`
    instead and `only_once` is not consulted.
    """
    assert isinstance(message, str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.error, only_once=only_once)

907

908

def _send_console_code(self, code):
    """Write a terminal control sequence to the console stream.

    Nothing is written when `compat_os_name` is 'nt' or when no console
    stream is available.
    """
    can_send = compat_os_name != 'nt' and self._out_files.console
    if can_send:
        self._write_string(code, self._out_files.console)

912

913

def to_console_title(self, message):
    """Set the console window title to `message`.

    Does nothing unless the 'consoletitle' parameter is enabled. Terminal
    sequences are stripped from the message first. When `compat_os_name`
    is 'nt' the title is set through the Win32 console API (only if a
    console window exists); elsewhere an OSC title escape sequence is
    sent via `_send_console_code`.
    """
    if not self.params.get('consoletitle', False):
        return
    message = remove_terminal_sequences(message)
    if compat_os_name == 'nt':
        # `ctypes` is not part of this module's top-level stdlib imports,
        # so import it here; otherwise this branch raises NameError
        import ctypes

        if ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    else:
        self._send_console_code(f'\033]0;{message}\007')

924

925

def save_console_title(self):
    """Push the current console title onto the terminal's title stack.

    Only done when 'consoletitle' is enabled and 'simulate' is not.
    """
    enabled = self.params.get('consoletitle') and not self.params.get('simulate')
    if enabled:
        self._send_console_code('\033[22;0t')  # Save the title on stack

929

930

def restore_console_title(self):
    """Pop the console title previously saved on the terminal's title stack.

    Only done when 'consoletitle' is enabled and 'simulate' is not.
    """
    enabled = self.params.get('consoletitle') and not self.params.get('simulate')
    if enabled:
        self._send_console_code('\033[23;0t')  # Restore the title from stack

934

935

def __enter__(self):

936

self.save_console_title()

937

return self

938

939

def __exit__(self, *args):

940

self.restore_console_title()

941

942

if self.params.get('cookiefile') is not None:

943

self.cookiejar.save(ignore_discard=True, ignore_expires=True)

944

945

def trouble(self, message=None, tb=None, is_error=True):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    @param message   Text printed to stderr first (skipped if None)
    @param tb        If given, is additional traceback information.
                     If None and 'verbose' is set, a traceback is built here.
                     Any other falsy value (e.g. False) suppresses it
    @param is_error  Whether to raise error according to ignoreerrors.
                     If false, the method returns after printing
    @raises DownloadError  if is_error is true and 'ignoreerrors' is not set
    """
    if message is not None:
        self.to_stderr(message)
    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                tb = ''
                # An exception carrying its own `exc_info` attribute wraps
                # another one; print the wrapped traceback first
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += encode_compat_str(traceback.format_exc())
            else:
                # No active exception: show where trouble() was called from
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = ''.join(tb_data)
        if tb:
            self.to_stderr(tb)
    if not is_error:
        return
    if not self.params.get('ignoreerrors'):
        # Prefer the wrapped exception info, if any, as the error's cause
        if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
            exc_info = sys.exc_info()[1].exc_info
        else:
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    # Errors are being ignored: only record the failure
    self._download_retcode = 1

# Named text styles. The values are passed as the style argument of the
# `_format_*` helpers (e.g. `Styles.WARNING` for the "WARNING:" prefix in
# `report_warning` and `Styles.ERROR` for "ERROR:" in `report_error`).
Styles = Namespace(
    HEADERS='yellow',
    EMPHASIS='light blue',
    FILENAME='green',
    ID='green',
    DELIM='blue',
    ERROR='red',
    BAD_FORMAT='light red',
    WARNING='yellow',
    SUPPRESS='light black',
)

990

991

def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):

text = str(text)

if test_encoding:

original_text = text

# handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711

996

encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'

997

text = text.encode(encoding, 'ignore').decode(encoding)

998

if fallback is not None and text != original_text:

999

text = fallback

1000

return format_text(text, f) if allow_colors is True else text if fallback is None else fallback

1001

1002

def _format_out(self, *args, **kwargs):

1003

return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

1004

1005

def _format_screen(self, *args, **kwargs):

1006

return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

1007

1008

def _format_err(self, *args, **kwargs):

1009

return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

1010

1011

def report_warning(self, message, only_once=False):
    '''
    Report a warning.

    With a 'logger' parameter set, the raw message goes to
    `logger.warning`. Otherwise, unless 'no_warnings' is set, the message
    is printed to stderr prefixed with 'WARNING:' (styled via `_format_err`).
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    prefix = self._format_err("WARNING:", self.Styles.WARNING)
    self.to_stderr(f'{prefix} {message}', only_once)

1022

1023

def deprecation_warning(self, message, *, stacklevel=0):
    """Issue a deprecation warning through the module-level `deprecation_warning` helper.

    The helper is given `self.report_error` as printer and `is_error=False`.
    `stacklevel` is incremented by one before being passed on (presumably
    to account for this wrapper's own frame).
    """
    # Inside the class body this bare name resolves to the imported
    # module-level function, not to this method
    helper_options = {
        'stacklevel': stacklevel + 1,
        'printer': self.report_error,
        'is_error': False,
    }
    deprecation_warning(message, **helper_options)

1026

1027

def deprecated_feature(self, message):
    """Report use of a deprecated feature.

    The message is sent to `logger.warning` when a 'logger' parameter is
    set, and is always printed once via `to_stderr` with a
    'Deprecated Feature:' prefix.
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(f'Deprecated Feature: {message}')
    # NOTE(review): unlike report_warning, there is no early return here, so
    # with a logger the message is reported twice (to_stderr forwards to
    # logger.error) - confirm whether that is intended
    prefix = self._format_err("Deprecated Feature:", self.Styles.ERROR)
    self.to_stderr(f'{prefix} {message}', True)

1031

1032

def report_error(self, message, *args, **kwargs):
    '''
    Prefix the message with 'ERROR:' (styled via `_format_err`) and hand
    it to `trouble`, forwarding any extra arguments unchanged.
    '''
    prefix = self._format_err("ERROR:", self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', *args, **kwargs)

1038

1039

def write_debug(self, message, only_once=False):
    '''Emit a "[debug] "-prefixed message, but only in verbose mode.

    The message goes to `logger.debug` when a 'logger' parameter is set,
    otherwise to stderr.
    '''
    if not self.params.get('verbose', False):
        return
    debug_line = f'[debug] {message}'
    logger = self.params.get('logger')
    if logger:
        logger.debug(debug_line)
        return
    self.to_stderr(debug_line, only_once)

1048

1049

def report_file_already_downloaded(self, file_name):
    """Tell the user that the file was already fully downloaded.

    Falls back to a message without the file name if printing the name
    raises UnicodeEncodeError.
    """
    named_message = '[download] %s has already been downloaded' % file_name
    try:
        self.to_screen(named_message)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')

1055

1056

def report_file_delete(self, file_name):
    """Tell the user that an existing file is about to be deleted.

    Falls back to a message without the file name if printing the name
    raises UnicodeEncodeError.
    """
    named_message = 'Deleting existing file %s' % file_name
    try:
        self.to_screen(named_message)
    except UnicodeEncodeError:
        self.to_screen('Deleting existing file')

1062

1063

def raise_no_formats(self, info, forced=False, *, msg=None):
    """Raise (or merely warn) that no formats are available for `info`.

    An ExtractorError is raised when `forced` is true or the
    'ignore_no_formats_error' parameter is unset; otherwise only a warning
    is reported. Without an explicit `msg`, the text depends on whether
    `info` is flagged with '_has_drm'.

    @param info    Info dict; 'id' and 'extractor' are read when raising
    @param forced  Raise even if 'ignore_no_formats_error' is set
    @param msg     Custom message; if given, the error counts as expected
    """
    has_drm = info.get('_has_drm')
    ignored = self.params.get('ignore_no_formats_error')
    expected = bool(msg)
    if not msg:
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'

    if ignored and not forced:
        self.report_warning(msg)
        return
    raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                         expected=has_drm or ignored or expected)

1072

1073

def parse_outtmpl(self):
    """Deprecated: normalize the 'outtmpl' parameter and return it.

    Emits a deprecation warning, runs `_parse_outtmpl` and returns
    `self.params['outtmpl']`.
    """
    self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
    self._parse_outtmpl()
    parsed = self.params['outtmpl']
    return parsed

1077

1078

def _parse_outtmpl(self):
    """Normalize `self.params['outtmpl']` into a dict with all default templates.

    A non-dict value becomes `{'default': value}`. Every key of
    DEFAULT_OUTTMPL that is missing (or None) is then filled in; with
    'restrictfilenames' set, spaces are removed from these defaults.
    """
    if self.params.get('restrictfilenames'):
        # Remove spaces in the default template
        def sanitize(tmpl):
            return tmpl.replace(' - ', ' ').replace(' ', '-')
    else:
        sanitize = IDENTITY

    outtmpl = self.params.setdefault('outtmpl', {})
    if not isinstance(outtmpl, dict):
        outtmpl = {'default': outtmpl}
        self.params['outtmpl'] = outtmpl

    missing = {
        tmpl_type: sanitize(default)
        for tmpl_type, default in DEFAULT_OUTTMPL.items()
        if outtmpl.get(tmpl_type) is None
    }
    outtmpl.update(missing)

1087

1088

def get_output_path(self, dir_type='', filename=None):
    """Build the output path from the 'paths' parameter and `filename`.

    The result joins the expanded 'home' path, the expanded path for
    `dir_type` (if any) and the file name, and is then passed through
    `sanitize_path`.

    @param dir_type  Key into the 'paths' dict; '' uses only 'home'
    @param filename  File name to append (may be None)
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict), '"paths" parameter must be a dictionary'

    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    joined = os.path.join(home_dir, type_dir, filename or '')
    return sanitize_path(joined, force=self.params.get('windowsfilenames'))

1096

1097

@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Apply `expand_path` to an output template while keeping '%%' and '$$' intact."""
    # expand_path would collapse '%%' to '%' and '$$' to '$', but '%%' has to
    # survive until the template dict substitution step. A random marker is
    # inserted between the doubled characters and removed again afterwards.
    marker = ''.join(random.choices(string.ascii_letters, k=32))
    shielded = outtmpl.replace('%%', f'%{marker}%').replace('$$', f'${marker}$')

    # Expansion must happen before template substitution: meta fields may
    # contain things that look like env variables and must not be expanded.
    # E.g. for outtmpl "%(title)s.%(ext)s" and title "Hello $PATH",
    # `$PATH` should stay as it is.
    expanded = expand_path(shielded)
    return expanded.replace(marker, '')

1111

1112

@staticmethod
def escape_outtmpl(outtmpl):
    ''' Escape any remaining strings like %s, %abc% etc. '''

    def _escape(mobj):
        # Matches without a key get an extra leading '%'; keyed ones are kept
        lead = '' if mobj.group('has_key') else '%'
        return lead + mobj.group(0)

    pattern = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')
    return re.sub(pattern, _escape, outtmpl)

@classmethod
def validate_outtmpl(cls, outtmpl):
    ''' Check an output template by trial formatting.

    @return None if the template formats without error, otherwise the
            ValueError object that was raised
    '''

    def _as_string_conversion(mobj):
        # Swap the custom conversion letter for plain 's'
        return f'{mobj.group(0)[:-1]}s'

    custom_types_re = STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]')
    outtmpl = re.sub(custom_types_re, _as_string_conversion, cls._outtmpl_expandpath(outtmpl))
    try:
        cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None

@staticmethod

def _copy_infodict(info_dict):

1135

info_dict = dict(info_dict)

1136

info_dict.pop('__postprocessors', None)

1137

info_dict.pop('__pending_error', None)

1138

return info_dict

1139

1140

def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
    @param outtmpl     Output template string
    @param info_dict   Info dict to read fields from. Only 'epoch' may be
                       added to it; all other changes are made on a copy
    @param sanitize    Whether to sanitize the output as a filename.
                       For backward compatibility, a function can also be passed
    @return            Tuple (rewritten template, dict of values keyed by
                       the rewritten template keys)
    """

    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    # Work on a copy from here on, so the derived fields below do not leak
    info_dict = self._copy_infodict(info_dict)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
    info_dict['video_autonumber'] = self._num_videos
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
        'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    # Filled by create_key(): rewritten key -> computed value
    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
    # Arithmetic supported inside a field expression
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int, slice or "{field, ...}"
    FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
    FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
        'inner': FIELD_INNER_RE,
        'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
    }
    MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    # Parses the text between the parentheses of one template field
    INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
        (?P<negate>-)?
        (?P<fields>{FIELD_RE})
        (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
        (?:>(?P<strf_format>.+?))?
        (?P<remaining>
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
        )$''')

    def _traverse_infodict(fields):
        # Split the dotted path, keeping each "{...}" group as a single element
        fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                  for f in ([x] if x.startswith('{') else x.split('.'))]
        # Drop an empty first/last element left over from the split
        for i in (0, -1):
            if fields and not fields[i]:
                fields.pop(i)

        for i, f in enumerate(fields):
            if not f.startswith('{'):
                continue
            assert f.endswith('}'), f'No closing brace for {f} in {fields}'
            # "{a,b.c}" becomes a dict of sub-paths
            fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}

        return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            operator = None
            # Alternately consume an operator and then its operand
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if operator is None:
                    operator = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                # The operand is either a number literal or another field
                offset = float_or_none(item)
                if offset is None:
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = operator(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                operator = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
        if sanitize and value == '':
            value = None
        return value

    # Placeholder used when a field has no value and no explicit default
    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
        return sanitize_filename(str(value), restricted=restricted, is_id=(
            bool(re.search(r'(^|[_.])id(\.|$)', key))
            if 'filename-sanitization' in self.params['compat_opts']
            else NO_DEFAULT))

    # `sanitize` may be a callable (legacy); from here on it is only a flag
    sanitizer = sanitize if callable(sanitize) else filename_sanitizer
    sanitize = bool(sanitize)

    def _dumpjson_default(obj):
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        return repr(obj)

    class _ReplacementFormatter(string.Formatter):
        # Only numeric field names are accepted; all of them resolve to the first argument
        def get_field(self, field_name, args, kwargs):
            if field_name.isdigit():
                return args[0], -1
            raise ValueError('Unsupported field')

    replacement_formatter = _ReplacementFormatter()

    def create_key(outer_mobj):
        # Conversions without a "(key)" part are left untouched
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        value, replacement, default, last_field = None, None, na, ''
        # Try the field, then each ","-separated alternate, until a value is found
        while mobj:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else default
            value = get_value(mobj)
            last_field, replacement = mobj['fields'], mobj['replacement']
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
            else:
                break

        fmt = outer_mobj.group('format')
        if fmt == 's' and value is not None and last_field in field_size_compat_map.keys():
            fmt = f'0{field_size_compat_map[last_field]:d}d'

        if None not in (value, replacement):
            try:
                value = replacement_formatter.format(replacement, value)
            except ValueError:
                value, default = None, na

        flags = outer_mobj.group('conversion') or ''
        # Same flags/width as requested, but with a plain string conversion
        str_fmt = f'{fmt[:-1]}s'
        if value is None:
            value, fmt = default, 's'
        elif fmt[-1] == 'l':  # list
            delim = '\n' if '#' in flags else ', '
            value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
        elif fmt[-1] == 'j':  # json
            value, fmt = json.dumps(
                value, default=_dumpjson_default,
                indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
        elif fmt[-1] == 'h':  # html
            value, fmt = escapeHTML(str(value)), str_fmt
        elif fmt[-1] == 'q':  # quoted
            value = map(str, variadic(value) if '#' in flags else [value])
            value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
        elif fmt[-1] == 'B':  # bytes
            value = f'%{str_fmt}'.encode() % str(value).encode()
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif fmt[-1] == 'U':  # unicode normalized
            value, fmt = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                value), str_fmt
        elif fmt[-1] == 'D':  # decimal suffix
            num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
            value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                          factor=1024 if '#' in flags else 1000)
        elif fmt[-1] == 'S':  # filename sanitization
            value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
        elif fmt[-1] == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif fmt[-1] not in 'rsa':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            # If value is an object, sanitize might convert it to a string
            # So we convert it to repr first
            if fmt[-1] == 'r':
                value, fmt = repr(value), str_fmt
            elif fmt[-1] == 'a':
                value, fmt = ascii(value), str_fmt
            if fmt[-1] in 'csra':
                value = sanitizer(last_field, value)

        # Build a new key from the original key text and its format
        key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

1351

1352

def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Evaluate an output template against info_dict and return the resulting string.

    Extra arguments are forwarded to `prepare_outtmpl`.
    """
    prepared_tmpl, values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    escaped_tmpl = self.escape_outtmpl(prepared_tmpl)
    return escaped_tmpl % values

1355

1356

def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):

1357

assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'

1358

if outtmpl is None:

1359

outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])

1360

try:

1361

outtmpl = self._outtmpl_expandpath(outtmpl)

1362

filename = self.evaluate_outtmpl(outtmpl, info_dict, True)

if not filename:

return None

if tmpl_type in ('', 'temp'):

1367

final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')

1368

if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):

1369

filename = replace_extension(filename, ext, final_ext)

1370

elif tmpl_type:

1371

force_ext = OUTTMPL_TYPES[tmpl_type]

1372

if force_ext:

1373

filename = replace_extension(filename, force_ext, info_dict.get('ext'))

1374

1375

# https://github.com/blackjack4494/youtube-dlc/issues/85

1376

trim_file_name = self.params.get('trim_file_name', False)

1377

if trim_file_name:

1378

no_ext, *ext = filename.rsplit('.', 2)

1379

filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

1380

1381

return filename

1382

except ValueError as err:

1383

self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')

1384

return None

1385

1386

def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
    """Generate the output filename

    @param info_dict  Info dict the template is evaluated against
    @param dir_type   Template/path type ('' = default); must be empty when
                      `outtmpl` is given
    @param outtmpl    Explicit template to use instead of the configured one
    @param warn       Warn when the 'paths' parameter is set but cannot apply
    @return           '-' unchanged; '' when no name could be generated for a
                      type other than ''/'temp'; the falsy value from
                      `_prepare_filename` for ''/'temp'; otherwise the full
                      path from `get_output_path`
    """
    if outtmpl:
        assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
        dir_type = None
    filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
    if not filename and dir_type not in ('', 'temp'):
        return ''

    if warn and self.params.get('paths'):
        if filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        # `filename` can be None here (empty or invalid template);
        # os.path.isabs(None) would raise TypeError, so check it first
        elif filename and os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
    if filename == '-' or not filename:
        return filename

    return self.get_output_path(dir_type, filename)

1406

1407

def _match_entry(self, info_dict, incomplete=False, silent=False):
    """Returns None if the file should be downloaded

    Otherwise returns a string with the reason for skipping it.

    @param incomplete  Whether info_dict may be incomplete; must be true
                       for anything other than a 'video' result
    @param silent      Do not print the reason to screen
    @raises            The configured break exception (ExistingVideoReached,
                       RejectedVideoReached or the DownloadCancelled subtype
                       raised by the match filter) when the entry is rejected
                       and the corresponding break option is enabled
    """
    _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
    assert incomplete or _type == 'video', 'Only video result can be considered complete'

    video_title = info_dict.get('title', info_dict.get('id', 'entry'))

    def check_filter():
        # Returns the rejection reason, or None if the entry passes
        if _type in ('playlist', 'multi_video'):
            return
        elif _type in ('url', 'url_transparent') and not try_call(
                lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
            return

        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        match_filter = self.params.get('match_filter')
        if match_filter is None:
            return None

        cancelled = None
        try:
            try:
                ret = match_filter(info_dict, incomplete=incomplete)
            except TypeError:
                # For backward compatibility
                ret = None if incomplete else match_filter(info_dict)
        except DownloadCancelled as err:
            # A cancellation whose message is NO_DEFAULT is turned into an
            # interactive prompt below; any other one propagates
            if err.msg is not NO_DEFAULT:
                raise
            ret, cancelled = err.msg, err

        if ret is NO_DEFAULT:
            # Ask the user; repeat until the answer is recognized
            while True:
                filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
                reply = input(self._format_screen(
                    f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
                if reply in {'y', ''}:
                    return None
                elif reply == 'n':
                    if cancelled:
                        raise type(cancelled)(f'Skipping {video_title}')
                    return f'Skipping {video_title}'
        return ret

    # Decide the reason and which break option / exception type applies to it
    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        try:
            reason = check_filter()
        except DownloadCancelled as e:
            reason, break_opt, break_err = e.msg, 'match_filter', type(e)
        else:
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
    if reason is not None:
        if not silent:
            self.to_screen('[download] ' + reason)
        if self.params.get(break_opt, False):
            raise break_err()
    return reason

@staticmethod

def add_extra_info(info_dict, extra_info):

1497

'''Set the keys from extra_info in info dict if they are missing'''

1498

for key, value in extra_info.items():

1499

info_dict.setdefault(key, value)

1500

1501

def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract and return the information dictionary of the URL

    Arguments:
    @param url          URL to extract

    Keyword arguments:
    @param download     Whether to download videos
    @param process      Whether to resolve all unresolved references (URLs, playlist items).
                        Must be True for download to work
    @param ie_key       Use only the extractor with this key

    @param extra_info   Dictionary containing the extra values to add to the info (For internal use only)
    @force_generic_extractor  Force using the generic extractor (Deprecated; use ie_key='Generic')

    Returns the result of the first suitable extractor. Nothing is returned
    when the URL is already recorded in the download archive, or when no
    suitable extractor exists (an error is reported in that case).
    """

    if extra_info is None:
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    # Restrict the candidates to the requested extractor, if it is registered
    if ie_key:
        ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
    else:
        ies = self._ies

    for key, ie in ies.items():
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = ie.get_temp_id(url)
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
            self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
            if self.params.get('break_on_existing', False):
                raise ExistingVideoReached()
            # Leaving via break skips the for-else below, so no error is reported
            break
        return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
    else:
        # Reached only if the loop ended without finding a suitable extractor
        extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
        self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
                          tb=False if extractors_restricted else None)

1549

1550

def _handle_extraction_exceptions(func):
    """Decorator for methods: report extraction errors instead of propagating them.

    - DownloadCancelled and LazyList/PagedList IndexError are re-raised as-is
    - ReExtractInfo makes the wrapped call run again
    - GeoRestrictedError and ExtractorError are passed to `report_error`
    - any other Exception is reported only if 'ignoreerrors' is set,
      otherwise it is re-raised
    When an error was reported without raising, the wrapper returns None.
    """
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        while True:
            try:
                return func(self, *args, **kwargs)
            except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                raise
            except ReExtractInfo as e:
                if e.expected:
                    self.to_screen(f'{e}; Re-extracting data')
                else:
                    self.to_stderr('\r')
                    self.report_warning(f'{e}; Re-extracting data')
                # Retry the wrapped call
                continue
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(str(e), e.format_traceback())
            except Exception as e:
                if self.params.get('ignoreerrors'):
                    self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
            # An error was reported (and not raised): stop retrying
            break
    return wrapper

def _wait_for_video(self, ie_result=None):
    """Wait for a not-yet-available video and then request re-extraction.

    Does nothing (returns None) unless the 'wait_for_video' param is set and
    ie_result is a 'video' result without 'formats' and without 'url'.

    The wait time is taken from ie_result['release_timestamp'] when present;
    for an upcoming live stream without a known release time a random value
    between the configured minimum and maximum is used. The result is clamped
    to the (min_wait, max_wait) pair stored in the 'wait_for_video' param.

    @param ie_result    Extracted info dict; None is treated as an empty dict
    Raises ReExtractInfo (expected=True) when the wait ends or the user
    interrupts it with Ctrl+C.
    """
    # A None sentinel is used instead of a shared mutable default dict
    if ie_result is None:
        ie_result = {}

    if (not self.params.get('wait_for_video')
            or ie_result.get('_type', 'video') != 'video'
            or ie_result.get('formats') or ie_result.get('url')):
        return

    def format_dur(dur):
        # HH:MM:SS; the trailing (millisecond) field of the time tuple is dropped
        return '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]

    last_msg = ''

    def progress(msg):
        nonlocal last_msg
        full_msg = f'{msg}\n'
        if not self.params.get('noprogress'):
            # Pad with spaces so a shorter message fully overwrites the previous one
            full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
        elif last_msg:
            # With progress output disabled, only the first message is printed
            return
        self.to_screen(full_msg, skip_eol=True)
        last_msg = msg

    min_wait, max_wait = self.params.get('wait_for_video')
    diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
    if diff is None and ie_result.get('live_status') == 'is_upcoming':
        diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
        self.report_warning('Release time of video is not known')
    elif ie_result and (diff or 0) <= 0:
        self.report_warning('Video should already be available according to extracted info')
    diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
    self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

    wait_till = time.time() + diff
    try:
        while True:
            diff = wait_till - time.time()
            if diff <= 0:
                progress('')
                raise ReExtractInfo('[wait] Wait period ended', expected=True)
            progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
            time.sleep(1)
    except KeyboardInterrupt:
        progress('')
        raise ReExtractInfo('[wait] Interrupted by user', expected=True)
    except BaseException as e:
        # Terminate the progress line before any unrelated exception propagates
        if not isinstance(e, ReExtractInfo):
            self.to_screen('')
        raise

@_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run a single extractor on url and optionally process its result.

    @param url          URL to extract
    @param ie           Extractor instance; its extract() method is called
    @param download     Forwarded to process_ie_result
    @param extra_info   Dict of extra fields; 'original_url' is copied into
                        the result when the result does not already have one
    @param process      Whether to wait for the video and process the result

    Returns None if the extractor returned nothing, the raw result when
    process is falsy, and the value of process_ie_result otherwise.
    """
    try:
        ie_result = ie.extract(url)
    except UserNotLive as err:
        if process:
            if self.params.get('wait_for_video'):
                self.report_warning(err)
            self._wait_for_video()
        raise

    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
        return None

    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}

    if extra_info.get('original_url'):
        ie_result.setdefault('original_url', extra_info['original_url'])
    self.add_default_extra_info(ie_result, ie, url)

    if not process:
        return ie_result
    self._wait_for_video(ie_result)
    return self.process_ie_result(ie_result, download, extra_info)

def add_default_extra_info(self, ie_result, ie, url):
    """Add URL- and extractor-derived fields to ie_result via add_extra_info.

    @param ie_result    Info dict to extend
    @param ie           Extractor instance, or None to skip the extractor fields
    @param url          Source URL, or None to skip the URL fields
    """
    if url is not None:
        self.add_extra_info(ie_result, dict.fromkeys(('webpage_url', 'original_url'), url))

    # Read back from the result: the value may come from the extractor rather than from url
    page_url = ie_result.get('webpage_url')
    if page_url:
        url_fields = {
            'webpage_url_basename': url_basename(page_url),
            'webpage_url_domain': get_domain(page_url),
        }
        self.add_extra_info(ie_result, url_fields)

    if ie is not None:
        extractor_fields = {
            'extractor': ie.IE_NAME,
            'extractor_key': ie.ie_key(),
        }
        self.add_extra_info(ie_result, extractor_fields)

1673

1674

def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    @param ie_result    Info dict; its '_type' (default 'video') selects the handling
    @param download     Forwarded to the video/playlist processing
    @param extra_info   Extra fields to merge into the results (default: {})

    Returns None for a playlist whose webpage_url is already being processed.
    Raises Exception for an unknown '_type'.
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(
            ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
        if ie_result.get('original_url') and not extra_info.get('original_url'):
            extra_info = {'original_url': ie_result['original_url'], **extra_info}

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: do not resolve the URL. Work on a copy so that the
            # unresolved result is what gets returned
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            if ie and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            info_copy, _ = self.pre_process(info_copy)
            self._fill_common_fields(info_copy, False)
            self.__forced_printings(info_copy)
            self._raise_pending_errors(info_copy)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        self._raise_pending_errors(ie_result)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        exempted_fields = {'_type', 'url', 'ie_key'}
        if not ie_result.get('section_end') and ie_result.get('section_start') is None:
            # For video clips, the id etc of the clip extractor should be used
            exempted_fields |= {'id', 'extractor', 'extractor_key'}

        new_result = info.copy()
        new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result.get('webpage_url')  # Playlists may not have webpage_url
        if webpage_url and webpage_url in self._playlist_urls:
            # The parentheses matter: '%' binds tighter than 'or', so without
            # them the id fallback would never be used
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._fill_common_fields(ie_result, False)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Forget the seen URLs once the outermost playlist is done
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)

1807

1808

def _ensure_dir_exists(self, path):
    """Delegate to make_dir(path, self.report_error) and return its result."""
    error_callback = self.report_error
    return make_dir(path, error_callback)

1810

1811

@staticmethod
def _playlist_infodict(ie_result, strict=False, **kwargs):
    """Build the dict of playlist-level fields derived from ie_result.

    @param ie_result    Playlist info dict
    @param strict       If true, return only the playlist_* fields and kwargs
    @param kwargs       Additional fields; they override the playlist_* fields

    Unless strict, the webpage_url* fields (when the playlist has a
    webpage_url), 'playlist_index', '__last_playlist_index', 'extractor' and
    'extractor_key' are included as well; in that case ie_result must
    contain 'extractor' and 'extractor_key'.
    """
    playlist_id = ie_result.get('id')
    playlist_title = ie_result.get('title')
    fields = {
        'playlist_count': ie_result.get('playlist_count'),
        'playlist': playlist_title or playlist_id,
        'playlist_id': playlist_id,
        'playlist_title': playlist_title,
        'playlist_uploader': ie_result.get('uploader'),
        'playlist_uploader_id': ie_result.get('uploader_id'),
    }
    fields.update(kwargs)
    if strict:
        return fields

    if ie_result.get('webpage_url'):
        page_url = ie_result['webpage_url']
        fields['webpage_url'] = page_url
        fields['webpage_url_basename'] = url_basename(page_url)
        fields['webpage_url_domain'] = get_domain(page_url)

    fields['playlist_index'] = 0
    # (0, 0) stands in when no entries were requested, so max() has something to work on
    fields['__last_playlist_index'] = max(ie_result.get('requested_entries') or (0, 0))
    fields['extractor'] = ie_result['extractor']
    fields['extractor_key'] = ie_result['extractor_key']
    return fields

1837

1838

def __process_playlist(self, ie_result, download):
    """Process each entry in the playlist

    Writes the playlist-level files (unless disabled or simulating), processes
    every requested entry, stores the processed entries back into ie_result
    and runs the 'playlist' postprocessors.

    Returns the processed playlist dict, or None when the playlist is
    rejected by _match_entry or one of the playlist file writers returns None.
    """
    assert ie_result['_type'] in ('playlist', 'multi_video')

    common_info = self._playlist_infodict(ie_result, strict=True)
    title = common_info.get('playlist') or '<Untitled>'
    if self._match_entry(common_info, incomplete=True) is not None:
        return
    self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

    all_entries = PlaylistEntries(self, ie_result)
    entries = orderedSet(all_entries.get_requested_items(), lazy=True)

    lazy = self.params.get('lazy_playlist')
    if lazy:
        # Entries are resolved one at a time inside the loop below
        resolved_entries = []
        n_entries = 'N/A'
        ie_result['requested_entries'] = None
        ie_result['entries'] = None
    else:
        entries = resolved_entries = list(entries)
        n_entries = len(resolved_entries)
        ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
    if not ie_result.get('playlist_count'):
        # Better to do this after potentially exhausting entries
        ie_result['playlist_count'] = all_entries.get_full_count()

    extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
    ie_copy = collections.ChainMap(ie_result, extra)

    infojson_status = False
    write_playlist_files = self.params.get('allow_playlist_files', True)
    if write_playlist_files and self.params.get('list_thumbnails'):
        self.list_thumbnails(ie_result)
    if write_playlist_files and not self.params.get('simulate'):
        infojson_status = self._write_info_json(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
        if infojson_status is None:
            return
        description_status = self._write_description(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_description'))
        if description_status is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if lazy:
        if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
            self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
    elif self.params.get('playlistreverse'):
        entries.reverse()
    elif self.params.get('playlistrandom'):
        random.shuffle(entries)

    self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
                   f'{format_field(ie_result, "playlist_count", " of %s")}')

    extract_flat = self.params.get('extract_flat')
    keep_resolved_entries = extract_flat != 'discard'
    if extract_flat == 'discard_in_playlist':
        keep_resolved_entries = ie_result['_type'] != 'playlist'
    if keep_resolved_entries:
        self.write_debug('The information of all playlist entries will be held in memory')

    failed_count = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, (playlist_index, entry) in enumerate(entries):
        if lazy:
            resolved_entries.append((playlist_index, entry))
        if not entry:
            continue

        entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
        if not lazy and 'playlist-index' in self.params['compat_opts']:
            playlist_index = ie_result['requested_entries'][i]

        autonumber = i + 1
        entry_copy = collections.ChainMap(entry, {
            **common_info,
            'n_entries': int_or_none(n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': autonumber,
        })

        if self._match_entry(entry_copy, incomplete=True) is not None:
            # For compatabilty with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
            resolved_entries[i] = (playlist_index, NO_DEFAULT)
            continue

        self.to_screen('[download] Downloading item %s of %s' % (
            self._format_screen(autonumber, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))

        entry_extra = collections.ChainMap({
            'playlist_index': playlist_index,
            'playlist_autonumber': autonumber,
        }, extra)
        entry_result = self.__process_iterable_entry(entry, download, entry_extra)
        if not entry_result:
            failed_count += 1
        if failed_count >= max_failures:
            self.report_error(
                f'Skipping the remaining entries in playlist "{title}" since {failed_count} items failed extraction')
            break
        if keep_resolved_entries:
            resolved_entries[i] = (playlist_index, entry_result)

    # Update with processed data
    ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
    ie_result['requested_entries'] = [idx for idx, e in resolved_entries if e is not NO_DEFAULT]
    if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
        # Do not set for full playlist
        ie_result.pop('requested_entries')

    # Write the updated info to json
    if infojson_status is True and self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
        return

    ie_result = self.run_all_pps('playlist', ie_result)
    self.to_screen(f'[download] Finished downloading playlist: {title}')
    return ie_result

1954

1955

@_handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process one playlist entry with extraction errors handled by the decorator."""
    processed = self.process_ie_result(entry, download=download, extra_info=extra_info)
    return processed

1959

1960

def _build_format_filter(self, filter_spec):
    """Return a function to filter the formats according to the filter_spec

    Two syntaxes are recognised, tried in this order:
      - numeric:  KEY OP [?] NUMBER[SUFFIX]  with OP one of < <= > >= = !=
      - string:   KEY [!]OP [?] VALUE        with OP one of = ^= $= *= ~=
    The '?' makes the filter also accept formats in which KEY is missing.

    The returned function takes a format dict. When the format has no value
    for KEY it returns the matched '?' group (None if '?' was not given);
    otherwise it returns the result of the comparison.

    Raises ValueError for an unparsable numeric value and SyntaxError when
    filter_spec matches neither syntax.
    """
    numeric_ops = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    numeric_rex = re.compile(r'''(?x)\s*
        (?P<key>[\w.-]+)\s*
        (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
        ''' % '|'.join(map(re.escape, numeric_ops.keys())))
    m = numeric_rex.fullmatch(filter_spec)

    if m is not None:
        raw_value = m.group('value')
        try:
            comparison_value = int(raw_value)
        except ValueError:
            # Not a plain integer: try it as a file size, with and without a trailing 'B'
            comparison_value = parse_filesize(raw_value)
            if comparison_value is None:
                comparison_value = parse_filesize(raw_value + 'B')
            if comparison_value is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        raw_value, filter_spec))
        op = numeric_ops[m.group('op')]
    else:
        string_ops = {
            '=': operator.eq,
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
            '~=': lambda attr, value: value.search(attr) is not None
        }
        string_rex = re.compile(r'''(?x)\s*
            (?P<key>[a-zA-Z0-9._-]+)\s*
            (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
            (?P<quote>["'])?
            (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
            (?(quote)(?P=quote))\s*
            ''' % '|'.join(map(re.escape, string_ops.keys())))
        m = string_rex.fullmatch(filter_spec)
        if m is None:
            raise SyntaxError('Invalid filter specification %r' % filter_spec)

        if m.group('op') == '~=':
            # The regex operator compares against a compiled pattern
            comparison_value = re.compile(m.group('value'))
        else:
            # Undo the backslash escaping of quotes and backslashes
            comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))

        str_op = string_ops[m.group('op')]
        if m.group('negation'):
            def op(attr, value):
                return not str_op(attr, value)
        else:
            op = str_op

    key = m.group('key')
    none_inclusive = m.group('none_inclusive')

    def _filter(f):
        actual_value = f.get(key)
        if actual_value is None:
            return none_inclusive
        return op(actual_value, comparison_value)
    return _filter

2026

2027

def _check_formats(self, formats):
    """Yield the formats for which a test download succeeds.

    Each format is downloaded with self.dl(..., test=True) into a temporary
    file inside the 'temp' output path; the file is removed afterwards.
    Formats whose temp directory cannot be ensured are skipped silently;
    formats that fail the test download are skipped with a screen message.
    """
    for fmt in formats:
        self.to_screen('[info] Testing format %s' % fmt['format_id'])
        temp_dir = self.get_output_path('temp')
        if not self._ensure_dir_exists(f'{temp_dir}/'):
            continue

        # Only the name is needed: create the file, then close it right away
        handle = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=temp_dir or None)
        handle.close()
        probe_path = handle.name
        try:
            downloaded, _ = self.dl(probe_path, fmt, test=True)
        except (DownloadError, OSError, ValueError) + network_exceptions:
            downloaded = False
        finally:
            if os.path.exists(probe_path):
                try:
                    os.remove(probe_path)
                except OSError:
                    self.report_warning('Unable to delete temporary file "%s"' % probe_path)

        if not downloaded:
            self.to_screen('[info] Unable to download format %s. Skipping...' % fmt['format_id'])
            continue
        yield fmt

2049

2050

def _default_format_spec(self, info_dict, download=True):
    """Return the format specification to use when none was given.

    A single pre-merged file ('best/bestvideo+bestaudio') is preferred when
    actually downloading (not simulating) and either merging is unavailable,
    the video is live without 'live_from_start', or the default output
    template is '-'. Otherwise 'bestvideo+bestaudio/best' is returned if
    'allow_multiple_audio_streams' is set or the 'format-spec' compat option
    is active, and 'bestvideo*+bestaudio/best' if not.
    """
    def merger_usable():
        merger = FFmpegMergerPP(self)
        return merger.available and merger.can_merge()

    def single_file_preferred():
        # Evaluation order matters: the merger is only probed for real downloads
        if self.params.get('simulate') or not download:
            return False
        if not merger_usable():
            return True
        if info_dict.get('is_live') and not self.params.get('live_from_start'):
            return True
        return self.params['outtmpl']['default'] == '-'

    if single_file_preferred():
        return 'best/bestvideo+bestaudio'

    compat = (
        self.params.get('allow_multiple_audio_streams', False)
        or 'format-spec' in self.params['compat_opts'])
    if compat:
        return 'bestvideo+bestaudio/best'
    return 'bestvideo*+bestaudio/best'

2072

2073

def build_format_selector(self, format_spec):

2074

def syntax_error(note, start):

2075

message = (

2076

'Invalid format specification: '

2077

'{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))

2078

return SyntaxError(message)

2079

2080

PICKFIRST = 'PICKFIRST'

MERGE = 'MERGE'

SINGLE = 'SINGLE'

GROUP = 'GROUP'

FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

2085

2086

allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),

2087

'video': self.params.get('allow_multiple_video_streams', False)}

2088

2089

def _parse_filter(tokens):

2090

filter_parts = []

2091

for type, string_, start, _, _ in tokens:

2092

if type == tokenize.OP and string_ == ']':

2093

return ''.join(filter_parts)

2094

else:

2095

filter_parts.append(string_)

2096

2097

def _remove_unused_ops(tokens):

2098

# Remove operators that we don't use and join them with the surrounding strings.

2099

# E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'

2100

ALLOWED_OPS = ('/', '+', ',', '(', ')')

2101

last_string, last_start, last_end, last_line = None, None, None, None

2102

for type, string_, start, end, line in tokens:

2103

if type == tokenize.OP and string_ == '[':

2104

if last_string:

2105

yield tokenize.NAME, last_string, last_start, last_end, last_line

2106

last_string = None

2107

yield type, string_, start, end, line

2108

# everything inside brackets will be handled by _parse_filter

2109

for type, string_, start, end, line in tokens:

2110

yield type, string_, start, end, line

2111

if type == tokenize.OP and string_ == ']':

2112

break

2113

elif type == tokenize.OP and string_ in ALLOWED_OPS:

2114

if last_string:

2115

yield tokenize.NAME, last_string, last_start, last_end, last_line

2116

last_string = None

2117

yield type, string_, start, end, line

2118

elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:

2119

if not last_string:

2120

last_string = string_

last_start = start

last_end = end

else:

last_string += string_

2125

if last_string:

2126

yield tokenize.NAME, last_string, last_start, last_end, last_line

2127

2128

def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):

2129

selectors = []

2130

current_selector = None

2131

for type, string_, start, _, _ in tokens:

2132

# ENCODING is only defined in python 3.x

2133

if type == getattr(tokenize, 'ENCODING', None):

2134

continue

2135

elif type in [tokenize.NAME, tokenize.NUMBER]:

2136

current_selector = FormatSelector(SINGLE, string_, [])

2137

elif type == tokenize.OP:

2138

if string_ == ')':

2139

if not inside_group:

2140

# ')' will be handled by the parentheses group

2141

tokens.restore_last_token()

2142

break

2143

elif inside_merge and string_ in ['/', ',']:

2144

tokens.restore_last_token()

2145

break

2146

elif inside_choice and string_ == ',':

2147

tokens.restore_last_token()

2148

break

2149

elif string_ == ',':

2150

if not current_selector:

2151

raise syntax_error('"," must follow a format selector', start)

2152

selectors.append(current_selector)

2153

current_selector = None

2154

elif string_ == '/':

2155

if not current_selector:

2156

raise syntax_error('"/" must follow a format selector', start)

2157

first_choice = current_selector

2158

second_choice = _parse_format_selection(tokens, inside_choice=True)

2159

current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])

2160

elif string_ == '[':

2161

if not current_selector:

2162

current_selector = FormatSelector(SINGLE, 'best', [])

2163

format_filter = _parse_filter(tokens)

2164

current_selector.filters.append(format_filter)

2165

elif string_ == '(':

2166

if current_selector:

2167

raise syntax_error('Unexpected "("', start)

2168

group = _parse_format_selection(tokens, inside_group=True)

2169

current_selector = FormatSelector(GROUP, group, [])

2170

elif string_ == '+':

2171

if not current_selector:

2172

raise syntax_error('Unexpected "+"', start)

2173

selector_1 = current_selector

2174

selector_2 = _parse_format_selection(tokens, inside_merge=True)

2175

if not selector_2:

2176

raise syntax_error('Expected a selector', start)

2177

current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])

2178

else:

2179

raise syntax_error(f'Operator not recognized: "{string_}"', start)

2180

elif type == tokenize.ENDMARKER:

2181

break

2182

if current_selector:

2183

selectors.append(current_selector)

2184

return selectors

2185

2186

def _merge(formats_pair):

2187

format_1, format_2 = formats_pair

2188

2189

formats_info = []

2190

formats_info.extend(format_1.get('requested_formats', (format_1,)))

2191

formats_info.extend(format_2.get('requested_formats', (format_2,)))

2192

2193

if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:

2194

get_no_more = {'video': False, 'audio': False}

2195

for (i, fmt_info) in enumerate(formats_info):

2196

if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':

2197

formats_info.pop(i)

2198

continue

2199

for aud_vid in ['audio', 'video']:

2200

if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':

2201

if get_no_more[aud_vid]:

2202

formats_info.pop(i)

2203

break

2204

get_no_more[aud_vid] = True

2205

2206

if len(formats_info) == 1:

2207

return formats_info[0]

2208

2209

video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']

2210

audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

2211

2212

the_only_video = video_fmts[0] if len(video_fmts) == 1 else None

2213

the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

2214

2215

output_ext = get_compatible_ext(

2216

vcodecs=[f.get('vcodec') for f in video_fmts],

2217

acodecs=[f.get('acodec') for f in audio_fmts],

2218

vexts=[f['ext'] for f in video_fmts],

2219

aexts=[f['ext'] for f in audio_fmts],

2220

preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))

2221

or self.params.get('prefer_free_formats') and ('webm', 'mkv')))

2222

2223

filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

2224

2225

new_dict = {

2226

'requested_formats': formats_info,

2227

'format': '+'.join(filtered('format')),

2228

'format_id': '+'.join(filtered('format_id')),

2229

'ext': output_ext,

2230

'protocol': '+'.join(map(determine_protocol, formats_info)),

2231

'language': '+'.join(orderedSet(filtered('language'))) or None,

2232

'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,

2233

'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,

2234

'tbr': sum(filtered('tbr', 'vbr', 'abr')),

}

if the_only_video:

new_dict.update({

'width': the_only_video.get('width'),

2240

'height': the_only_video.get('height'),

2241

'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),

2242

'fps': the_only_video.get('fps'),

2243

'dynamic_range': the_only_video.get('dynamic_range'),

2244

'vcodec': the_only_video.get('vcodec'),

2245

'vbr': the_only_video.get('vbr'),

2246

'stretched_ratio': the_only_video.get('stretched_ratio'),

2247

'aspect_ratio': the_only_video.get('aspect_ratio'),

})

if the_only_audio:

new_dict.update({

'acodec': the_only_audio.get('acodec'),

2253

'abr': the_only_audio.get('abr'),

2254

'asr': the_only_audio.get('asr'),

2255

'audio_channels': the_only_audio.get('audio_channels')

})

return new_dict

def _check_formats(formats):

2261

if (self.params.get('check_formats') is not None

2262

or self.params.get('allow_unplayable_formats')):

2263

yield from formats

2264

return

2265

elif self.params.get('check_formats') == 'selected':

2266

yield from self._check_formats(formats)

return

for f in formats:

if f.get('has_drm'):

yield from self._check_formats([f])

else:

yield f

def _build_selector_function(selector):

2276

if isinstance(selector, list): # ,

2277

fs = [_build_selector_function(s) for s in selector]

2278

2279

def selector_function(ctx):

2280

for f in fs:

2281

yield from f(ctx)

2282

return selector_function

2283

2284

elif selector.type == GROUP: # ()

2285

selector_function = _build_selector_function(selector.selector)

2286

2287

elif selector.type == PICKFIRST: # /

2288

fs = [_build_selector_function(s) for s in selector.selector]

2289

2290

def selector_function(ctx):

2291

for f in fs:

2292

picked_formats = list(f(ctx))

2293

if picked_formats:

2294

return picked_formats

2295

return []

2296

2297

elif selector.type == MERGE: # +

2298

selector_1, selector_2 = map(_build_selector_function, selector.selector)

2299

2300

def selector_function(ctx):

2301

for pair in itertools.product(selector_1(ctx), selector_2(ctx)):

2302

yield _merge(pair)

2303

2304

elif selector.type == SINGLE: # atom

2305

format_spec = selector.selector or 'best'

2306

2307

# TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector

2308

if format_spec == 'all':

2309

def selector_function(ctx):

2310

yield from _check_formats(ctx['formats'][::-1])

2311

elif format_spec == 'mergeall':

2312

def selector_function(ctx):

2313

formats = list(_check_formats(

2314

f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))

2315

if not formats:

2316

return

2317

merged_format = formats[-1]

2318

for f in formats[-2::-1]:

2319

merged_format = _merge((merged_format, f))

yield merged_format

else:

format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1

mobj = re.match(

format_spec)

if mobj is not None:

format_idx = int_or_none(mobj.group('n'), default=1)

2329

format_reverse = mobj.group('bw')[0] == 'b'

2330

format_type = (mobj.group('type') or [None])[0]

2331

not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)

2332

format_modified = mobj.group('mod') is not None

2333

2334

format_fallback = not format_type and not format_modified # for b, w

2335

_filter_f = (

2336

(lambda f: f.get('%scodec' % format_type) != 'none')

2337

if format_type and format_modified # bv*, ba*, wv*, wa*

2338

else (lambda f: f.get('%scodec' % not_format_type) == 'none')

2339

if format_type # bv, ba, wv, wa

2340

else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')

2341

if not format_modified # b, w

2342

else lambda f: True) # b*, w*

2343

filter_f = lambda f: _filter_f(f) and (

2344

f.get('vcodec') != 'none' or f.get('acodec') != 'none')

2345

else:

2346

if format_spec in self._format_selection_exts['audio']:

2347

filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'

2348

elif format_spec in self._format_selection_exts['video']:

2349

filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'

2350

seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'

2351

elif format_spec in self._format_selection_exts['storyboards']:

2352

filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'

2353

else:

2354

filter_f = lambda f: f.get('format_id') == format_spec # id

2355

2356

def selector_function(ctx):

2357

formats = list(ctx['formats'])

2358

matches = list(filter(filter_f, formats)) if filter_f is not None else formats

2359

if not matches:

2360

if format_fallback and ctx['incomplete_formats']:

2361

# for extractors with incomplete formats (audio only (soundcloud)

2362

# or video only (imgur)) best/worst will fallback to

2363

# best/worst {video,audio}-only format

2364

matches = formats

2365

elif seperate_fallback and not ctx['has_merged_format']:

2366

# for compatibility with youtube-dl when there is no pre-merged format

2367

matches = list(filter(seperate_fallback, formats))

2368

matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))

2369

try:

2370

yield matches[format_idx - 1]

2371

except LazyList.IndexError:

2372

return

2373

2374

filters = [self._build_format_filter(f) for f in selector.filters]

2375

2376

def final_selector(ctx):

2377

ctx_copy = dict(ctx)

2378

for _filter in filters:

2379

ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))

2380

return selector_function(ctx_copy)

2381

return final_selector

2382

2383

stream = io.BytesIO(format_spec.encode())

2384

try:

2385

tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))

2386

except tokenize.TokenError:

2387

raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

2388

2389

class TokenIterator:
    """Iterator over an indexable token sequence that can step back one token."""

    def __init__(self, tokens):
        self.tokens = tokens
        # Index of the token that the next call to __next__ hands out
        self.counter = 0

    def __iter__(self):
        return self

    def __next__(self):
        position = self.counter
        if position >= len(self.tokens):
            raise StopIteration()
        token = self.tokens[position]
        self.counter = position + 1
        return token

    # Alias kept so callers may use ``it.next()`` as well as ``next(it)``
    next = __next__

    def restore_last_token(self):
        """Step back so that the most recently returned token is returned again."""
        self.counter -= 1

2408

2409

parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))

2410

return _build_selector_function(parsed_selector)

2411

2412

def _calc_headers(self, info_dict):
    """Return the HTTP headers to use for ``info_dict['url']``.

    The result merges ``self.params['http_headers']`` with the headers
    carried by ``info_dict``, then adds a ``Cookie`` header from the cookie
    jar and an ``X-Forwarded-For`` header when one is requested through
    ``info_dict['__x_forwarded_for_ip']`` and not already present.
    """
    base_headers = self.params['http_headers']
    res = merge_headers(base_headers, info_dict.get('http_headers') or {})

    if 'Youtubedl-No-Compression' in res:  # deprecated
        # The legacy marker is translated into the header it stood for
        res.pop('Youtubedl-No-Compression', None)
        res['Accept-Encoding'] = 'identity'

    cookie_header = self.cookiejar.get_cookie_header(info_dict['url'])
    if cookie_header:
        res['Cookie'] = cookie_header

    if 'X-Forwarded-For' in res:
        return res
    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip:
        res['X-Forwarded-For'] = forwarded_ip
    return res

def _calc_cookies(self, url):
    """Deprecated: return the cookie header the cookie jar produces for ``url``."""
    notice = '"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version'
    self.deprecation_warning(notice)
    jar = self.cookiejar
    return jar.get_cookie_header(url)

2431

2432

def _sort_thumbnails(self, thumbnails):
    """Sort ``thumbnails`` in place, ascending by preference, width, height, id and url.

    Missing (``None``) preference/width/height count as -1 and a missing id
    as the empty string, so incomplete entries sort first.
    """
    def field(thumb, name, fallback):
        value = thumb.get(name)
        return fallback if value is None else value

    def rank(thumb):
        return (
            field(thumb, 'preference', -1),
            field(thumb, 'width', -1),
            field(thumb, 'height', -1),
            field(thumb, 'id', ''),
            thumb.get('url'),
        )

    thumbnails.sort(key=rank)

2439

2440

def _sanitize_thumbnails(self, info_dict):
    """Normalize ``info_dict['thumbnails']`` in place.

    A lone ``thumbnail`` URL is promoted to a one-element ``thumbnails``
    list. The list is then sorted, every entry gets an ``id``, a
    ``resolution`` (when both dimensions are known) and a sanitized ``url``.
    When the ``check_formats`` param is exactly ``True`` the list is replaced
    by a lazy list that tests each thumbnail URL with a HEAD request.
    """
    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        single = info_dict.get('thumbnail')
        if single:
            thumbnails = [{'url': single}]
            info_dict['thumbnails'] = thumbnails
    if not thumbnails:
        return

    def reachable(candidates):
        # Lazily yield only the thumbnails whose URL answers a HEAD request
        for thumb in candidates:
            self.to_screen(f'[info] Testing thumbnail {thumb["id"]}')
            try:
                self.urlopen(HEADRequest(thumb['url']))
            except network_exceptions as err:
                self.to_screen(f'[info] Unable to connect to thumbnail {thumb["id"]} URL {thumb["url"]!r} - {err}. Skipping...')
            else:
                yield thumb

    self._sort_thumbnails(thumbnails)
    for index, thumb in enumerate(thumbnails):
        if thumb.get('id') is None:
            thumb['id'] = '%d' % index
        width = thumb.get('width')
        height = width and thumb.get('height')
        if width and height:
            thumb['resolution'] = '%dx%d' % (width, height)
        thumb['url'] = sanitize_url(thumb['url'])

    if self.params.get('check_formats') is True:
        # Test in reverse order; reverse=True presents them in the sorted order again
        info_dict['thumbnails'] = LazyList(reachable(thumbnails[::-1]), reverse=True)
        return
    info_dict['thumbnails'] = thumbnails

2471

2472

def _fill_common_fields(self, info_dict, final=True):
    """Derive commonly used fields of ``info_dict`` in place.

    Fills ``fulltitle`` and a generic ``title`` (only when ``final``),
    ``duration_string``, the ``*_date`` fields from their ``*_timestamp``
    counterparts, ``live_status``/``is_live``/``was_live``, and (only when
    ``final``) ``chapter``/``season``/``episode`` titles from their
    ``*_number`` fields.

    @param info_dict    The info dict to update
    @param final        Whether title-like fallbacks should be generated
    """
    # TODO: move sanitization here
    if final:
        title = info_dict['fulltitle'] = info_dict.get('title')
        if not title:
            if title == '':
                self.write_debug('Extractor gave empty title. Creating a generic title')
            else:
                self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
            info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'

    if info_dict.get('duration') is not None:
        info_dict['duration_string'] = formatSeconds(info_dict['duration'])

    for ts_key, date_key in (
            ('timestamp', 'upload_date'),
            ('release_timestamp', 'release_date'),
            ('modified_timestamp', 'modified_date'),
    ):
        if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            with contextlib.suppress(ValueError, OverflowError, OSError):
                # datetime.utcfromtimestamp() is deprecated since Python 3.12;
                # an aware UTC datetime formats to the same calendar date
                upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
                info_dict[date_key] = upload_date.strftime('%Y%m%d')

    live_keys = ('is_live', 'was_live')
    live_status = info_dict.get('live_status')
    if live_status is None:
        # Infer the status from the first truthy boolean live field
        for key in live_keys:
            if info_dict.get(key) is False:
                continue
            if info_dict.get(key):
                live_status = key
            break
        # Only an explicit False on every field means "not live"
        if all(info_dict.get(key) is False for key in live_keys):
            live_status = 'not_live'
    if live_status:
        info_dict['live_status'] = live_status
        for key in live_keys:
            if info_dict.get(key) is None:
                info_dict[key] = (live_status == key)
    if live_status == 'post_live':
        info_dict['was_live'] = True

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

2522

2523

def _raise_pending_errors(self, info):
    """Remove ``__pending_error`` from ``info`` and report it, if one was stored."""
    pending = info.pop('__pending_error', None)
    if not pending:
        return
    self.report_error(pending, tb=False)

2527

2528

def sort_formats(self, info_dict):
    """Sort the formats of ``info_dict`` in place.

    The ordering key comes from a ``FormatSorter`` built with the
    ``_format_sort_fields`` of ``info_dict`` (an empty list when unset).
    """
    formats = self._get_formats(info_dict)
    sort_fields = info_dict.get('_format_sort_fields') or []
    preference = FormatSorter(self, sort_fields).calculate_preference
    formats.sort(key=preference)

2532

2533

def process_video_result(self, info_dict, download=True):
    """Sanitize a single video result, select its formats and process them.

    The info dict is validated and normalized in place (id, numeric fields,
    duration, chapters, thumbnails, subtitles, formats), passed through the
    pre-processors and the match filter, and the formats chosen by the format
    selector are handed to ``process_info`` when ``download`` is true.

    @param info_dict    Result of type "video" (asserted)
    @param download     Whether the selected formats should be processed
    @returns            The (possibly replaced) info dict, updated with the
                        last selected format for backwards compatibility
    Raises ExtractorError when "id" is missing/empty or when no requested
    format is available (unless "ignore_no_formats_error" is set).
    """
    assert info_dict.get('_type', 'video') == 'video'
    self._num_videos += 1

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
    elif not info_dict.get('id'):
        raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

    def report_force_conversion(field, field_not, conversion):
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        field = info.get(string_field)
        if field is None or isinstance(field, str):
            return
        report_force_conversion(string_field, 'a string', 'string')
        info[string_field] = str(field)

    def sanitize_numeric_fields(info):
        for numeric_field in self._NUMERIC_FIELDS:
            field = info.get(numeric_field)
            if field is None or isinstance(field, (int, float)):
                continue
            report_force_conversion(numeric_field, 'numeric', 'int')
            info[numeric_field] = int_or_none(field)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)
    if info_dict.get('section_end') and info_dict.get('section_start') is not None:
        info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
    # A zero/None duration is left untouched; only a truthy non-positive one is dropped with a warning
    if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
        self.report_warning('"duration" field is negative, there is an error in extractor')

    chapters = info_dict.get('chapters') or []
    if chapters and chapters[0].get('start_time'):
        chapters.insert(0, {'start_time': 0})

    # Sentinel neighbour: "previous" of the first chapter ends at 0,
    # "next" of the last chapter starts at the video duration
    dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
    for idx, (prev, current, next_) in enumerate(zip(
            (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
        if current.get('start_time') is None:
            current['start_time'] = prev.get('end_time')
        if not current.get('end_time'):
            current['end_time'] = next_.get('start_time')
        if not current.get('title'):
            current['title'] = f'<Untitled Chapter {idx}>'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    self._sanitize_thumbnails(info_dict)

    thumbnail = info_dict.get('thumbnail')
    thumbnails = info_dict.get('thumbnails')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if info_dict.get('display_id') is None and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    self._fill_common_fields(info_dict)

    for cc_kind in ('subtitles', 'automatic_captions'):
        cc = info_dict.get(cc_kind)
        if cc:
            for _, subtitle in cc.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    formats = self._get_formats(info_dict)

    # Backward compatibility with InfoExtractor._sort_formats
    field_preference = (formats or [{}])[0].pop('__sort_fields', None)
    if field_preference:
        info_dict['_format_sort_fields'] = field_preference

    info_dict['_has_drm'] = any(  # or None ensures --clean-infojson removes it
        f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
    if not self.params.get('allow_unplayable_formats'):
        formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']

    if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
        # Upper-case only the first letter: str.capitalize() would also lowercase
        # the rest of the message ("DRM" -> "drm", "Use" -> "use")
        images_only_msg = 'only images are available for download. Use --list-formats to see them'
        if info_dict['_has_drm']:
            images_only_msg = f'This video is DRM protected and {images_only_msg}'
        else:
            images_only_msg = images_only_msg[0].upper() + images_only_msg[1:]
        self.report_warning(images_only_msg)

    get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
    if not get_from_start:
        info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
    if info_dict.get('is_live') and formats:
        formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
        if get_from_start and not formats:
            self.raise_no_formats(info_dict, msg=(
                '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
                'If you want to download from the current time, use --no-live-from-start'))

    def is_wellformed(f):
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = list(filter(is_wellformed, formats or []))

    if not formats:
        self.raise_no_formats(info_dict)

    for format in formats:
        sanitize_string_field(format, 'format_id')
        sanitize_numeric_fields(format)
        format['url'] = sanitize_url(format['url'])
        if format.get('ext') is None:
            format['ext'] = determine_ext(format['url']).lower()
        if format.get('protocol') is None:
            format['protocol'] = determine_protocol(format)
        if format.get('resolution') is None:
            format['resolution'] = self.format_resolution(format, default=None)
        if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
            format['dynamic_range'] = 'SDR'
        if format.get('aspect_ratio') is None:
            format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
        if (not format.get('manifest_url')  # For fragmented formats, "tbr" is often max bitrate and not average
                and info_dict.get('duration') and format.get('tbr')
                and not format.get('filesize') and not format.get('filesize_approx')):
            format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
        format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict))

    # This is copied to http_headers by the above _calc_headers and can now be removed
    if '__x_forwarded_for_ip' in info_dict:
        del info_dict['__x_forwarded_for_ip']

    self.sort_formats({
        'formats': formats,
        '_format_sort_fields': info_dict.get('_format_sort_fields')
    })

    # Sanitize and group by format_id
    formats_dict = {}
    for i, format in enumerate(formats):
        if not format.get('format_id'):
            format['format_id'] = str(i)
        else:
            # Sanitize format_id from characters used in format selector expression
            format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
        formats_dict.setdefault(format['format_id'], []).append(format)

    # Make sure all formats have unique format_id
    common_exts = set(itertools.chain(*self._format_selection_exts.values()))
    for format_id, ambiguous_formats in formats_dict.items():
        ambigious_id = len(ambiguous_formats) > 1
        for i, format in enumerate(ambiguous_formats):
            if ambigious_id:
                format['format_id'] = '%s-%d' % (format_id, i)
            # Ensure there is no conflict between id and ext in format selection
            # See https://github.com/yt-dlp/yt-dlp/issues/1282
            if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                format['format_id'] = 'f%s' % format['format_id']

            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )

    if self.params.get('check_formats') is True:
        formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

    if not formats or formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats

    info_dict, _ = self.pre_process(info_dict)

    if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
        return info_dict

    self.post_extract(info_dict)
    info_dict, _ = self.pre_process(info_dict, 'after_filter')

    # The pre-processors may have modified the formats
    formats = self._get_formats(info_dict)

    list_only = self.params.get('simulate') == 'list_only'
    interactive_format_selection = not list_only and self.format_selector == '-'
    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
    if self.params.get('listsubtitles'):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
    if self.params.get('listformats') or interactive_format_selection:
        self.list_formats(info_dict)
    if list_only:
        # Without this printing, -F --print-json will not work
        self.__forced_printings(info_dict)
        return info_dict

    format_selector = self.format_selector
    # Loops only in interactive mode, until a usable selector has been entered
    while True:
        if interactive_format_selection:
            req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
                               + '(Press ENTER for default, or Ctrl+C to quit)'
                               + self._format_screen(': ', self.Styles.EMPHASIS))
            try:
                format_selector = self.build_format_selector(req_format) if req_format else None
            except SyntaxError as err:
                self.report_error(err, tb=False, is_error=False)
                continue

        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug(f'Default format spec: {req_format}')
            format_selector = self.build_format_selector(req_format)

        formats_to_download = list(format_selector({
            'formats': formats,
            'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
            'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)  # No formats with video
                                   or all(f.get('acodec') == 'none' for f in formats)),  # OR, No formats with audio
        }))
        if interactive_format_selection and not formats_to_download:
            self.report_error('Requested format is not available', tb=False, is_error=False)
            continue
        break

    if not formats_to_download:
        if not self.params.get('ignore_no_formats_error'):
            raise ExtractorError(
                'Requested format is not available. Use --list-formats for a list of available formats',
                expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
        self.report_warning('Requested format is not available')
        # Process what we can, even without any available formats.
        formats_to_download = [{}]

    requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
    best_format, downloaded_formats = formats_to_download[-1], []
    if download:
        if best_format and requested_ranges:
            def to_screen(*msg):
                self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')

            to_screen(f'Downloading {len(formats_to_download)} format(s):',
                      (f['format_id'] for f in formats_to_download))
            if requested_ranges != ({}, ):
                to_screen(f'Downloading {len(requested_ranges)} time ranges:',
                          (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
        max_downloads_reached = False

        for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
            new_info = self._copy_infodict(info_dict)
            new_info.update(fmt)
            offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
            end_time = offset + min(chapter.get('end_time', duration), duration)
            # duration may not be accurate. So allow deviations <1sec
            if end_time == float('inf') or end_time > offset + duration + 1:
                end_time = None
            if chapter or offset:
                new_info.update({
                    'section_start': offset + chapter.get('start_time', 0),
                    'section_end': end_time,
                    'section_title': chapter.get('title'),
                    'section_number': chapter.get('index'),
                })
            downloaded_formats.append(new_info)
            try:
                self.process_info(new_info)
            except MaxDownloadsReached:
                # Deferred so that the bookkeeping below still runs for this download
                max_downloads_reached = True
            self._raise_pending_errors(new_info)
            # Remove copied info
            for key, val in tuple(new_info.items()):
                if info_dict.get(key) == val:
                    new_info.pop(key)
            if max_downloads_reached:
                break

        write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
        assert write_archive.issubset({True, False, 'ignore'})
        if True in write_archive and False not in write_archive:
            self.record_download_archive(info_dict)

        info_dict['requested_downloads'] = downloaded_formats
        info_dict = self.run_all_pps('after_video', info_dict)
        if max_downloads_reached:
            raise MaxDownloadsReached()

    # We update the info dict with the selected best quality format (backwards compatibility)
    info_dict.update(best_format)
    return info_dict

2849

2850

def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    @param video_id            Video id, used only in messages
    @param normal_subtitles    Mapping of language -> list of subtitle formats
    @param automatic_captions  Same mapping for automatic captions; a language
                               is taken from here only if it has no normal subtitles
    @returns                   Mapping of language -> chosen format dict, or None
                               when nothing is available or subtitle writing is disabled
    Raises ValueError when a "subtitleslangs" entry is an invalid regex.
    """
    available_subs, normal_sub_langs = {}, []
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
        normal_sub_langs = tuple(normal_subtitles.keys())
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            if lang not in available_subs:
                available_subs[lang] = cap_info

    if not available_subs or (
            not self.params.get('writesubtitles')
            and not self.params.get('writeautomaticsub')):
        return None

    all_sub_langs = tuple(available_subs.keys())
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        try:
            requested_langs = orderedSet_from_options(
                self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}') from e
    else:
        # Default: a single language, preferring "en", then "en*", then anything
        requested_langs = LazyList(itertools.chain(
            ['en'] if 'en' in normal_sub_langs else [],
            filter(lambda f: f.startswith('en'), normal_sub_langs),
            ['en'] if 'en' in all_sub_langs else [],
            filter(lambda f: f.startswith('en'), all_sub_langs),
            normal_sub_langs, all_sub_langs,
        ))[:1]
    if requested_langs:
        self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty format list is as unusable as a missing language;
        # indexing it below would raise IndexError
        if not formats:
            self.report_warning(f'{lang} subtitles not available for {video_id}')
            continue
        for ext in formats_preference:
            if ext == 'best':
                f = formats[-1]
                break
            matches = [sub for sub in formats if sub['ext'] == ext]
            if matches:
                f = matches[-1]
                break
        else:
            # No preferred extension matched: fall back to the last listed format
            f = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, f['ext']))
        subs[lang] = f
    return subs

def _forceprint(self, key, info_dict):
    """Print the "forceprint" and "print_to_file" templates registered under ``key``.

    Works on a shallow copy of ``info_dict`` that is enriched with
    ``filename``, ``urls`` and the rendered formats/thumbnails/subtitles
    tables. Returns that copy, or None when ``info_dict`` is None.
    """
    if info_dict is None:
        return None

    printable = info_dict.copy()
    printable.setdefault('filename', self.prepare_filename(info_dict))

    def full_url(fmt):
        # For RTMP URLs, also include the playpath
        return fmt['url'] + fmt.get('play_path', '')

    if info_dict.get('requested_formats') is not None:
        printable['urls'] = '\n'.join(map(full_url, info_dict['requested_formats']))
    elif info_dict.get('url'):
        printable['urls'] = full_url(info_dict)

    printable['formats_table'] = self.render_formats_table(info_dict)
    printable['thumbnails_table'] = self.render_thumbnails_table(info_dict)
    printable['subtitles_table'] = self.render_subtitles_table(
        info_dict.get('id'), info_dict.get('subtitles'))
    printable['automatic_captions_table'] = self.render_subtitles_table(
        info_dict.get('id'), info_dict.get('automatic_captions'))

    def expand(template):
        # Shorthand field lists ("title,id", "{a,b}", "title=") become real
        # output templates; anything else is passed through untouched
        match = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', template)
        if not match:
            return template

        if template.startswith('{'):
            template, pattern = f'.{template}', '%({})j'
        else:
            pattern = '%({})s'
        if template.endswith('='):
            template, pattern = template[:-1], '{0} = %({0})#j'
        fields = [template] if match.group('dict') else template.split(',')
        return '\n'.join(pattern.format(field) for field in fields)

    for template in self.params['forceprint'].get(key, []):
        self.to_stdout(self.evaluate_outtmpl(expand(template), printable))

    for template, file_template in self.params['print_to_file'].get(key, []):
        target = self.prepare_filename(info_dict, outtmpl=file_template)
        expanded = expand(template)
        self.to_screen(f'[info] Writing {expanded!r} to: {target}')
        if not self._ensure_dir_exists(target):
            continue
        with open(target, 'a', encoding='utf-8', newline='') as stream:
            stream.write(self.evaluate_outtmpl(expanded, printable) + os.linesep)

    return printable

def __forced_printings(self, info_dict, filename=None, incomplete=True):
    """Emit everything requested through the "force*" params for one video.

    @param info_dict    The video info dict ("filename" is stored in it when given)
    @param filename     Filename to record in ``info_dict`` before printing
    @param incomplete   When true, a mandatory field that is missing is
                        skipped instead of being looked up
    """
    params = self.params
    if (params.get('forcejson')
            or params['forceprint'].get('video')
            or params['print_to_file'].get('video')):
        self.post_extract(info_dict)
    if filename:
        info_dict['filename'] = filename
    printable = self._forceprint('video', info_dict)

    def emit(field, source=None, optional=False):
        if not self.params.get(f'force{field}'):
            return
        if printable.get(field) is None and (optional or incomplete):
            return
        self.to_stdout(printable[field if source is None else source])

    # (field that triggers the print, field actually printed, optional?)
    for field, source, optional in (
            ('title', None, False),
            ('id', None, False),
            ('url', 'urls', False),
            ('thumbnail', None, True),
            ('description', None, True),
            ('filename', None, False),
    ):
        emit(field, source, optional)
    if self.params.get('forceduration') and printable.get('duration') is not None:
        self.to_stdout(formatSeconds(printable['duration']))
    emit('format')

    if self.params.get('forcejson'):
        self.to_stdout(json.dumps(self.sanitize_info(info_dict)))

2978

2979

def dl(self, name, info, subtitle=False, test=False):
    """Download ``info`` to ``name`` with a suitable downloader and return its result.

    @param name      Destination passed to the downloader ("-" selects stdout)
    @param info      Info dict of the format to download
    @param subtitle  Forwarded to the downloader's ``download``
    @param test      Use a fixed set of test params instead of ``self.params``
                     and skip progress hooks and the debug message
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    if not test:
        params = self.params
    else:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_cls(self, params)
    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)

        def loggable(url):
            # Keep the payload of data: URLs out of the debug output
            return url.split(',')[0] + ',<data>' if url.startswith('data:') else url

        targets = info.get('requested_formats', []) or [info]
        urls = '", "'.join(loggable(f['url']) for f in targets)
        self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')

    # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
    # But it may contain objects that are not deep-copyable
    new_info = self._copy_infodict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)

3013

3014

def existing_file(self, filepaths, *, default_overwrite=True):
    """Return the first of ``filepaths`` that exists, unless overwriting is enabled.

    When the "overwrites" param (defaulting to ``default_overwrite``) is
    truthy, every existing file is reported and deleted and None is returned.
    """
    found = [path for path in orderedSet(filepaths) if os.path.exists(path)]
    if found and not self.params.get('overwrites', default_overwrite):
        return found[0]

    for path in found:
        self.report_file_delete(path)
        os.remove(path)
    return None

def process_info(self, info_dict):

3025

"""Process a single resolved IE result. (Modifies it in-place)"""

3026

3027

assert info_dict.get('_type', 'video') == 'video'

3028

original_infodict = info_dict

3029

3030

if 'format' not in info_dict and 'ext' in info_dict:

3031

info_dict['format'] = info_dict['ext']

3032

3033

if self._match_entry(info_dict) is not None:

3034

info_dict['__write_download_archive'] = 'ignore'

3035

return

3036

3037

# Does nothing under normal operation - for backward compatibility of process_info

3038

self.post_extract(info_dict)

3039

3040

def replace_info_dict(new_info):

3041

nonlocal info_dict

3042

if new_info == info_dict:

3043

return

3044

info_dict.clear()

3045

info_dict.update(new_info)

3046

3047

new_info, _ = self.pre_process(info_dict, 'video')

3048

replace_info_dict(new_info)

3049

self._num_downloads += 1

3050

3051

# info_dict['_filename'] needs to be set for backward compatibility

3052

info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)

3053

temp_filename = self.prepare_filename(info_dict, 'temp')

files_to_move = {}

# Forced printings

self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

3058

3059

def check_max_downloads():

3060

if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):

3061

raise MaxDownloadsReached()

3062

3063

if self.params.get('simulate'):

3064

info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')

3065

check_max_downloads()

3066

return

3067

3068

if full_filename is None:

3069

return

3070

if not self._ensure_dir_exists(encodeFilename(full_filename)):

3071

return

3072

if not self._ensure_dir_exists(encodeFilename(temp_filename)):

3073

return

3074

3075

if self._write_description('video', info_dict,

3076

self.prepare_filename(info_dict, 'description')) is None:

3077

return

3078

3079

sub_files = self._write_subtitles(info_dict, temp_filename)

3080

if sub_files is None:

3081

return

3082

files_to_move.update(dict(sub_files))

3083

3084

thumb_files = self._write_thumbnails(

3085

'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))

3086

if thumb_files is None:

3087

return

3088

files_to_move.update(dict(thumb_files))

3089

3090

infofn = self.prepare_filename(info_dict, 'infojson')

3091

_infojson_written = self._write_info_json('video', info_dict, infofn)

3092

if _infojson_written:

3093

info_dict['infojson_filename'] = infofn

3094

# For backward compatibility, even though it was a private field

3095

info_dict['__infojson_filename'] = infofn

3096

elif _infojson_written is None:

3097

return

3098

3099

# Note: Annotations are deprecated

3100

annofn = None

3101

if self.params.get('writeannotations', False):

3102

annofn = self.prepare_filename(info_dict, 'annotation')

3103

if annofn:

3104

if not self._ensure_dir_exists(encodeFilename(annofn)):

3105

return

3106

if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):

3107

self.to_screen('[info] Video annotations are already present')

3108

elif not info_dict.get('annotations'):

3109

self.report_warning('There are no annotations to write.')

3110

else:

3111

try:

3112

self.to_screen('[info] Writing video annotations to: ' + annofn)

3113

with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:

3114

annofile.write(info_dict['annotations'])

3115

except (KeyError, TypeError):

3116

self.report_warning('There are no annotations to write.')

3117

except OSError:

3118

self.report_error('Cannot write annotations file: ' + annofn)

3119

return

3120

3121

# Write internet shortcut files

3122

def _write_link_file(link_type):
    """Write the internet shortcut of type `link_type` for the current video.

    `link_type` selects the entry of LINK_TEMPLATES that is rendered.

    Returns False when the caller must abort (the target directory could
    not be prepared, or the file could not be written). Returns True
    otherwise, including when nothing is written because the page URL is
    unknown or because an existing shortcut is kept.
    """
    url = try_get(info_dict['webpage_url'], iri_to_uri)
    if not url:
        self.report_warning(
            f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
        return True
    linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
    if not self._ensure_dir_exists(encodeFilename(linkfn)):
        return False
    # Only an explicit overwrites=False keeps an existing shortcut;
    # True, or a None value, rewrites it
    if self.params.get('overwrites', True) is False and os.path.exists(encodeFilename(linkfn)):
        self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
        return True
    try:
        self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
        # 'url' shortcuts are written with CRLF line endings, every other type with LF
        with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                  newline='\r\n' if link_type == 'url' else '\n') as linkfile:
            template_vars = {'url': url}
            if link_type == 'desktop':
                # linkfn without its last len('.desktop') characters
                template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
            linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
    except OSError:
        self.report_error(f'Cannot write internet shortcut {linkfn}')
        return False
    return True

write_links = {

'url': self.params.get('writeurllink'),

3149

'webloc': self.params.get('writewebloclink'),

3150

'desktop': self.params.get('writedesktoplink'),

3151

}

3152

if self.params.get('writelink'):

3153

link_type = ('webloc' if sys.platform == 'darwin'

3154

else 'desktop' if sys.platform.startswith('linux')

3155

else 'url')

3156

write_links[link_type] = True

3157

3158

if any(should_write and not _write_link_file(link_type)

3159

for link_type, should_write in write_links.items()):

3160

return

3161

3162

new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)

3163

replace_info_dict(new_info)

3164

3165

if self.params.get('skip_download'):

3166

info_dict['filepath'] = temp_filename

3167

info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

3168

info_dict['__files_to_move'] = files_to_move

3169

replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))

3170

info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')

3171

else:

3172

# Download

3173

info_dict.setdefault('__postprocessors', [])

3174

try:

3175

3176

def existing_video_file(*filepaths):

3177

ext = info_dict.get('ext')

3178

converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)

3179

file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),

3180

default_overwrite=False)

3181

if file:

3182

info_dict['ext'] = os.path.splitext(file)[1][1:]

3183

return file

3184

3185

fd, success = None, True

3186

if info_dict.get('protocol') or info_dict.get('url'):

3187

fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')

3188

if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (

3189

info_dict.get('section_start') or info_dict.get('section_end')):

3190

msg = ('This format cannot be partially downloaded' if FFmpegFD.available()

3191

else 'You have requested downloading the video partially, but ffmpeg is not installed')

3192

self.report_error(f'{msg}. Aborting')

3193

return

3194

3195

if info_dict.get('requested_formats') is not None:

3196

old_ext = info_dict['ext']

3197

if self.params.get('merge_output_format') is None:

3198

if (info_dict['ext'] == 'webm'

3199

and info_dict.get('thumbnails')

3200

# check with type instead of pp_key, __name__, or isinstance

3201

# since we dont want any custom PPs to trigger this

3202

and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721

3203

info_dict['ext'] = 'mkv'

3204

self.report_warning(

3205

'webm doesn\'t support embedding a thumbnail, mkv will be used')

3206

new_ext = info_dict['ext']

3207

3208

def correct_ext(filename, ext=new_ext):

3209

if filename == '-':

3210

return filename

3211

filename_real_ext = os.path.splitext(filename)[1][1:]

3212

filename_wo_ext = (

3213

os.path.splitext(filename)[0]

3214

if filename_real_ext in (old_ext, new_ext)

3215

else filename)

3216

return f'{filename_wo_ext}.{ext}'

3217

3218

# Ensure filename always has a correct extension for successful merge

3219

full_filename = correct_ext(full_filename)

3220

temp_filename = correct_ext(temp_filename)

3221

dl_filename = existing_video_file(full_filename, temp_filename)

3222

3223

info_dict['__real_download'] = False

3224

# NOTE: Copy so that original format dicts are not modified

3225

info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))

3226

3227

merger = FFmpegMergerPP(self)

3228

downloaded = []

3229

if dl_filename is not None:

3230

self.report_file_already_downloaded(dl_filename)

3231

elif fd:

3232

for f in info_dict['requested_formats'] if fd != FFmpegFD else []:

3233

f['filepath'] = fname = prepend_extension(

3234

correct_ext(temp_filename, info_dict['ext']),

3235

'f%s' % f['format_id'], info_dict['ext'])

3236

downloaded.append(fname)

3237

info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])

3238

success, real_download = self.dl(temp_filename, info_dict)

3239

info_dict['__real_download'] = real_download

3240

else:

3241

if self.params.get('allow_unplayable_formats'):

3242

self.report_warning(

3243

'You have requested merging of multiple formats '

3244

'while also allowing unplayable formats to be downloaded. '

3245

'The formats won\'t be merged to prevent data corruption.')

3246

elif not merger.available:

3247

msg = 'You have requested merging of multiple formats but ffmpeg is not installed'

3248

if not self.params.get('ignoreerrors'):

3249

self.report_error(f'{msg}. Aborting due to --abort-on-error')

3250

return

3251

self.report_warning(f'{msg}. The formats won\'t be merged')

3252

3253

if temp_filename == '-':

3254

reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)

3255

else 'but the formats are incompatible for simultaneous download' if merger.available

3256

else 'but ffmpeg is not installed')

3257

self.report_warning(

3258

f'You have requested downloading multiple formats to stdout {reason}. '

3259

'The formats will be streamed one after the other')

3260

fname = temp_filename

3261

for f in info_dict['requested_formats']:

3262

new_info = dict(info_dict)

3263

del new_info['requested_formats']

3264

new_info.update(f)

3265

if temp_filename != '-':

3266

fname = prepend_extension(

3267

correct_ext(temp_filename, new_info['ext']),

3268

'f%s' % f['format_id'], new_info['ext'])

3269

if not self._ensure_dir_exists(fname):

3270

return

3271

f['filepath'] = fname

3272

downloaded.append(fname)

3273

partial_success, real_download = self.dl(fname, new_info)

3274

info_dict['__real_download'] = info_dict['__real_download'] or real_download

3275

success = success and partial_success

3276

3277

if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):

3278

info_dict['__postprocessors'].append(merger)

3279

info_dict['__files_to_merge'] = downloaded

3280

# Even if there were no downloads, it is being merged only now

3281

info_dict['__real_download'] = True

3282

else:

3283

for file in downloaded:

3284

files_to_move[file] = None

3285

else:

3286

# Just a single file

3287

dl_filename = existing_video_file(full_filename, temp_filename)

3288

if dl_filename is None or dl_filename == temp_filename:

3289

# dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.

3290

# So we should try to resume the download

3291

success, real_download = self.dl(temp_filename, info_dict)

3292

info_dict['__real_download'] = real_download

3293

else:

3294

self.report_file_already_downloaded(dl_filename)

3295

3296

dl_filename = dl_filename or temp_filename

3297

info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

3298

3299

except network_exceptions as err:

3300

self.report_error('unable to download video data: %s' % error_to_compat_str(err))

3301

return

3302

except OSError as err:

3303

raise UnavailableVideoError(err)

3304

except (ContentTooShortError, ) as err:

3305

self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')

3306

return

3307

3308

self._raise_pending_errors(info_dict)

3309

if success and full_filename != '-':

def fixup():

do_fixup = True

fixup_policy = self.params.get('fixup')

3314

vid = info_dict['id']

3315

3316

if fixup_policy in ('ignore', 'never'):

3317

return

3318

elif fixup_policy == 'warn':

3319

do_fixup = 'warn'

3320

elif fixup_policy != 'force':

3321

assert fixup_policy in ('detect_or_warn', None)

3322

if not info_dict.get('__real_download'):

3323

do_fixup = False

3324

3325

def ffmpeg_fixup(cndn, msg, cls):

3326

if not (do_fixup and cndn):

3327

return

3328

elif do_fixup == 'warn':

3329

self.report_warning(f'{vid}: {msg}')

return

pp = cls(self)

if pp.available:

info_dict['__postprocessors'].append(pp)

3334

else:

3335

self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

3336

3337

stretched_ratio = info_dict.get('stretched_ratio')

3338

ffmpeg_fixup(stretched_ratio not in (1, None),

3339

f'Non-uniform pixel ratio {stretched_ratio}',

3340

FFmpegFixupStretchedPP)

3341

3342

downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None

3343

downloader = downloader.FD_NAME if downloader else None

3344

3345

ext = info_dict.get('ext')

3346

postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((

3347

isinstance(pp, FFmpegVideoConvertorPP)

3348

and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)

3349

) for pp in self._pps['post_process'])

3350

3351

if not postprocessed_by_ffmpeg:

3352

ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',

3353

'writing DASH m4a. Only some players support this container',

3354

FFmpegFixupM4aPP)

3355

ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')

3356

or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,

3357

'Possible MPEG-TS in MP4 container or malformed AAC timestamps',

3358

FFmpegFixupM3u8PP)

3359

ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',

3360

'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

3361

3362

ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)

3363

ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)

fixup()

try:

replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))

3368

except PostProcessingError as err:

3369

self.report_error('Postprocessing: %s' % str(err))

3370

return

3371

try:

3372

for ph in self._post_hooks:

3373

ph(info_dict['filepath'])

3374

except Exception as err:

3375

self.report_error('post hooks: %s' % str(err))

3376

return

3377

info_dict['__write_download_archive'] = True

3378

3379

assert info_dict is original_infodict # Make sure the info_dict was modified in-place

3380

if self.params.get('force_write_download_archive'):

3381

info_dict['__write_download_archive'] = True

3382

check_max_downloads()

3383

3384

def __download_wrapper(self, func):

3385

@functools.wraps(func)

3386

def wrapper(*args, **kwargs):

3387

try:

3388

res = func(*args, **kwargs)

3389

except UnavailableVideoError as e:

3390

self.report_error(e)

3391

except DownloadCancelled as e:

3392

self.to_screen(f'[info] {e}')

3393

if not self.params.get('break_per_url'):

3394

raise

3395

self._num_downloads = 0

3396

else:

3397

if self.params.get('dump_single_json', False):

3398

self.post_extract(res)

3399

self.to_stdout(json.dumps(self.sanitize_info(res)))

3400

return wrapper

3401

3402

def download(self, url_list):
    """Download a given list of URLs."""
    url_list = variadic(url_list)  # Passing a single URL is a common mistake
    outtmpl = self.params['outtmpl']['default']

    # Several URLs written to one fixed (template-less) filename would overwrite each other
    if len(url_list) > 1 and outtmpl != '-':
        if '%' not in outtmpl and self.params.get('max_downloads') != 1:
            raise SameFileError(outtmpl)

    for url in url_list:
        extract = self.__download_wrapper(self.extract_info)
        extract(url, force_generic_extractor=self.params.get('force_generic_extractor', False))

    return self._download_retcode

3417

3418

def download_with_info_file(self, info_filename):
    """Download using the info dict(s) stored as JSON in `info_filename`.

    If processing a stored info dict fails and it has a 'webpage_url',
    a warning is reported and that URL is downloaded instead.
    Returns the download return code.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as f:
        # FileInput doesn't have a read method, we can't call json.load
        parsed = json.loads('\n'.join(f))
        infos = []
        for raw_info in variadic(parsed):
            infos.append(self.sanitize_info(raw_info, self.params.get('clean_infojson', True)))

    for info in infos:
        try:
            self.__download_wrapper(self.process_ie_result)(info, download=True)
        except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
            if not isinstance(e, EntryNotInPlaylist):
                self.to_stderr('\r')
            webpage_url = info.get('webpage_url')
            if webpage_url is None:
                raise
            self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
            self.download([webpage_url])
    return self._download_retcode

3437

3438

@staticmethod
def sanitize_info(info_dict, remove_private_keys=False):
    """Sanitize the infodict for converting to json

    Fills in 'epoch', '_type' and '_version' on `info_dict` itself when they
    are missing, then returns a filtered copy. With `remove_private_keys`,
    None values, keys starting with '__' and the keys listed below are
    dropped at every nesting level.
    """
    if info_dict is None:
        return info_dict
    info_dict.setdefault('epoch', int(time.time()))
    info_dict.setdefault('_type', 'video')
    info_dict.setdefault('_version', {
        'version': __version__,
        'current_git_head': current_git_head(),
        'release_git_head': RELEASE_GIT_HEAD,
        'repository': REPOSITORY,
    })

    private_keys = {
        'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
        'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
        'playlist_autonumber', '_format_sort_fields',
    }

    def is_rejected(key, value):
        if not remove_private_keys:
            return False
        return value is None or key.startswith('__') or key in private_keys

    def to_jsonable(obj):
        if isinstance(obj, dict):
            return {k: to_jsonable(v) for k, v in obj.items() if not is_rejected(k, v)}
        if isinstance(obj, (list, tuple, set, LazyList)):
            return [to_jsonable(item) for item in obj]
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        # Anything else is stored as its repr
        return repr(obj)

    return to_jsonable(info_dict)

3472

3473

@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    """Alias of sanitize_info for backward compatibility

    `actually_filter` is forwarded as sanitize_info's `remove_private_keys`.
    """
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)

3477

3478

def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):

3479

for filename in set(filter(None, files_to_delete)):

3480

if msg:

3481

self.to_screen(msg % filename)

try:

os.remove(filename)

except OSError:

self.report_warning(f'Unable to delete file {filename}')

3486

if filename in info.get('__files_to_move', []): # NB: Delete even if None

3487

del info['__files_to_move'][filename]

3488

3489

@staticmethod

3490

def post_extract(info_dict):

3491

def actual_post_extract(info_dict):

3492

if info_dict.get('_type') in ('playlist', 'multi_video'):

3493

for video_dict in info_dict.get('entries', {}):

3494

actual_post_extract(video_dict or {})

3495

return

3496

3497

post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})

3498

info_dict.update(post_extractor())

3499

3500

actual_post_extract(info_dict or {})

3501

3502

def run_pp(self, pp, infodict):
    """Run the postprocessor `pp` on `infodict` and return the resulting info dict.

    Files that `pp` reports for deletion are either kept (registered in
    '__files_to_move' when the 'keepvideo' param is set) or deleted.
    A PostProcessingError is reported instead of raised only when the
    'ignoreerrors' param is exactly True.
    """
    infodict.setdefault('__files_to_move', {})
    try:
        files_to_delete, infodict = pp.run(infodict)
    except PostProcessingError as e:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is not True:
            raise
        self.report_error(e)
        return infodict

    if files_to_delete:
        if self.params.get('keepvideo', False):
            for kept_file in files_to_delete:
                infodict['__files_to_move'].setdefault(kept_file, '')
        else:
            self._delete_downloaded_files(
                *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
    return infodict

3524

3525

def run_all_pps(self, key, info, *, additional_pps=None):
    """Run `additional_pps` followed by the postprocessors registered under `key`.

    Unless `key` is 'video', the forced prints for `key` are emitted first.
    Returns the info dict produced by the last postprocessor.
    """
    if key != 'video':
        self._forceprint(key, info)
    pipeline = (additional_pps or []) + self._pps[key]
    for pp in pipeline:
        info = self.run_pp(pp, info)
    return info

3531

3532

def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered under `key` on a shallow copy of `ie_info`.

    Returns a tuple (info, files_to_move). A PostProcessingError is not
    raised; it is stored under '__pending_error' (unless one is already
    set) and reported with is_error=False.
    """
    info = {**ie_info, '__files_to_move': files_to_move or {}}
    try:
        info = self.run_all_pps(key, info)
    except PostProcessingError as err:
        msg = f'Preprocessing: {err}'
        info.setdefault('__pending_error', msg)
        self.report_error(msg, is_error=False)
    remaining_files = info.pop('__files_to_move', None)
    return info, remaining_files

3542

3543

def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file."""
    info.update({
        'filepath': filename,
        '__files_to_move': files_to_move or {},
    })
    # The postprocessors queued on the info dict run before the registered 'post_process' ones
    queued_pps = info.get('__postprocessors')
    info = self.run_all_pps('post_process', info, additional_pps=queued_pps)
    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    del info['__files_to_move']
    return self.run_all_pps('after_move', info)

3551

3552

def _make_archive_id(self, info_dict):

3553

video_id = info_dict.get('id')

3554

if not video_id:

3555

return

3556

# Future-proof against any change in case

3557

# and backwards compatibility with prior versions

3558

extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist

3559

if extractor is None:

3560

url = str_or_none(info_dict.get('url'))

3561

if not url:

3562

return

3563

# Try to find matching extractor for the URL and take its ie_key

3564

for ie_key, ie in self._ies.items():

if ie.suitable(url):

extractor = ie_key

break

else:

return

return make_archive_id(extractor, video_id)

3571

3572

def in_download_archive(self, info_dict):
    """Return whether the video's archive id, or any of its '_old_archive_ids', is in the archive."""
    if not self.archive:
        return False

    candidates = [
        self._make_archive_id(info_dict),
        *(info_dict.get('_old_archive_ids') or []),
    ]
    return any(archive_id in self.archive for archive_id in candidates)

3579

3580

def record_download_archive(self, info_dict):
    """Add the video's archive id to the download archive.

    Does nothing when the 'download_archive' param is None. When that
    param is path-like, the id is also appended to the file.
    """
    archive_location = self.params.get('download_archive')
    if archive_location is None:
        return
    archive_id = self._make_archive_id(info_dict)
    assert archive_id

    self.write_debug(f'Adding to archive: {archive_id}')
    if is_path_like(archive_location):
        with locked_file(archive_location, 'a', encoding='utf-8') as archive_file:
            archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)

3592

3593

@staticmethod

3594

def format_resolution(format, default='unknown'):

3595

if format.get('vcodec') == 'none' and format.get('acodec') != 'none':

3596

return 'audio only'

3597

if format.get('resolution') is not None:

3598

return format['resolution']

3599

if format.get('width') and format.get('height'):

3600

return '%dx%d' % (format['width'], format['height'])

3601

elif format.get('height'):

3602

return '%sp' % format['height']

3603

elif format.get('width'):

3604

return '%dx?' % format['width']

3605

return default

3606

3607

def _list_format_headers(self, *headers):

3608

if self.params.get('listformats_table', True) is not False:

3609

return [self._format_out(header, self.Styles.HEADERS) for header in headers]

3610

return headers

3611

3612

def _format_note(self, fdict):

3613

res = ''

3614

if fdict.get('ext') in ['f4f', 'f4m']:

3615

res += '(unsupported)'

3616

if fdict.get('language'):

3617

if res:

3618

res += ' '

3619

res += '[%s]' % fdict['language']

3620

if fdict.get('format_note') is not None:

3621

if res:

3622

res += ' '

3623

res += fdict['format_note']

3624

if fdict.get('tbr') is not None:

3625

if res:

3626

res += ', '

3627

res += '%4dk' % fdict['tbr']

3628

if fdict.get('container') is not None:

3629

if res:

3630

res += ', '

3631

res += '%s container' % fdict['container']

3632

if (fdict.get('vcodec') is not None

3633

and fdict.get('vcodec') != 'none'):

3634

if res:

3635

res += ', '

3636

res += fdict['vcodec']

3637

if fdict.get('vbr') is not None:

3638

res += '@'

3639

elif fdict.get('vbr') is not None and fdict.get('abr') is not None:

3640

res += 'video@'

3641

if fdict.get('vbr') is not None:

3642

res += '%4dk' % fdict['vbr']

3643

if fdict.get('fps') is not None:

3644

if res:

3645

res += ', '

3646

res += '%sfps' % fdict['fps']

3647

if fdict.get('acodec') is not None:

3648

if res:

3649

res += ', '

3650

if fdict['acodec'] == 'none':

3651

res += 'video only'

3652

else:

3653

res += '%-5s' % fdict['acodec']

3654

elif fdict.get('abr') is not None:

if res:

res += ', '

res += 'audio'

if fdict.get('abr') is not None:

3659

res += '@%3dk' % fdict['abr']

3660

if fdict.get('asr') is not None:

3661

res += ' (%5dHz)' % fdict['asr']

3662

if fdict.get('filesize') is not None:

3663

if res:

3664

res += ', '

3665

res += format_bytes(fdict['filesize'])

3666

elif fdict.get('filesize_approx') is not None:

3667

if res:

3668

res += ', '

3669

res += '~' + format_bytes(fdict['filesize_approx'])

3670

return res

3671

3672

def _get_formats(self, info_dict):

3673

if info_dict.get('formats') is None:

3674

if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':

3675

return [info_dict]

3676

return []

3677

return info_dict['formats']

3678

3679

def render_formats_table(self, info_dict):
    """Render the table of available formats for `info_dict`.

    Returns None when there are no formats. The simple four-column table
    is used when the 'listformats_table' param is False; otherwise the
    detailed table is rendered. Formats with a preference below -1000
    are left out of both.
    """
    formats = self._get_formats(info_dict)
    if not formats:
        return

    if self.params.get('listformats_table', True) is False:
        simple_rows = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)
            ] for f in formats if (f.get('preference') or 0) >= -1000]
        return render_table(['format code', 'extension', 'resolution', 'note'], simple_rows, extra_gap=1)

    def simplified_codec(f, field):
        assert field in ('acodec', 'vcodec')
        codec = f.get(field)
        if not codec:
            return 'unknown'
        if codec != 'none':
            return '.'.join(codec.split('.')[:4])

        # The requested codec is 'none'; the label depends on the other codec
        is_video_column = field == 'vcodec'
        if f.get('acodec' if is_video_column else 'vcodec') == 'none':
            return 'images' if is_video_column else ''
        return self._format_out('audio only' if is_video_column else 'video only',
                                self.Styles.SUPPRESS)

    def filesize_cell(f):
        # Exact size, else approximate size, else an estimate from duration and tbr
        return (
            format_field(f, 'filesize', ' \t%s', func=format_bytes)
            or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
            or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
                            None, self._format_out('~\t%s', self.Styles.SUPPRESS)))

    def more_info_cell(f):
        language = format_field(f, 'language', '[%s]')
        if f.get('ext') in ('f4f', 'f4m'):
            unsupported = self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT)
        else:
            unsupported = None
        if f.get('has_drm') == 'maybe':
            drm = self._format_out('Maybe DRM', self.Styles.WARNING)
        elif f.get('has_drm'):
            drm = self._format_out('DRM', self.Styles.BAD_FORMAT)
        else:
            drm = None
        details = join_nonempty(
            unsupported,
            drm,
            format_field(f, 'format_note'),
            format_field(f, 'container', ignore=(None, f.get('ext'))),
            delim=', ')
        return join_nonempty(language, details, delim=' ')

    delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)

    def build_row(f):
        return [
            self._format_out(format_field(f, 'format_id'), self.Styles.ID),
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d', func=round),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            format_field(f, 'audio_channels', '\t%s'),
            delim,
            filesize_cell(f),
            format_field(f, 'tbr', '\t%dk', func=round),
            shorten_protocol_name(f.get('protocol', '')),
            delim,
            simplified_codec(f, 'vcodec'),
            format_field(f, 'vbr', '\t%dk', func=round),
            simplified_codec(f, 'acodec'),
            format_field(f, 'abr', '\t%dk', func=round),
            format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
            more_info_cell(f),
        ]

    table = [build_row(f) for f in formats if f.get('preference') is None or f['preference'] >= -1000]
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
        delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    return render_table(
        header_line, table, hide_empty=True,
        delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))

3745

3746

def render_thumbnails_table(self, info_dict):
    """Render the table of thumbnails of `info_dict`, or return None when there are none."""
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        return None
    header = self._list_format_headers('ID', 'Width', 'Height', 'URL')
    rows = [
        [thumb.get('id'), thumb.get('width') or 'unknown', thumb.get('height') or 'unknown', thumb['url']]
        for thumb in thumbnails]
    return render_table(header, rows)

3753

3754

def render_subtitles_table(self, video_id, subtitles):
    """Render the table of available subtitles, or return None when `subtitles` is empty.

    `subtitles` maps a language to its list of format dicts.
    """
    def _row(lang, formats):
        # Formats are listed in reverse order
        pairs = [(f['ext'], f.get('name') or 'unknown') for f in reversed(formats)]
        exts, names = zip(*pairs)
        if len(set(names)) == 1:
            # A name shared by all formats is shown once, and not at all if it is unknown
            names = () if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    if not subtitles:
        return None
    return render_table(
        self._list_format_headers('Language', 'Name', 'Formats'),
        [_row(lang, formats) for lang, formats in subtitles.items()],
        hide_empty=True)

3767

3768

def __list_table(self, video_id, name, func, *args):

3769

table = func(*args)

3770

if not table:

3771

self.to_screen(f'{video_id} has no {name}')

3772

return

3773

self.to_screen(f'[info] Available {name} for {video_id}:')

3774

self.to_stdout(table)

3775

3776

def list_formats(self, info_dict):
    """Print the formats table of `info_dict`."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'formats', self.render_formats_table, info_dict)

3778

3779

def list_thumbnails(self, info_dict):
    """Print the thumbnails table of `info_dict`."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'thumbnails', self.render_thumbnails_table, info_dict)

3781

3782

def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the subtitles table for `video_id`; `name` is the label used in the messages."""
    renderer = self.render_subtitles_table
    self.__list_table(video_id, name, renderer, video_id, subtitles)

3784

3785

def urlopen(self, req):
    """ Start an HTTP download """
    # A plain string is turned into a request object first
    request = sanitized_Request(req) if isinstance(req, str) else req
    return self._opener.open(request, timeout=self._socket_timeout)

3790

3791

def print_debug_header(self):
    """Write the '[debug]' header lines; does nothing unless the 'verbose' param is set.

    The header covers encodings, version and variant, compatibility
    options, git head, system identifier, executable versions, optional
    libraries, the proxy map, and loaded plugins with their directories.
    """
    if not self.params.get('verbose'):
        return

    from . import _IN_CLI  # Must be delayed import

    # These imports can be slow. So import them only as needed
    from .extractor.extractors import _LAZY_LOADER
    from .extractor.extractors import (
        _PLUGIN_CLASSES as plugin_ies,
        _PLUGIN_OVERRIDES as plugin_ie_overrides
    )

    def get_encoding(stream):
        description = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
        flags = []
        if os.environ.get('TERM', '').lower() == 'dumb':
            flags.append('dumb')
        if not supports_terminal_sequences(stream):
            from .utils import WINDOWS_VT_MODE  # Must be imported locally
            flags.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
        if flags:
            description = f'{description} ({",".join(flags)})'
        return description

    locale_encoding = locale.getpreferredencoding()
    fs_encoding = sys.getfilesystemencoding()
    preferred_encoding = self.get_encoding()
    stream_encodings = ', '.join(
        f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
        if stream is not None and key != 'console')
    encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
        locale_encoding, fs_encoding, preferred_encoding, stream_encodings)

    logger = self.params.get('logger')
    if logger:
        def write_debug(msg):
            logger.debug(f'[debug] {msg}')

        write_debug(encoding_str)
    else:
        write_string(f'[debug] {encoding_str}\n', encoding=None)

        def write_debug(msg):
            self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    if VARIANT not in (None, 'pip'):
        source += '*'
    klass = type(self)
    if _IN_CLI:
        api_label = ''
    elif klass == YoutubeDL:
        api_label = 'API'
    else:
        api_label = f'API:{self.__module__}.{klass.__qualname__}'
    write_debug(join_nonempty(
        f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
        f'{CHANNEL}@{__version__}',
        f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        api_label,
        delim=' '))

    if not _IN_CLI:
        write_debug(f'params: {self.params}')

    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if self.params['compat_opts']:
        write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

    if current_git_head():
        write_debug(f'Git HEAD: {current_git_head()}')
    write_debug(system_identifier())

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
    if ffmpeg_features:
        exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .compat.compat_utils import get_package_info
    from .dependencies import available_dependencies

    library_names = sorted({
        join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
    })
    write_debug('Optional libraries: %s' % (', '.join(library_names) or 'none'))

    self._setup_opener()
    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_debug(f'Proxy map: {proxy_map}')

    for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
        display_list = ['%s%s' % (
            plugin_cls.__name__, '' if plugin_cls.__name__ == name else f' as {name}')
            for name, plugin_cls in plugins.items()]
        if plugin_type == 'Extractor':
            display_list.extend(f'{overrides[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
                                for parent, overrides in plugin_ie_overrides.items())
        if not display_list:
            continue
        write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')

    plugin_dirs = plugin_directories()
    if plugin_dirs:
        write_debug(f'Plugin directories: {plugin_dirs}')

    # Not implemented
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
        write_debug('Public IP address: %s' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode()
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)

3912

3913

def _setup_opener(self):
    '''Create the urllib opener used for requests and store it as self._opener.

    Returns immediately when self._opener already exists. Otherwise this also
    sets self._socket_timeout and self.cookiejar from the 'socket_timeout',
    'cookiefile' and 'cookiesfrombrowser' parameters.
    '''
    if hasattr(self, '_opener'):
        return

    params = self.params

    socket_timeout = params.get('socket_timeout')
    # Use 20 when no timeout was configured
    self._socket_timeout = float(socket_timeout) if socket_timeout is not None else 20

    browser_spec = params.get('cookiesfrombrowser')
    cookie_file = params.get('cookiefile')
    proxy_url = params.get('proxy')

    self.cookiejar = load_cookies(cookie_file, browser_spec, self)
    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if proxy_url is None:
        # No explicit proxy: use whatever urllib detects from the environment
        proxies = urllib.request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_url == '':
        # An empty string explicitly disables proxying
        proxies = {}
    else:
        proxies = {'http': proxy_url, 'https': proxy_url}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = int(bool(params.get('debug_printtraffic')))
    https_handler = make_HTTPS_handler(params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = urllib.request.DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = urllib.request.FileHandler()

    if not params.get('enable_file_urls'):
        def _blocked_file_open(*args, **kwargs):
            raise urllib.error.URLError(
                'file:// URLs are explicitly disabled in yt-dlp for security reasons. '
                'Use --enable-file-urls to enable at your own risk.')
        # Shadow the handler's file_open so every file:// request is rejected
        file_handler.file_open = _blocked_file_open

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = urllib.request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

3965

3966

def encode(self, s):
    '''Return s as bytes, encoding it with the encoding from get_encoding().

    A bytes argument is returned unchanged. If encoding fails, the
    UnicodeEncodeError is re-raised with a hint about the --encoding option
    appended to its reason.
    '''
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as exc:
            exc.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s  # Already encoded

3975

3976

def get_encoding(self):
    '''Return the 'encoding' parameter, or preferredencoding() when it is None.'''
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured

3981

3982

def _write_info_json(self, label, ie_result, infofn, overwrite=None):
    '''Write the sanitized info dict of ie_result as JSON to infofn.

    Returns True = written, 'exists' = already exists, False = skip, None = error.
    When overwrite is None, the 'overwrites' parameter (default True) is used.
    '''
    if overwrite is None:
        overwrite = self.params.get('overwrites', True)

    if not self.params.get('writeinfojson'):
        return False
    if not infofn:
        self.write_debug(f'Skipping writing {label} infojson')
        return False
    if not self._ensure_dir_exists(infofn):
        return None
    if not overwrite and os.path.exists(infofn):
        self.to_screen(f'[info] {label.title()} metadata is already present')
        return 'exists'

    self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
    try:
        clean = self.params.get('clean_infojson', True)
        write_json_file(self.sanitize_info(ie_result, clean), infofn)
    except OSError:
        self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
        return None
    return True

4004

4005

def _write_description(self, label, ie_result, descfn):
    '''Write ie_result's description to descfn as UTF-8 text.

    Returns True = written, False = skip, None = error.
    True is also returned when overwriting is disabled and descfn already exists.
    '''
    if not self.params.get('writedescription'):
        return False
    if not descfn:
        self.write_debug(f'Skipping writing {label} description')
        return False
    if not self._ensure_dir_exists(descfn):
        return None
    if not self.params.get('overwrites', True) and os.path.exists(descfn):
        self.to_screen(f'[info] {label.title()} description is already present')
        return True

    description = ie_result.get('description')
    if description is None:
        self.to_screen(f'[info] There\'s no {label} description to write')
        return False

    try:
        self.to_screen(f'[info] Writing {label} description to: {descfn}')
        with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
            descfile.write(description)
    except OSError:
        self.report_error(f'Cannot write {label} description file {descfn}')
        return None
    return True

def _write_subtitles(self, info_dict, filename):
    '''Write the requested subtitles next to filename.

    Returns a list of (sub_filename, final_sub_filename) tuples, or None if a
    subtitle file could not be written. Each processed entry of
    info_dict['requested_subtitles'] gets a 'filepath' key set.
    Raises DownloadError when a subtitle download fails and the
    'ignoreerrors' parameter is not True.
    '''
    ret = []
    subtitles = info_dict.get('requested_subtitles')
    wanted = self.params.get('writesubtitles') or self.params.get('writeautomaticsub')
    if not wanted:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        return ret
    if not subtitles:
        self.to_screen('[info] There are no subtitles for the requested languages')
        return ret

    final_base = self.prepare_filename(info_dict, 'subtitle')
    if not final_base:
        self.to_screen('[info] Skipping writing video subtitles')
        return ret

    for lang, sub in subtitles.items():
        fmt = sub['ext']
        tmp_name = subtitles_filename(filename, lang, fmt, info_dict.get('ext'))
        final_name = subtitles_filename(final_base, lang, fmt, info_dict.get('ext'))

        present = self.existing_file((final_name, tmp_name))
        if present:
            self.to_screen(f'[info] Video subtitle {lang}.{fmt} is already present')
            sub['filepath'] = present
            ret.append((present, final_name))
            continue

        self.to_screen(f'[info] Writing video subtitles to: {tmp_name}')

        # Subtitle content already embedded in the info dict: write it directly
        if sub.get('data') is not None:
            try:
                # Use newline='' to prevent conversion of newline characters
                # See https://github.com/ytdl-org/youtube-dl/issues/10268
                with open(tmp_name, 'w', encoding='utf-8', newline='') as subfile:
                    subfile.write(sub['data'])
            except OSError:
                self.report_error(f'Cannot write video subtitles file {tmp_name}')
                return None
            sub['filepath'] = tmp_name
            ret.append((tmp_name, final_name))
            continue

        # Otherwise the subtitle has to be downloaded
        try:
            request_info = sub.copy()
            request_info.setdefault('http_headers', info_dict.get('http_headers'))
            self.dl(tmp_name, request_info, subtitle=True)
            sub['filepath'] = tmp_name
            ret.append((tmp_name, final_name))
        except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
            msg = f'Unable to download video subtitles for {lang!r}: {err}'
            ignore = self.params.get('ignoreerrors')
            if ignore is not True:  # False or 'only_download'
                if not ignore:
                    self.report_error(msg)
                raise DownloadError(msg)
            self.report_warning(msg)
    return ret

4084

4085

def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
    '''Write thumbnails to file and return list of (thumb_filename, final_thumb_filename).

    Thumbnails are only considered when the 'writethumbnail' or
    'write_all_thumbnails' parameter is set. They are processed from the last
    entry of info_dict['thumbnails'] to the first; unless all thumbnails are
    requested, processing stops after the first one that is present or
    downloaded. Each such thumbnail dict gets a 'filepath' key.

    Entries whose download fails with a network error are removed from
    info_dict['thumbnails'] in place.

    thumb_filename_base defaults to filename when it is None.
    '''
    write_all = self.params.get('write_all_thumbnails', False)
    thumbnails, ret = [], []
    if write_all or self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails') or []
        if not thumbnails:
            self.to_screen(f'[info] There are no {label} thumbnails to download')
            return ret
    # Only disambiguate filenames by thumbnail id when several are written
    multiple = write_all and len(thumbnails) > 1

    if thumb_filename_base is None:
        thumb_filename_base = filename
    if thumbnails and not thumb_filename_base:
        self.write_debug(f'Skipping writing {label} thumbnail')
        return ret

    # Walk a reversed snapshot so that pop(idx) below never shifts the
    # indices of entries that are still to be visited
    for idx, t in list(enumerate(thumbnails))[::-1]:
        thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
        thumb_display_id = f'{label} thumbnail {t["id"]}'
        thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
        thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

        existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
        if existing_thumb:
            self.to_screen('[info] %s is already present' % (
                thumb_display_id if multiple else f'{label} thumbnail').capitalize())
            t['filepath'] = existing_thumb
            ret.append((existing_thumb, thumb_filename_final))
        else:
            self.to_screen(f'[info] Downloading {thumb_display_id} ...')
            try:
                request = sanitized_Request(t['url'], headers=t.get('http_headers', {}))
                # Close the response deterministically instead of leaving the
                # connection open until the object is garbage-collected
                with contextlib.closing(self.urlopen(request)) as uf:
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                ret.append((thumb_filename, thumb_filename_final))
                t['filepath'] = thumb_filename
            except network_exceptions as err:
                if isinstance(err, urllib.error.HTTPError) and err.code == 404:
                    self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
                else:
                    self.report_warning(f'Unable to download {thumb_display_id}: {err}')
                thumbnails.pop(idx)
        if ret and not write_all:
            break
    return ret