jfr.im git - yt-dlp.git/blame_incremental

Commit	Line	Data
	1	import collections
	2	import contextlib
	3	import copy
	4	import datetime
	5	import errno
	6	import fileinput
	7	import http.cookiejar
	8	import io
	9	import itertools
	10	import json
	11	import locale
	12	import operator
	13	import os
	14	import random
	15	import re
	16	import shutil
	17	import string
	18	import subprocess
	19	import sys
	20	import tempfile
	21	import time
	22	import tokenize
	23	import traceback
	24	import unicodedata
	25
	26	from .cache import Cache
	27	from .compat import functools, urllib # isort: split
	28	from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
	29	from .cookies import LenientSimpleCookie, load_cookies
	30	from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
	31	from .downloader.rtmp import rtmpdump_version
	32	from .extractor import gen_extractor_classes, get_info_extractor
	33	from .extractor.common import UnsupportedURLIE
	34	from .extractor.openload import PhantomJSwrapper
	35	from .minicurses import format_text
	36	from .networking import HEADRequest, Request, RequestDirector
	37	from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES
	38	from .networking.exceptions import (
	39	HTTPError,
	40	NoSupportingHandlers,
	41	RequestError,
	42	SSLError,
	43	network_exceptions,
	44	)
	45	from .plugins import directories as plugin_directories
	46	from .postprocessor import _PLUGIN_CLASSES as plugin_pps
	47	from .postprocessor import (
	48	EmbedThumbnailPP,
	49	FFmpegFixupDuplicateMoovPP,
	50	FFmpegFixupDurationPP,
	51	FFmpegFixupM3u8PP,
	52	FFmpegFixupM4aPP,
	53	FFmpegFixupStretchedPP,
	54	FFmpegFixupTimestampPP,
	55	FFmpegMergerPP,
	56	FFmpegPostProcessor,
	57	FFmpegVideoConvertorPP,
	58	MoveFilesAfterDownloadPP,
	59	get_postprocessor,
	60	)
	61	from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
	62	from .update import (
	63	REPOSITORY,
	64	_get_system_deprecation,
	65	_make_label,
	66	current_git_head,
	67	detect_variant,
	68	)
	69	from .utils import (
	70	DEFAULT_OUTTMPL,
	71	IDENTITY,
	72	LINK_TEMPLATES,
	73	MEDIA_EXTENSIONS,
	74	NO_DEFAULT,
	75	NUMBER_RE,
	76	OUTTMPL_TYPES,
	77	POSTPROCESS_WHEN,
	78	STR_FORMAT_RE_TMPL,
	79	STR_FORMAT_TYPES,
	80	ContentTooShortError,
	81	DateRange,
	82	DownloadCancelled,
	83	DownloadError,
	84	EntryNotInPlaylist,
	85	ExistingVideoReached,
	86	ExtractorError,
	87	FormatSorter,
	88	GeoRestrictedError,
	89	ISO3166Utils,
	90	LazyList,
	91	MaxDownloadsReached,
	92	Namespace,
	93	PagedList,
	94	PlaylistEntries,
	95	Popen,
	96	PostProcessingError,
	97	ReExtractInfo,
	98	RejectedVideoReached,
	99	SameFileError,
	100	UnavailableVideoError,
	101	UserNotLive,
	102	age_restricted,
	103	args_to_str,
	104	bug_reports_message,
	105	date_from_str,
	106	deprecation_warning,
	107	determine_ext,
	108	determine_protocol,
	109	encode_compat_str,
	110	encodeFilename,
	111	error_to_compat_str,
	112	escapeHTML,
	113	expand_path,
	114	extract_basic_auth,
	115	filter_dict,
	116	float_or_none,
	117	format_bytes,
	118	format_decimal_suffix,
	119	format_field,
	120	formatSeconds,
	121	get_compatible_ext,
	122	get_domain,
	123	int_or_none,
	124	iri_to_uri,
	125	is_path_like,
	126	join_nonempty,
	127	locked_file,
	128	make_archive_id,
	129	make_dir,
	130	number_of_digits,
	131	orderedSet,
	132	orderedSet_from_options,
	133	parse_filesize,
	134	preferredencoding,
	135	prepend_extension,
	136	remove_terminal_sequences,
	137	render_table,
	138	replace_extension,
	139	sanitize_filename,
	140	sanitize_path,
	141	sanitize_url,
	142	str_or_none,
	143	strftime_or_none,
	144	subtitles_filename,
	145	supports_terminal_sequences,
	146	system_identifier,
	147	timetuple_from_msec,
	148	to_high_limit_path,
	149	traverse_obj,
	150	try_call,
	151	try_get,
	152	url_basename,
	153	variadic,
	154	version_tuple,
	155	windows_enable_vt_mode,
	156	write_json_file,
	157	write_string,
	158	)
	159	from .utils._utils import _YDLLogger
	160	from .utils.networking import (
	161	HTTPHeaderDict,
	162	clean_headers,
	163	clean_proxies,
	164	std_headers,
	165	)
	166	from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__
	167
	168	if compat_os_name == 'nt':
	169	import ctypes
	170
	171
	172	class YoutubeDL:
	173	"""YoutubeDL class.
	174
	175	YoutubeDL objects are the ones responsible for downloading the
	176	actual video file and writing it to disk if the user has requested
	177	it, among some other tasks. In most cases there should be one per
	178	program. As, given a video URL, the downloader doesn't know how to
	179	extract all the needed information, task that InfoExtractors do, it
	180	has to pass the URL to one of them.
	181
	182	For this, YoutubeDL objects have a method that allows
	183	InfoExtractors to be registered in a given order. When it is passed
	184	a URL, the YoutubeDL object hands it to the first InfoExtractor it
	185	finds that reports being able to handle it. The InfoExtractor extracts
	186	all the information about the video or videos the URL refers to, and
	187	YoutubeDL processes the extracted information, possibly using a File
	188	Downloader to download the video.
	189
	190	YoutubeDL objects accept a lot of parameters. In order not to saturate
	191	the object constructor with arguments, it receives a dictionary of
	192	options instead. These options are available through the params
	193	attribute for the InfoExtractors to use. The YoutubeDL also
	194	registers itself as the downloader in charge for the InfoExtractors
	195	that are added to it, so this is a "mutual registration".
	196
	197	Available options:
	198
	199	username: Username for authentication purposes.
	200	password: Password for authentication purposes.
	201	videopassword: Password for accessing a video.
	202	ap_mso: Adobe Pass multiple-system operator identifier.
	203	ap_username: Multiple-system operator account username.
	204	ap_password: Multiple-system operator account password.
	205	usenetrc: Use netrc for authentication instead.
	206	netrc_location: Location of the netrc file. Defaults to ~/.netrc.
	207	netrc_cmd: Use a shell command to get credentials
	208	verbose: Print additional info to stdout.
	209	quiet: Do not print messages to stdout.
	210	no_warnings: Do not print out anything for warnings.
	211	forceprint: A dict with keys WHEN mapped to a list of templates to
	212	print to stdout. The allowed keys are video or any of the
	213	items in utils.POSTPROCESS_WHEN.
	214	For compatibility, a single list is also accepted
	215	print_to_file: A dict with keys WHEN (same as forceprint) mapped to
	216	a list of tuples with (template, filename)
	217	forcejson: Force printing info_dict as JSON.
	218	dump_single_json: Force printing the info_dict of the whole playlist
	219	(or video) as a single JSON line.
	220	force_write_download_archive: Force writing download archive regardless
	221	of 'skip_download' or 'simulate'.
	222	simulate: Do not download the video files. If unset (or None),
	223	simulate only if listsubtitles, listformats or list_thumbnails is used
	224	format: Video format code. see "FORMAT SELECTION" for more details.
	225	You can also pass a function. The function takes 'ctx' as
	226	argument and returns the formats to download.
	227	See "build_format_selector" for an implementation
	228	allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
	229	ignore_no_formats_error: Ignore "No video formats" error. Useful for
	230	extracting metadata even if the video is not actually
	231	available for download (experimental)
	232	format_sort: A list of fields by which to sort the video formats.
	233	See "Sorting Formats" for more details.
	234	format_sort_force: Force the given format_sort. see "Sorting Formats"
	235	for more details.
	236	prefer_free_formats: Whether to prefer video formats with free containers
	237	over non-free ones of same quality.
	238	allow_multiple_video_streams: Allow multiple video streams to be merged
	239	into a single file
	240	allow_multiple_audio_streams: Allow multiple audio streams to be merged
	241	into a single file
	242	check_formats: Whether to test if the formats are downloadable.
	243	Can be True (check all), False (check none),
	244	'selected' (check selected formats),
	245	or None (check only if requested by extractor)
	246	paths: Dictionary of output paths. The allowed keys are 'home',
	247	'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
	248	outtmpl: Dictionary of templates for output names. Allowed keys
	249	are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).
	250	For compatibility with youtube-dl, a single string can also be used
	251	outtmpl_na_placeholder: Placeholder for unavailable meta fields.
	252	restrictfilenames: Do not allow "&" and spaces in file names
	253	trim_file_name: Limit length of filename (extension excluded)
	254	windowsfilenames: Force the filenames to be windows compatible
	255	ignoreerrors: Do not stop on download/postprocessing errors.
	256	Can be 'only_download' to ignore only download errors.
	257	Default is 'only_download' for CLI, but False for API
	258	skip_playlist_after_errors: Number of allowed failures until the rest of
	259	the playlist is skipped
	260	allowed_extractors: List of regexes to match against extractor names that are allowed
	261	overwrites: Overwrite all video and metadata files if True,
	262	overwrite only non-video files if None
	263	and don't overwrite any file if False
	264	playlist_items: Specific indices of playlist to download.
	265	playlistrandom: Download playlist items in random order.
	266	lazy_playlist: Process playlist entries as they are received.
	267	matchtitle: Download only matching titles.
	268	rejecttitle: Reject downloads for matching titles.
	269	logger: Log messages to a logging.Logger instance.
	270	logtostderr: Print everything to stderr instead of stdout.
	271	consoletitle: Display progress in console window's titlebar.
	272	writedescription: Write the video description to a .description file
	273	writeinfojson: Write the video metadata to a .info.json file
	274	clean_infojson: Remove internal metadata from the infojson
	275	getcomments: Extract video comments. This will not be written to disk
	276	unless writeinfojson is also given
	277	writeannotations: Write the video annotations to a .annotations.xml file
	278	writethumbnail: Write the thumbnail image to a file
	279	allow_playlist_files: Whether to write playlists' description, infojson etc
	280	also to disk when using the 'write*' options
	281	write_all_thumbnails: Write all thumbnail formats to files
	282	writelink: Write an internet shortcut file, depending on the
	283	current platform (.url/.webloc/.desktop)
	284	writeurllink: Write a Windows internet shortcut file (.url)
	285	writewebloclink: Write a macOS internet shortcut file (.webloc)
	286	writedesktoplink: Write a Linux internet shortcut file (.desktop)
	287	writesubtitles: Write the video subtitles to a file
	288	writeautomaticsub: Write the automatically generated subtitles to a file
	289	listsubtitles: Lists all available subtitles for the video
	290	subtitlesformat: The format code for subtitles
	291	subtitleslangs: List of languages of the subtitles to download (can be regex).
	292	The list may contain "all" to refer to all the available
	293	subtitles. The language can be prefixed with a "-" to
	294	exclude it from the requested languages, e.g. ['all', '-live_chat']
	295	keepvideo: Keep the video file after post-processing
	296	daterange: A utils.DateRange object, download only if the upload_date is in the range.
	297	skip_download: Skip the actual download of the video file
	298	cachedir: Location of the cache files in the filesystem.
	299	False to disable filesystem cache.
	300	noplaylist: Download single video instead of a playlist if in doubt.
	301	age_limit: An integer representing the user's age in years.
	302	Unsuitable videos for the given age are skipped.
	303	min_views: An integer representing the minimum view count the video
	304	must have in order to not be skipped.
	305	Videos without view count information are always
	306	downloaded. None for no limit.
	307	max_views: An integer representing the maximum view count.
	308	Videos that are more popular than that are not
	309	downloaded.
	310	Videos without view count information are always
	311	downloaded. None for no limit.
	312	download_archive: A set, or the name of a file where all downloads are recorded.
	313	Videos already present in the file are not downloaded again.
	314	break_on_existing: Stop the download process after attempting to download a
	315	file that is in the archive.
	316	break_per_url: Whether break_on_reject and break_on_existing
	317	should act on each input URL as opposed to for the entire queue
	318	cookiefile: File name or text stream from where cookies should be read and dumped to
	319	cookiesfrombrowser: A tuple containing the name of the browser, the profile
	320	name/path from where cookies are loaded, the name of the keyring,
	321	and the container name, e.g. ('chrome', ) or
	322	('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
	323	legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
	324	support RFC 5746 secure renegotiation
	325	nocheckcertificate: Do not verify SSL certificates
	326	client_certificate: Path to client certificate file in PEM format. May include the private key
	327	client_certificate_key: Path to private key file for client certificate
	328	client_certificate_password: Password for client certificate private key, if encrypted.
	329	If not provided and the key is encrypted, yt-dlp will ask interactively
	330	prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
	331	(Only supported by some extractors)
	332	enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons.
	333	http_headers: A dictionary of custom headers to be used for all requests
	334	proxy: URL of the proxy server to use
	335	geo_verification_proxy: URL of the proxy to use for IP address verification
	336	on geo-restricted sites.
	337	socket_timeout: Time to wait for unresponsive hosts, in seconds
	338	bidi_workaround: Work around buggy terminals without bidirectional text
	339	support, using fribidi
	340	debug_printtraffic: Print out sent and received HTTP traffic
	341	default_search: Prepend this string if an input url is not valid.
	342	'auto' for elaborate guessing
	343	encoding: Use this encoding instead of the system-specified.
	344	extract_flat: Whether to resolve and process url_results further
	345	* False: Always process. Default for API
	346	* True: Never process
	347	* 'in_playlist': Do not process inside playlist/multi_video
	348	* 'discard': Always process, but don't return the result
	349	from inside playlist/multi_video
	350	* 'discard_in_playlist': Same as "discard", but only for
	351	playlists (not multi_video). Default for CLI
	352	wait_for_video: If given, wait for scheduled streams to become available.
	353	The value should be a tuple containing the range
	354	(min_secs, max_secs) to wait between retries
	355	postprocessors: A list of dictionaries, each with an entry
	356	* key: The name of the postprocessor. See
	357	yt_dlp/postprocessor/__init__.py for a list.
	358	* when: When to run the postprocessor. Allowed values are
	359	the entries of utils.POSTPROCESS_WHEN
	360	Assumed to be 'post_process' if not given
	361	progress_hooks: A list of functions that get called on download
	362	progress, with a dictionary with the entries
	363	* status: One of "downloading", "error", or "finished".
	364	Check this first and ignore unknown values.
	365	* info_dict: The extracted info_dict
	366
	367	If status is one of "downloading", or "finished", the
	368	following properties may also be present:
	369	* filename: The final filename (always present)
	370	* tmpfilename: The filename we're currently writing to
	371	* downloaded_bytes: Bytes on disk
	372	* total_bytes: Size of the whole file, None if unknown
	373	* total_bytes_estimate: Guess of the eventual file size,
	374	None if unavailable.
	375	* elapsed: The number of seconds since download started.
	376	* eta: The estimated time in seconds, None if unknown
	377	* speed: The download speed in bytes/second, None if
	378	unknown
	379	* fragment_index: The counter of the currently
	380	downloaded video fragment.
	381	* fragment_count: The number of fragments (= individual
	382	files that will be merged)
	383
	384	Progress hooks are guaranteed to be called at least once
	385	(with status "finished") if the download is successful.
	386	postprocessor_hooks: A list of functions that get called on postprocessing
	387	progress, with a dictionary with the entries
	388	* status: One of "started", "processing", or "finished".
	389	Check this first and ignore unknown values.
	390	* postprocessor: Name of the postprocessor
	391	* info_dict: The extracted info_dict
	392
	393	Postprocessor hooks are guaranteed to be called at least twice
	394	(with status "started" and "finished") if the processing is successful.
	395	merge_output_format: "/" separated list of extensions to use when merging formats.
	396	final_ext: Expected final extension; used to detect when the file was
	397	already downloaded and converted
	398	fixup: Automatically correct known faults of the file.
	399	One of:
	400	- "never": do nothing
	401	- "warn": only emit a warning
	402	- "detect_or_warn": check whether we can do anything
	403	about it, warn otherwise (default)
	404	source_address: Client-side IP address to bind to.
	405	sleep_interval_requests: Number of seconds to sleep between requests
	406	during extraction
	407	sleep_interval: Number of seconds to sleep before each download when
	408	used alone or a lower bound of a range for randomized
	409	sleep before each download (minimum possible number
	410	of seconds to sleep) when used along with
	411	max_sleep_interval.
	412	max_sleep_interval: Upper bound of a range for randomized sleep before each
	413	download (maximum possible number of seconds to sleep).
	414	Must only be used along with sleep_interval.
	415	Actual sleep time will be a random float from range
	416	[sleep_interval; max_sleep_interval].
	417	sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
	418	listformats: Print an overview of available video formats and exit.
	419	list_thumbnails: Print a table of all thumbnails and exit.
	420	match_filter: A function that gets called for every video with the signature
	421	(info_dict, *, incomplete: bool) -> Optional[str]
	422	For backward compatibility with youtube-dl, the signature
	423	(info_dict) -> Optional[str] is also allowed.
	424	- If it returns a message, the video is ignored.
	425	- If it returns None, the video is downloaded.
	426	- If it returns utils.NO_DEFAULT, the user is interactively
	427	asked whether to download the video.
	428	- Raise utils.DownloadCancelled(msg) to abort remaining
	429	downloads when a video is rejected.
	430	match_filter_func in utils/_utils.py is one example for this.
	431	color: A Dictionary with output stream names as keys
	432	and their respective color policy as values.
	433	Can also just be a single color policy,
	434	in which case it applies to all outputs.
	435	Valid stream names are 'stdout' and 'stderr'.
	436	Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
	437	geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
	438	HTTP header
	439	geo_bypass_country:
	440	Two-letter ISO 3166-1 alpha-2 country code that will be used for
	441	explicit geographic restriction bypassing via faking
	442	X-Forwarded-For HTTP header
	443	geo_bypass_ip_block:
	444	IP range in CIDR notation that will be used similarly to
	445	geo_bypass_country
	446	external_downloader: A dictionary of protocol keys and the executable of the
	447	external downloader to use for it. The allowed protocols
	448	are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
	449	Set the value to 'native' to use the native downloader
	450	compat_opts: Compatibility options. See "Differences in default behavior".
	451	The following options do not work when used through the API:
	452	filename, abort-on-error, multistreams, no-live-chat, format-sort,
	453	no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
	454	Refer to __init__.py for their implementation
	455	progress_template: Dictionary of templates for progress outputs.
	456	Allowed keys are 'download', 'postprocess',
	457	'download-title' (console title) and 'postprocess-title'.
	458	The template is mapped on a dictionary with keys 'progress' and 'info'
	459	retry_sleep_functions: Dictionary of functions that takes the number of attempts
	460	as argument and returns the time to sleep in seconds.
	461	Allowed keys are 'http', 'fragment', 'file_access'
	462	download_ranges: A callback function that gets called for every video with
	463	the signature (info_dict, ydl) -> Iterable[Section].
	464	Only the returned sections will be downloaded.
	465	Each Section is a dict with the following keys:
	466	* start_time: Start time of the section in seconds
	467	* end_time: End time of the section in seconds
	468	* title: Section title (Optional)
	469	* index: Section number (Optional)
	470	force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
	471	noprogress: Do not print the progress bar
	472	live_from_start: Whether to download livestream videos from the start
	473
	474	The following parameters are not used by YoutubeDL itself, they are used by
	475	the downloader (see yt_dlp/downloader/common.py):
	476	nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
	477	max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
	478	continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
	479	external_downloader_args, concurrent_fragment_downloads.
	480
	481	The following options are used by the post processors:
	482	ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
	483	to the binary or its containing directory.
	484	postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
	485	and a list of additional command-line arguments for the
	486	postprocessor/executable. The dict can also have "PP+EXE" keys
	487	which are used when the given exe is used by the given PP.
	488	Use 'default' as the name for arguments to be passed to all PP
	489	For compatibility with youtube-dl, a single list of args
	490	can also be used
	491
	492	The following options are used by the extractors:
	493	extractor_retries: Number of times to retry for known errors (default: 3)
	494	dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
	495	hls_split_discontinuity: Split HLS playlists to different formats at
	496	discontinuities such as ad breaks (default: False)
	497	extractor_args: A dictionary of arguments to be passed to the extractors.
	498	See "EXTRACTOR ARGUMENTS" for details.
	499	E.g. {'youtube': {'skip': ['dash', 'hls']}}
	500	mark_watched: Mark videos watched (even with --simulate). Only for YouTube

1

import collections

import contextlib

import copy

import datetime

import errno

import fileinput

import http.cookiejar

import io

import itertools

import json

import locale

import operator

import os

import random

import re

import shutil

import string

import subprocess

import sys

import tempfile

import time

import tokenize

import traceback

import unicodedata

from .cache import Cache

27

from .compat import functools, urllib # isort: split

28

from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req

29

from .cookies import LenientSimpleCookie, load_cookies

30

from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name

31

from .downloader.rtmp import rtmpdump_version

32

from .extractor import gen_extractor_classes, get_info_extractor

33

from .extractor.common import UnsupportedURLIE

34

from .extractor.openload import PhantomJSwrapper

35

from .minicurses import format_text

36

from .networking import HEADRequest, Request, RequestDirector

37

from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES

38

from .networking.exceptions import (

39

HTTPError,

40

NoSupportingHandlers,

RequestError,

SSLError,

network_exceptions,

)

from .plugins import directories as plugin_directories

46

from .postprocessor import _PLUGIN_CLASSES as plugin_pps

47

from .postprocessor import (

48

EmbedThumbnailPP,

49

FFmpegFixupDuplicateMoovPP,

50

FFmpegFixupDurationPP,

51

FFmpegFixupM3u8PP,

52

FFmpegFixupM4aPP,

53

FFmpegFixupStretchedPP,

54

FFmpegFixupTimestampPP,

55

FFmpegMergerPP,

56

FFmpegPostProcessor,

57

FFmpegVideoConvertorPP,

58

MoveFilesAfterDownloadPP,

59

get_postprocessor,

60

)

61

from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping

62

from .update import (

63

REPOSITORY,

64

_get_system_deprecation,

_make_label,

current_git_head,

detect_variant,

)

from .utils import (

DEFAULT_OUTTMPL,

IDENTITY,

LINK_TEMPLATES,

MEDIA_EXTENSIONS,

NO_DEFAULT,

NUMBER_RE,

OUTTMPL_TYPES,

POSTPROCESS_WHEN,

STR_FORMAT_RE_TMPL,

STR_FORMAT_TYPES,

ContentTooShortError,

DateRange,

DownloadCancelled,

DownloadError,

EntryNotInPlaylist,

ExistingVideoReached,

ExtractorError,

FormatSorter,

GeoRestrictedError,

ISO3166Utils,

LazyList,

MaxDownloadsReached,

Namespace,

PagedList,

PlaylistEntries,

Popen,

PostProcessingError,

ReExtractInfo,

RejectedVideoReached,

99

SameFileError,

100

UnavailableVideoError,

UserNotLive,

age_restricted,

args_to_str,

bug_reports_message,

date_from_str,

deprecation_warning,

determine_ext,

determine_protocol,

encode_compat_str,

encodeFilename,

error_to_compat_str,

escapeHTML,

expand_path,

extract_basic_auth,

filter_dict,

float_or_none,

format_bytes,

format_decimal_suffix,

format_field,

formatSeconds,

get_compatible_ext,

get_domain,

int_or_none,

iri_to_uri,

is_path_like,

join_nonempty,

locked_file,

make_archive_id,

make_dir,

number_of_digits,

orderedSet,

orderedSet_from_options,

parse_filesize,

preferredencoding,

prepend_extension,

remove_terminal_sequences,

render_table,

replace_extension,

sanitize_filename,

sanitize_path,

sanitize_url,

str_or_none,

strftime_or_none,

subtitles_filename,

supports_terminal_sequences,

system_identifier,

timetuple_from_msec,

to_high_limit_path,

traverse_obj,

try_call,

try_get,

url_basename,

variadic,

version_tuple,

windows_enable_vt_mode,

write_json_file,

write_string,

)

from .utils._utils import _YDLLogger

160

from .utils.networking import (

HTTPHeaderDict,

clean_headers,

clean_proxies,

std_headers,

)

from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__

167

168

if compat_os_name == 'nt':

import ctypes

class YoutubeDL:

"""YoutubeDL class.

YoutubeDL objects are the ones responsible for downloading the

176

actual video file and writing it to disk if the user has requested

177

it, among some other tasks. In most cases there should be one per

178

program. As, given a video URL, the downloader doesn't know how to

179

extract all the needed information, task that InfoExtractors do, it

180

has to pass the URL to one of them.

181

182

For this, YoutubeDL objects have a method that allows

183

InfoExtractors to be registered in a given order. When it is passed

184

a URL, the YoutubeDL object hands it to the first InfoExtractor it

185

finds that reports being able to handle it. The InfoExtractor extracts

186

all the information about the video or videos the URL refers to, and

187

YoutubeDL processes the extracted information, possibly using a File

188

Downloader to download the video.

189

190

YoutubeDL objects accept a lot of parameters. In order not to saturate

191

the object constructor with arguments, it receives a dictionary of

192

options instead. These options are available through the params

193

attribute for the InfoExtractors to use. The YoutubeDL also

194

registers itself as the downloader in charge for the InfoExtractors

195

that are added to it, so this is a "mutual registration".

Available options:

username: Username for authentication purposes.

200

password: Password for authentication purposes.

201

videopassword: Password for accessing a video.

202

ap_mso: Adobe Pass multiple-system operator identifier.

203

ap_username: Multiple-system operator account username.

204

ap_password: Multiple-system operator account password.

205

usenetrc: Use netrc for authentication instead.

206

netrc_location: Location of the netrc file. Defaults to ~/.netrc.

207

netrc_cmd: Use a shell command to get credentials

208

verbose: Print additional info to stdout.

209

quiet: Do not print messages to stdout.

210

no_warnings: Do not print out anything for warnings.

211

forceprint: A dict with keys WHEN mapped to a list of templates to

212

print to stdout. The allowed keys are video or any of the

213

items in utils.POSTPROCESS_WHEN.

214

For compatibility, a single list is also accepted

215

print_to_file: A dict with keys WHEN (same as forceprint) mapped to

216

a list of tuples with (template, filename)

217

forcejson: Force printing info_dict as JSON.

218

dump_single_json: Force printing the info_dict of the whole playlist

219

(or video) as a single JSON line.

220

force_write_download_archive: Force writing download archive regardless

221

of 'skip_download' or 'simulate'.

222

simulate: Do not download the video files. If unset (or None),

223

simulate only if listsubtitles, listformats or list_thumbnails is used

224

format: Video format code. see "FORMAT SELECTION" for more details.

225

You can also pass a function. The function takes 'ctx' as

226

argument and returns the formats to download.

227

See "build_format_selector" for an implementation

228

allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.

229

ignore_no_formats_error: Ignore "No video formats" error. Useful for

230

extracting metadata even if the video is not actually

231

available for download (experimental)

232

format_sort: A list of fields by which to sort the video formats.

233

See "Sorting Formats" for more details.

234

format_sort_force: Force the given format_sort. see "Sorting Formats"

235

for more details.

236

prefer_free_formats: Whether to prefer video formats with free containers

237

over non-free ones of same quality.

238

allow_multiple_video_streams: Allow multiple video streams to be merged

239

into a single file

240

allow_multiple_audio_streams: Allow multiple audio streams to be merged

241

into a single file

242

check_formats: Whether to test if the formats are downloadable.

243

Can be True (check all), False (check none),

244

'selected' (check selected formats),

245

or None (check only if requested by extractor)

246

paths: Dictionary of output paths. The allowed keys are 'home'

247

'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)

248

outtmpl: Dictionary of templates for output names. Allowed keys

249

are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).

250

For compatibility with youtube-dl, a single string can also be used

251

outtmpl_na_placeholder: Placeholder for unavailable meta fields.

252

restrictfilenames: Do not allow "&" and spaces in file names

253

trim_file_name: Limit length of filename (extension excluded)

254

windowsfilenames: Force the filenames to be windows compatible

255

ignoreerrors: Do not stop on download/postprocessing errors.

256

Can be 'only_download' to ignore only download errors.

257

Default is 'only_download' for CLI, but False for API

258

skip_playlist_after_errors: Number of allowed failures until the rest of

259

the playlist is skipped

260

allowed_extractors: List of regexes to match against extractor names that are allowed

261

overwrites: Overwrite all video and metadata files if True,

262

overwrite only non-video files if None

263

and don't overwrite any file if False

264

playlist_items: Specific indices of playlist to download.

265

playlistrandom: Download playlist items in random order.

266

lazy_playlist: Process playlist entries as they are received.

267

matchtitle: Download only matching titles.

268

rejecttitle: Reject downloads for matching titles.

269

logger: Log messages to a logging.Logger instance.

270

logtostderr: Print everything to stderr instead of stdout.

271

consoletitle: Display progress in console window's titlebar.

272

writedescription: Write the video description to a .description file

273

writeinfojson: Write the video metadata to a .info.json file

274

clean_infojson: Remove internal metadata from the infojson

275

getcomments: Extract video comments. This will not be written to disk

276

unless writeinfojson is also given

277

writeannotations: Write the video annotations to a .annotations.xml file

278

writethumbnail: Write the thumbnail image to a file

279

allow_playlist_files: Whether to write playlists' description, infojson etc

280

also to disk when using the 'write*' options

281

write_all_thumbnails: Write all thumbnail formats to files

282

writelink: Write an internet shortcut file, depending on the

283

current platform (.url/.webloc/.desktop)

284

writeurllink: Write a Windows internet shortcut file (.url)

285

writewebloclink: Write a macOS internet shortcut file (.webloc)

286

writedesktoplink: Write a Linux internet shortcut file (.desktop)

287

writesubtitles: Write the video subtitles to a file

288

writeautomaticsub: Write the automatically generated subtitles to a file

289

listsubtitles: Lists all available subtitles for the video

290

subtitlesformat: The format code for subtitles

291

subtitleslangs: List of languages of the subtitles to download (can be regex).

292

The list may contain "all" to refer to all the available

293

subtitles. The language can be prefixed with a "-" to

294

exclude it from the requested languages, e.g. ['all', '-live_chat']

295

keepvideo: Keep the video file after post-processing

296

daterange: A utils.DateRange object, download only if the upload_date is in the range.

297

skip_download: Skip the actual download of the video file

298

cachedir: Location of the cache files in the filesystem.

299

False to disable filesystem cache.

300

noplaylist: Download single video instead of a playlist if in doubt.

301

age_limit: An integer representing the user's age in years.

302

Unsuitable videos for the given age are skipped.

303

min_views: An integer representing the minimum view count the video

304

must have in order to not be skipped.

305

Videos without view count information are always

306

downloaded. None for no limit.

307

max_views: An integer representing the maximum view count.

308

Videos that are more popular than that are not

309

downloaded.

310

Videos without view count information are always

311

downloaded. None for no limit.

312

download_archive: A set, or the name of a file where all downloads are recorded.

313

Videos already present in the file are not downloaded again.

314

break_on_existing: Stop the download process after attempting to download a

315

file that is in the archive.

316

break_per_url: Whether break_on_reject and break_on_existing

317

should act on each input URL as opposed to for the entire queue

318

cookiefile: File name or text stream from where cookies should be read and dumped to

319

cookiesfrombrowser: A tuple containing the name of the browser, the profile

320

name/path from where cookies are loaded, the name of the keyring,

321

and the container name, e.g. ('chrome', ) or

322

('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')

323

legacyserverconnect: Explicitly allow HTTPS connection to servers that do not

324

support RFC 5746 secure renegotiation

325

nocheckcertificate: Do not verify SSL certificates

326

client_certificate: Path to client certificate file in PEM format. May include the private key

327

client_certificate_key: Path to private key file for client certificate

328

client_certificate_password: Password for client certificate private key, if encrypted.

329

If not provided and the key is encrypted, yt-dlp will ask interactively

330

prefer_insecure: Use HTTP instead of HTTPS to retrieve information.

331

(Only supported by some extractors)

332

enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons.

333

http_headers: A dictionary of custom headers to be used for all requests

334

proxy: URL of the proxy server to use

335

geo_verification_proxy: URL of the proxy to use for IP address verification

336

on geo-restricted sites.

337

socket_timeout: Time to wait for unresponsive hosts, in seconds

338

bidi_workaround: Work around buggy terminals without bidirectional text

339

support, using fribidi

340

debug_printtraffic: Print out sent and received HTTP traffic

341

default_search: Prepend this string if an input url is not valid.

342

'auto' for elaborate guessing

343

encoding: Use this encoding instead of the system-specified.

344

extract_flat: Whether to resolve and process url_results further

345

* False: Always process. Default for API

346

* True: Never process

347

* 'in_playlist': Do not process inside playlist/multi_video

348

* 'discard': Always process, but don't return the result

349

from inside playlist/multi_video

350

* 'discard_in_playlist': Same as "discard", but only for

351

playlists (not multi_video). Default for CLI

352

wait_for_video: If given, wait for scheduled streams to become available.

353

The value should be a tuple containing the range

354

(min_secs, max_secs) to wait between retries

355

postprocessors: A list of dictionaries, each with an entry

356

* key: The name of the postprocessor. See

357

yt_dlp/postprocessor/__init__.py for a list.

358

* when: When to run the postprocessor. Allowed values are

359

the entries of utils.POSTPROCESS_WHEN

360

Assumed to be 'post_process' if not given

361

progress_hooks: A list of functions that get called on download

362

progress, with a dictionary with the entries

363

* status: One of "downloading", "error", or "finished".

364

Check this first and ignore unknown values.

365

* info_dict: The extracted info_dict

366

367

If status is one of "downloading", or "finished", the

368

following properties may also be present:

369

* filename: The final filename (always present)

370

* tmpfilename: The filename we're currently writing to

371

* downloaded_bytes: Bytes on disk

372

* total_bytes: Size of the whole file, None if unknown

373

* total_bytes_estimate: Guess of the eventual file size,

374

None if unavailable.

375

* elapsed: The number of seconds since download started.

376

* eta: The estimated time in seconds, None if unknown

377

* speed: The download speed in bytes/second, None if

378

unknown

379

* fragment_index: The counter of the currently

380

downloaded video fragment.

381

* fragment_count: The number of fragments (= individual

382

files that will be merged)

383

384

Progress hooks are guaranteed to be called at least once

385

(with status "finished") if the download is successful.

386

postprocessor_hooks: A list of functions that get called on postprocessing

387

progress, with a dictionary with the entries

388

* status: One of "started", "processing", or "finished".

389

Check this first and ignore unknown values.

390

* postprocessor: Name of the postprocessor

391

* info_dict: The extracted info_dict

392

393

Progress hooks are guaranteed to be called at least twice

394

(with status "started" and "finished") if the processing is successful.

395

merge_output_format: "/" separated list of extensions to use when merging formats.

396

final_ext: Expected final extension; used to detect when the file was

397

already downloaded and converted

398

fixup: Automatically correct known faults of the file.

399

One of:

400

- "never": do nothing

401

- "warn": only emit a warning

402

- "detect_or_warn": check whether we can do anything

403

about it, warn otherwise (default)

404

source_address: Client-side IP address to bind to.

405

sleep_interval_requests: Number of seconds to sleep between requests

406

during extraction

407

sleep_interval: Number of seconds to sleep before each download when

408

used alone or a lower bound of a range for randomized

409

sleep before each download (minimum possible number

410

of seconds to sleep) when used along with

411

max_sleep_interval.

412

max_sleep_interval: Upper bound of a range for randomized sleep before each

413

download (maximum possible number of seconds to sleep).

414

Must only be used along with sleep_interval.

415

Actual sleep time will be a random float from range

416

[sleep_interval; max_sleep_interval].

417

sleep_interval_subtitles: Number of seconds to sleep before each subtitle download

418

listformats: Print an overview of available video formats and exit.

419

list_thumbnails: Print a table of all thumbnails and exit.

420

match_filter: A function that gets called for every video with the signature

421

(info_dict, *, incomplete: bool) -> Optional[str]

422

For backward compatibility with youtube-dl, the signature

423

(info_dict) -> Optional[str] is also allowed.

424

- If it returns a message, the video is ignored.

425

- If it returns None, the video is downloaded.

426

- If it returns utils.NO_DEFAULT, the user is interactively

427

asked whether to download the video.

428

- Raise utils.DownloadCancelled(msg) to abort remaining

429

downloads when a video is rejected.

430

match_filter_func in utils/_utils.py is one example for this.

431

color: A Dictionary with output stream names as keys

432

and their respective color policy as values.

433

Can also just be a single color policy,

434

in which case it applies to all outputs.

435

Valid stream names are 'stdout' and 'stderr'.

436

Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.

437

geo_bypass: Bypass geographic restriction via faking X-Forwarded-For

438

HTTP header

439

geo_bypass_country:

440

Two-letter ISO 3166-1 alpha-2 country code that will be used for

441

explicit geographic restriction bypassing via faking

442

X-Forwarded-For HTTP header

443

geo_bypass_ip_block:

444

IP range in CIDR notation that will be used similarly to

445

geo_bypass_country

446

external_downloader: A dictionary of protocol keys and the executable of the

447

external downloader to use for it. The allowed protocols
are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.

448

449

Set the value to 'native' to use the native downloader

450

compat_opts: Compatibility options. See "Differences in default behavior".

451

The following options do not work when used through the API:

452

filename, abort-on-error, multistreams, no-live-chat, format-sort

453

no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.

454

Refer __init__.py for their implementation

455

progress_template: Dictionary of templates for progress outputs.

456

Allowed keys are 'download', 'postprocess',

457

'download-title' (console title) and 'postprocess-title'.

458

The template is mapped on a dictionary with keys 'progress' and 'info'

459

retry_sleep_functions: Dictionary of functions that takes the number of attempts

460

as argument and returns the time to sleep in seconds.

461

Allowed keys are 'http', 'fragment', 'file_access'

462

download_ranges: A callback function that gets called for every video with

463

the signature (info_dict, ydl) -> Iterable[Section].

464

Only the returned sections will be downloaded.

465

Each Section is a dict with the following keys:

466

* start_time: Start time of the section in seconds

467

* end_time: End time of the section in seconds

468

* title: Section title (Optional)

469

* index: Section number (Optional)

470

force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts

471

noprogress: Do not print the progress bar

472

live_from_start: Whether to download livestreams videos from the start

473

474

The following parameters are not used by YoutubeDL itself, they are used by

475

the downloader (see yt_dlp/downloader/common.py):

476

nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,

477

max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,

478

continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,

479

external_downloader_args, concurrent_fragment_downloads.

480

481

The following options are used by the post processors:

482

ffmpeg_location: Location of the ffmpeg/avconv binary; either the path

483

to the binary or its containing directory.

484

postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)

485

and a list of additional command-line arguments for the

486

postprocessor/executable. The dict can also have "PP+EXE" keys

487

which are used when the given exe is used by the given PP.

488

Use 'default' as the name for arguments to be passed to all PP

489

For compatibility with youtube-dl, a single list of args

490

can also be used

491

492

The following options are used by the extractors:

493

extractor_retries: Number of times to retry for known errors (default: 3)

494

dynamic_mpd: Whether to process dynamic DASH manifests (default: True)

495

hls_split_discontinuity: Split HLS playlists to different formats at

496

discontinuities such as ad breaks (default: False)

497

extractor_args: A dictionary of arguments to be passed to the extractors.

498

See "EXTRACTOR ARGUMENTS" for details.

499

E.g. {'youtube': {'skip': ['dash', 'hls']}}

500

mark_watched: Mark videos watched (even with --simulate). Only for YouTube

501

502

The following options are deprecated and may be removed in the future:

503

504

break_on_reject: Stop the download process when encountering a video that

505

has been filtered out.

506

- `raise DownloadCancelled(msg)` in match_filter instead

507

force_generic_extractor: Force downloader to use the generic extractor

508

- Use allowed_extractors = ['generic', 'default']

509

playliststart: - Use playlist_items

510

Playlist item to start at.

511

playlistend: - Use playlist_items

512

Playlist item to end at.

513

playlistreverse: - Use playlist_items

514

Download playlist items in reverse order.

515

forceurl: - Use forceprint

516

Force printing final URL.

517

forcetitle: - Use forceprint

518

Force printing title.

519

forceid: - Use forceprint

520

Force printing ID.

521

forcethumbnail: - Use forceprint

522

Force printing thumbnail URL.

523

forcedescription: - Use forceprint

524

Force printing description.

525

forcefilename: - Use forceprint

526

Force printing final filename.

527

forceduration: - Use forceprint

528

Force printing duration.

529

allsubtitles: - Use subtitleslangs = ['all']

530

Downloads all the subtitles of the video

531

(requires writesubtitles or writeautomaticsub)

532

include_ads: - Doesn't work

533

Download ads as well

534

call_home: - Not implemented

535

Boolean, true iff we are allowed to contact the

536

yt-dlp servers for debugging.

537

post_hooks: - Register a custom postprocessor

538

A list of functions that get called as the final step

539

for each video file, after all postprocessors have been

540

called. The filename will be passed as the only argument.

541

hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.

542

Use the native HLS downloader instead of ffmpeg/avconv

543

if True, otherwise use ffmpeg/avconv if False, otherwise

544

use downloader suggested by extractor if None.

545

prefer_ffmpeg: - avconv support is deprecated

546

If False, use avconv instead of ffmpeg if both are available,

547

otherwise prefer ffmpeg.

548

youtube_include_dash_manifest: - Use extractor_args

549

If True (default), DASH manifests and related

550

data will be downloaded and processed by extractor.

551

You can reduce network I/O by disabling it if you don't

552

care about DASH. (only for youtube)

553

youtube_include_hls_manifest: - Use extractor_args

554

If True (default), HLS manifests and related

555

data will be downloaded and processed by extractor.

556

You can reduce network I/O by disabling it if you don't

557

care about HLS. (only for youtube)

558

no_color: Same as `color='no_color'`

559

no_overwrites: Same as `overwrites=False`

"""

# Set of field names that are handled as numeric values.
# NOTE(review): the code consuming this set lies outside this excerpt.
_NUMERIC_FIELDS = {
    'width', 'height',
    'asr', 'audio_channels', 'fps',
    'tbr', 'abr', 'vbr',
    'filesize', 'filesize_approx',
    'timestamp', 'release_timestamp',
    'duration',
    'view_count', 'like_count', 'dislike_count', 'repost_count',
    'average_rating', 'comment_count', 'age_limit',
    'start_time', 'end_time',
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number', 'release_year',
}

# Set of keys that belong to a single format entry.
_format_fields = {
    # NB: Keep in sync with the docstring of extractor/common.py
    'url', 'manifest_url', 'manifest_stream_number',
    'ext', 'format', 'format_id', 'format_note',
    'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range',
    'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
    'vbr', 'fps', 'vcodec', 'container',
    'filesize', 'filesize_approx', 'rows', 'columns',
    'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
    'preference', 'language', 'language_preference', 'quality', 'source_preference',
    'cookies', 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm',
    'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
    'page_url', 'app', 'play_path', 'tc_url', 'flash_version',
    'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
}

583

# Maps each deprecated singular field name to its plural replacement
# (the plural is always the singular name with an "s" appended).
_deprecated_multivalue_fields = {
    singular: f'{singular}s'
    for singular in ('album_artist', 'artist', 'composer', 'creator', 'genre')
}

590

# Extension sets keyed by media kind, built from MEDIA_EXTENSIONS;
# '3gp' is added on top of the common video extensions.
_format_selection_exts = {
    'audio': {*MEDIA_EXTENSIONS.common_audio},
    'video': {*MEDIA_EXTENSIONS.common_video, '3gp'},
    'storyboards': {*MEDIA_EXTENSIONS.storyboards},
}

595

596

def __init__(self, params=None, auto_init=True):
    """Initialise the downloader state from an options dictionary.

    @param params     Dictionary of options; an empty dict is used when None.
                      The dictionary is stored as self.params and is
                      modified in place while options are normalised.
    @param auto_init  Whether to load the default extractors and print header (if verbose).
                      Set to 'no_verbose_header' to not print the header
    """
    self.params = {} if params is None else params

    # Bookkeeping containers and counters
    self._ies = {}
    self._ies_instances = {}
    self._pps = {when: [] for when in POSTPROCESS_WHEN}
    self._printed_messages = set()
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._postprocessor_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._num_videos = 0
    self._playlist_level = 0
    self._playlist_urls = set()
    self.cache = Cache(self)
    self.__header_cookies = []

    # Output streams: 'logtostderr' redirects regular output to stderr and
    # 'quiet' sends screen output to stderr
    out_stream = sys.stderr if self.params.get('logtostderr') else sys.stdout
    if compat_os_name == 'nt':
        console_stream = None
    else:
        # First of stderr/stdout that supports terminal sequences, if any
        console_stream = next(
            filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
    self._out_files = Namespace(
        out=out_stream,
        error=sys.stderr,
        screen=sys.stderr if self.params.get('quiet') else out_stream,
        console=console_stream,
    )

    # Best effort: a failure here is only reported as a debug message
    try:
        windows_enable_vt_mode()
    except Exception as e:
        self.write_debug(f'Failed to enable VT mode: {e}')

    # The legacy 'no_color' option takes precedence over 'color'
    if self.params.get('no_color'):
        if self.params.get('color') is not None:
            self.params.setdefault('_warnings', []).append(
                'Overwriting params from "color" with "no_color"')
        self.params['color'] = 'no_color'

    term_allow_color = os.getenv('TERM', '').lower() != 'dumb'
    no_color = bool(os.getenv('NO_COLOR'))

    def process_color_policy(stream):
        """Resolve the color policy of `stream` to True, False or 'no_color'"""
        stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
        policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
        if policy in ('auto', None):
            if not (term_allow_color and supports_terminal_sequences(stream)):
                return False
            return 'no_color' if no_color else True
        assert policy in ('always', 'never', 'no_color'), policy
        if policy == 'always':
            return True
        if policy == 'never':
            return False
        return policy

    self._allow_colors = Namespace(**{
        name: process_color_policy(stream)
        for name, stream in self._out_files.items_ if name != 'console'
    })

    system_deprecation = _get_system_deprecation()
    if system_deprecation:
        self.deprecated_feature(system_deprecation.replace('\n', '\n '))

    if self.params.get('allow_unplayable_formats'):
        self.report_warning(
            f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
            'This is a developer option intended for debugging. \n'
            ' If you experience any issues while using this option, '
            f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

    if self.params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = shutil.get_terminal_size().columns
            width_args = [] if width is None else ['-w', str(width)]
            sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
            # Try bidiv first and fall back to fribidi
            try:
                self._output_process = Popen(['bidiv', *width_args], **sp_kwargs)
            except OSError:
                self._output_process = Popen(['fribidi', '-c', 'UTF-8', *width_args], **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno != errno.ENOENT:
                raise
            self.report_warning(
                'Could not find fribidi executable, ignoring --bidi-workaround. '
                'Make sure that fribidi is an executable file in one of the directories in your $PATH.')

    self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
    self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
    # compat: a 'Cookie' header is handed to _load_cookies and then removed from the headers
    self._load_cookies(self.params['http_headers'].get('Cookie'))
    self.params['http_headers'].pop('Cookie', None)

    if auto_init and auto_init != 'no_verbose_header':
        self.print_debug_header()

    def check_deprecated(param, option, suggestion):
        """Warn if `param` is set; return whether it was set"""
        if self.params.get(param) is None:
            return False
        self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
        return True

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    for deprecated_option in (
        ('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"'),
        ('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"'),
        ('useid', '--id', '-o "%(id)s.%(ext)s"'),
    ):
        check_deprecated(*deprecated_option)

    # Emit the messages queued in the params
    for msg in self.params.get('_warnings', []):
        self.report_warning(msg)
    for msg in self.params.get('_deprecation_warnings', []):
        self.deprecated_feature(msg)

    if 'list-formats' in self.params['compat_opts']:
        self.params['listformats_table'] = False

    if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
        # nooverwrites was unnecessarily changed to overwrites
        # in 0c3d0f51778b153f65c21906031c2e091fcfb641
        # This ensures compatibility with both keys
        self.params['overwrites'] = not self.params['nooverwrites']
    elif self.params.get('overwrites') is None:
        self.params.pop('overwrites', None)
    else:
        self.params['nooverwrites'] = not self.params['overwrites']

    # Any of the list_* options switches an unset 'simulate' to 'list_only'
    if self.params.get('simulate') is None and any(
            self.params.get(opt) for opt in ('list_thumbnails', 'listformats', 'listsubtitles')):
        self.params['simulate'] = 'list_only'

    self.params.setdefault('forceprint', {})
    self.params.setdefault('print_to_file', {})

    # Compatibility with older syntax
    if not isinstance(self.params['forceprint'], dict):
        self.params['forceprint'] = {'video': self.params['forceprint']}

    if auto_init:
        self.add_default_info_extractors()

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self._parse_outtmpl()

    # Creating format selector here allows us to catch syntax errors before the extraction
    requested_format = self.params.get('format')
    if requested_format in (None, '-') or callable(requested_format):
        self.format_selector = requested_format
    else:
        self.format_selector = self.build_format_selector(requested_format)

    # Register the hooks given through the params
    hook_registrars = (
        ('post_hooks', self.add_post_hook),
        ('progress_hooks', self.add_progress_hook),
        ('postprocessor_hooks', self.add_postprocessor_hook),
    )
    for opt, register in hook_registrars:
        for hook in self.params.get(opt, []):
            register(hook)

    for pp_def_raw in self.params.get('postprocessors', []):
        # Work on a copy so that the dict passed by the caller is not modified
        pp_def = dict(pp_def_raw)
        when = pp_def.pop('when', 'post_process')
        pp_class = get_postprocessor(pp_def.pop('key'))
        self.add_post_processor(pp_class(self, **pp_def), when=when)

    def preload_download_archive(fn):
        """Preload the archive, if any is specified"""
        entries = set()
        if fn is None:
            return entries
        if not is_path_like(fn):
            # Not a path: the given object is used as the archive directly
            return fn

        self.write_debug(f'Loading archive file {fn!r}')
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                entries.update(line.strip() for line in archive_file)
        except OSError as ioe:
            # A missing archive file just means an empty archive
            if ioe.errno != errno.ENOENT:
                raise
        return entries

    self.archive = preload_download_archive(self.params.get('download_archive'))

799

800

def warn_if_short_id(self, argv):
    """Warn about arguments that look like a short ID starting with a dash.

    Any argument consisting of a dash followed by exactly ten characters
    from [0-9A-Za-z_-] triggers a warning that shows the same command line
    with those arguments moved behind a `--` separator.
    """
    # short YouTube ID starting with dash?
    dashed_id = re.compile(r'^-[0-9A-Za-z_-]{10}$')
    positions = [pos for pos, arg in enumerate(argv) if dashed_id.match(arg)]
    if not positions:
        return

    kept = [arg for pos, arg in enumerate(argv) if pos not in positions]
    moved = [argv[pos] for pos in positions]
    correct_argv = ['yt-dlp', *kept, '--', *moved]
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s' %
        args_to_str(correct_argv))

815

816

def add_info_extractor(self, ie):
    """Register an InfoExtractor (class or instance) under its ie_key.

    Instances are additionally cached in ``_ies_instances`` and bound to
    this downloader; classes are only recorded in ``_ies``.
    """
    name = ie.ie_key()
    self._ies[name] = ie
    if isinstance(ie, type):
        return
    self._ies_instances[name] = ie
    ie.set_downloader(self)

823

824

def get_info_extractor(self, ie_key):
    """
    Return the extractor instance registered for ie_key.

    A cached instance is reused when present; otherwise a new one is
    created from the extractor class, registered, and returned.
    """
    cached = self._ies_instances.get(ie_key)
    if cached is not None:
        return cached
    fresh = get_info_extractor(ie_key)()
    self.add_info_extractor(fresh)
    return fresh

835

836

def add_default_info_extractors(self):
    """
    Register the extractors selected by the 'allowed_extractors' param.

    The selection defaults to ['default'] and may use the aliases 'all' and
    'default' as well as regular expressions over lower-cased IE names.
    Raises ValueError if one of the expressions is not a valid regex.
    """
    available = {}
    for ie_cls in gen_extractor_classes():
        available[ie_cls.IE_NAME.lower()] = ie_cls
    available['end'] = UnsupportedURLIE()

    aliases = {
        'all': list(available),
        'default': [name for name, ie in available.items() if ie._ENABLED],
    }
    requested = self.params.get('allowed_extractors', ['default'])
    try:
        selected = orderedSet_from_options(requested, aliases, use_regex=True)
    except re.error as e:
        raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')

    for name in selected:
        self.add_info_extractor(available[name])
    self.write_debug(f'Loaded {len(selected)} extractors')

853

854

def add_post_processor(self, pp, when='post_process'):
    """Append a PostProcessor to the chain for stage `when` and bind it to this downloader."""
    assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
    chain = self._pps[when]
    chain.append(pp)
    pp.set_downloader(self)

859

860

def add_post_hook(self, ph):
    """Register `ph` at the end of the list of post hooks."""
    registered = self._post_hooks
    registered.append(ph)

863

864

def add_progress_hook(self, ph):
    """Register `ph` at the end of the list of download progress hooks."""
    registered = self._progress_hooks
    registered.append(ph)

867

868

def add_postprocessor_hook(self, ph):
    """Register a postprocessing progress hook.

    The hook is remembered on the downloader and also attached to every
    postprocessor that is already registered, whatever its stage.
    """
    self._postprocessor_hooks.append(ph)
    for pp in itertools.chain.from_iterable(self._pps.values()):
        pp.add_progress_hook(ph)

874

875

def _bidi_workaround(self, message):

876

if not hasattr(self, '_output_channel'):

877

return message

878

879

assert hasattr(self, '_output_process')

880

assert isinstance(message, str)

881

line_count = message.count('\n') + 1

882

self._output_process.stdin.write((message + '\n').encode())

883

self._output_process.stdin.flush()

884

res = ''.join(self._output_channel.readline().decode()

885

for _ in range(line_count))

886

return res[:-len('\n')]

887

888

def _write_string(self, message, out=None, only_once=False):
    """Write `message` to `out` using the configured 'encoding' param.

    With only_once=True a message that was already printed through this
    method (tracked in `_printed_messages`) is silently dropped.
    """
    if only_once and message in self._printed_messages:
        return
    if only_once:
        self._printed_messages.add(message)
    write_string(message, out=out, encoding=self.params.get('encoding'))

894

895

def to_stdout(self, message, skip_eol=False, quiet=None):
    """Print message, followed by a newline, to the `out` output file.

    `skip_eol` and `quiet` are no longer honoured; passing either one only
    triggers a deprecation warning.
    """
    deprecated_args = (
        ('quiet', quiet is not None),
        ('skip_eol', skip_eol is not False),
    )
    for arg_name, was_passed in deprecated_args:
        if was_passed:
            self.deprecation_warning(
                f'"YoutubeDL.to_stdout" no longer accepts the argument {arg_name}. '
                'Use "YoutubeDL.to_screen" instead')

    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.out)

904

905

def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
    """Print message to screen if not in quiet mode.

    A configured 'logger' takes precedence and receives the message at
    debug level. `quiet` overrides the 'quiet' param when given; the
    'verbose' param always forces output.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return

    suppressed = self.params.get('quiet') if quiet is None else quiet
    if suppressed and not self.params.get('verbose'):
        return

    eol = '' if skip_eol else '\n'
    self._write_string(
        f'{self._bidi_workaround(message)}{eol}',
        self._out_files.screen, only_once=only_once)

915

916

def to_stderr(self, message, only_once=False):
    """Print message to the `error` output file, or to the logger's error() if one is configured."""
    assert isinstance(message, str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.error, only_once=only_once)

923

924

def _send_console_code(self, code):
    """Write a terminal control sequence to the console file.

    Nothing is written on Windows ('nt') or when no console file is set.
    """
    if compat_os_name != 'nt' and self._out_files.console:
        self._write_string(code, self._out_files.console)

928

929

def to_console_title(self, message):
    """Set the console window title to `message` when 'consoletitle' is enabled.

    Terminal sequences are removed from the message first. On Windows the
    title is set through kernel32 (only if a console window exists);
    elsewhere an OSC 0 escape sequence is sent to the console.
    """
    if not self.params.get('consoletitle', False):
        return
    message = remove_terminal_sequences(message)
    if compat_os_name != 'nt':
        self._send_console_code(f'\033]0;{message}\007')
        return
    if ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))

940

941

def save_console_title(self):
    """Push the current console title on the terminal's title stack.

    Only done when 'consoletitle' is enabled and 'simulate' is not.
    """
    if self.params.get('consoletitle') and not self.params.get('simulate'):
        self._send_console_code('\033[22;0t')  # Save the title on stack

945

946

def restore_console_title(self):
    """Pop the previously saved console title from the terminal's title stack.

    Only done when 'consoletitle' is enabled and 'simulate' is not.
    """
    if self.params.get('consoletitle') and not self.params.get('simulate'):
        self._send_console_code('\033[23;0t')  # Restore the title from stack

950

951

def __enter__(self):
    """Enter the context manager: save the console title and return this object."""
    self.save_console_title()
    return self

954

955

def save_cookies(self):
    """Save the cookie jar, but only when a 'cookiefile' param is configured."""
    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save()

958

959

def __exit__(self, *args):
    """Leave the context manager: restore the console title, then close the downloader.

    The exception details passed in `args` are not inspected, and nothing is
    returned, so an exception raised inside the `with` body keeps propagating.
    """
    self.restore_console_title()
    self.close()

def close(self):
    """Save cookies, then close and drop the request director.

    The `_request_director` attribute is deleted afterwards, so the object
    no longer has one once this returns.
    """
    self.save_cookies()
    director = self._request_director
    director.close()
    del self._request_director

967

968

def trouble(self, message=None, tb=None, is_error=True):
    """Determine action to take when a download problem appears.

    The message (if any) is printed to stderr first. In verbose mode a
    traceback is printed as well: the one given in `tb`, or else one built
    from the exception being handled, or from the current stack when no
    exception is active.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may then raise DownloadError
    or just record a non-zero return code.

    @param tb          If given, is additional traceback information
    @param is_error    Whether to raise error according to ignorerrors
    """
    def wrapped_exc_info():
        # exc_info carried by the exception being handled, when it has one
        active_type, active = sys.exc_info()[:2]
        if active_type and hasattr(active, 'exc_info') and active.exc_info[0]:
            return active.exc_info
        return None

    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                parts = []
                nested = wrapped_exc_info()
                if nested is not None:
                    parts.append(''.join(traceback.format_exception(*nested)))
                parts.append(encode_compat_str(traceback.format_exc()))
                tb = ''.join(parts)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if not is_error:
        return
    if self.params.get('ignoreerrors'):
        self._download_retcode = 1
        return

    nested = wrapped_exc_info()
    raise DownloadError(message, sys.exc_info() if nested is None else nested)

# Named text styles; the values are the style strings handed to the
# formatting helpers (see the `_format_*` methods, which receive them as `f`).
Styles = Namespace(
    HEADERS='yellow',
    EMPHASIS='light blue',
    FILENAME='green',
    ID='green',
    DELIM='blue',
    ERROR='red',
    BAD_FORMAT='light red',
    WARNING='yellow',
    SUPPRESS='light black',
)

1013

1014

def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):

text = str(text)

if test_encoding:

original_text = text

# handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711

1019

encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'

1020

text = text.encode(encoding, 'ignore').decode(encoding)

1021

if fallback is not None and text != original_text:

1022

text = fallback

1023

return format_text(text, f) if allow_colors is True else text if fallback is None else fallback

1024

1025

def _format_out(self, *args, **kwargs):

1026

return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

1027

1028

def _format_screen(self, *args, **kwargs):

1029

return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

1030

1031

def _format_err(self, *args, **kwargs):

1032

return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

1033

1034

def report_warning(self, message, only_once=False):
    '''
    Print the message to stderr, prefixed with a styled 'WARNING:'.

    A configured logger receives the bare message through warning()
    instead. Without a logger, the 'no_warnings' param suppresses output.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    prefix = self._format_err("WARNING:", self.Styles.WARNING)
    self.to_stderr(f'{prefix} {message}', only_once)

1045

1046

def deprecation_warning(self, message, *, stacklevel=0):
    """Emit a deprecation warning through the module-level helper.

    The helper prints via `report_error` with is_error=False; `stacklevel`
    is increased by one to account for this wrapper's own frame.
    """
    caller_level = stacklevel + 1
    deprecation_warning(
        message, stacklevel=caller_level, printer=self.report_error, is_error=False)

1049

1050

def deprecated_feature(self, message):
    """Report the use of a deprecated feature.

    With a logger configured, the message goes to its warning() method,
    prefixed with 'Deprecated Feature:'. Otherwise it is printed once to
    stderr with a styled prefix.

    The two paths are mutually exclusive, as in `report_warning`. Previously
    the logger path fell through to `to_stderr`, which hands the text to the
    logger again at error level, so the same notice was logged twice.
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(f'Deprecated Feature: {message}')
        return
    self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)

1054

1055

def report_error(self, message, *args, **kwargs):
    '''
    Do the same as trouble, but prefix the message with a styled 'ERROR:'.

    Extra positional and keyword arguments are forwarded to trouble.
    '''
    prefix = self._format_err("ERROR:", self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', *args, **kwargs)

1061

1062

def write_debug(self, message, only_once=False):
    '''Emit a '[debug] '-prefixed message, but only in verbose mode.

    The message goes to the logger's debug() when a logger is configured,
    otherwise to stderr.
    '''
    if not self.params.get('verbose', False):
        return
    line = f'[debug] {message}'
    logger = self.params.get('logger')
    if logger:
        logger.debug(line)
        return
    self.to_stderr(line, only_once)

1071

1072

def report_file_already_downloaded(self, file_name):
    """Report that the file has already been fully downloaded.

    Falls back to a message without the file name if printing the name
    raises UnicodeEncodeError.
    """
    with_name = '[download] %s has already been downloaded'
    try:
        self.to_screen(with_name % file_name)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')

1078

1079

def report_file_delete(self, file_name):
    """Report that an existing file will be deleted.

    Falls back to a message without the file name if printing the name
    raises UnicodeEncodeError.
    """
    with_name = 'Deleting existing file %s'
    try:
        self.to_screen(with_name % file_name)
    except UnicodeEncodeError:
        self.to_screen('Deleting existing file')

1085

1086

def raise_no_formats(self, info, forced=False, *, msg=None):
    """Raise ExtractorError, or only warn, about a video without formats.

    The error is raised when `forced` is set or the
    'ignore_no_formats_error' param is off; otherwise a warning is printed.
    When `msg` is not given, a DRM-specific or generic message is used.
    """
    has_drm = info.get('_has_drm')
    ignored = self.params.get('ignore_no_formats_error')
    expected = bool(msg)
    if not msg:
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'

    if not forced and ignored:
        self.report_warning(msg)
        return
    raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                         expected=has_drm or ignored or expected)

1095

1096

def parse_outtmpl(self):
    """Deprecated public wrapper: normalize the 'outtmpl' param and return it."""
    self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
    self._parse_outtmpl()
    normalized = self.params['outtmpl']
    return normalized

1100

1101

def _parse_outtmpl(self):
    """Normalize the 'outtmpl' param into a dict with all default templates filled in.

    A non-dict value becomes the 'default' template. Every template type
    missing (or None) is filled from DEFAULT_OUTTMPL; with
    'restrictfilenames' the spaces of those defaults are replaced first.
    """
    if self.params.get('restrictfilenames'):  # Remove spaces in the default template
        def sanitize(tmpl):
            return tmpl.replace(' - ', ' ').replace(' ', '-')
    else:
        sanitize = IDENTITY

    outtmpl = self.params.setdefault('outtmpl', {})
    if not isinstance(outtmpl, dict):
        outtmpl = {'default': outtmpl}
        self.params['outtmpl'] = outtmpl

    for tmpl_type, default_tmpl in DEFAULT_OUTTMPL.items():
        if outtmpl.get(tmpl_type) is None:
            outtmpl[tmpl_type] = sanitize(default_tmpl)

1110

1111

def get_output_path(self, dir_type='', filename=None):
    """Join the 'home' path, the path configured for `dir_type` and `filename`.

    Both directories come from the 'paths' param (a dict) and are
    expanded; the joined result is sanitized before being returned.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict), '"paths" parameter must be a dictionary'

    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    joined = os.path.join(home_dir, type_dir, filename or '')
    return sanitize_path(joined, force=self.params.get('windowsfilenames'))

1119

1120

@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Expand user/environment references in a template while keeping '%%' and '$$' intact."""
    # expand_path translates '%%' into '%' and '$$' into '$'
    # correspondingly that is not what we want since we need to keep
    # '%%' intact for template dict substitution step. Working around
    # with boundary-alike separator hack.
    sep = ''.join(random.choices(string.ascii_letters, k=32))
    for char in ('%', '$'):
        outtmpl = outtmpl.replace(char * 2, f'{char}{sep}{char}')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    expanded = expand_path(outtmpl)
    return expanded.replace(sep, '')

1134

1135

@staticmethod
def escape_outtmpl(outtmpl):
    ''' Escape any remaining strings like %s, %abc% etc. '''
    def escape_keyless(mobj):
        # A match without a key gets one extra leading '%'; keyed ones are kept as they are
        prefix = '' if mobj.group('has_key') else '%'
        return prefix + mobj.group(0)

    pattern = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')
    return re.sub(pattern, escape_keyless, outtmpl)

@classmethod
def validate_outtmpl(cls, outtmpl):
    ''' @return None or Exception object '''
    def as_plain_string_field(mobj):
        # Swap the custom conversion type for 's' so that %-formatting accepts it
        return f'{mobj.group(0)[:-1]}s'

    custom_types = STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]')
    outtmpl = re.sub(custom_types, as_plain_string_field, cls._outtmpl_expandpath(outtmpl))
    try:
        cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
    except ValueError as err:
        return err
    else:
        return None

@staticmethod

def _copy_infodict(info_dict):

1158

info_dict = dict(info_dict)

1159

info_dict.pop('__postprocessors', None)

1160

info_dict.pop('__pending_error', None)

1161

return info_dict

1162

1163

def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

    Every keyed field in the template is evaluated here (traversal, maths,
    date formatting, alternates, replacement, default, custom conversions)
    and stored in a fresh dict under a rewritten key; the template is
    rewritten to reference those keys.

    @param sanitize    Whether to sanitize the output as a filename.
                       For backward compatibility, a function can also be passed
    @return            Tuple of (rewritten template, dict of computed values)
    """

    # NOTE: this writes into the caller's dict on purpose, before the copy below
    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    info_dict = self._copy_infodict(info_dict)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
    info_dict['video_autonumber'] = self._num_videos
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
        'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    # Computed values, keyed by the rewritten template keys built in create_key
    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
        '*': float.__mul__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int, slice or "{field, ...}"
    FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
    FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
        'inner': FIELD_INNER_RE,
        'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
    }
    MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    # Grammar of the text inside %(...): [-]fields[maths][>strf][,alternate][&replacement][|default]
    INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
        (?P<negate>-)?
        (?P<fields>{FIELD_RE})
        (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
        (?:>(?P<strf_format>.+?))?
        (?P<remaining>
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
        )$''')

    def _from_user_input(field):
        # Convert one path component written by the user into a traversal key:
        # ':' -> Ellipsis, 'a:b[:c]' -> slice, integer text -> int, anything else stays a string
        if field == ':':
            return ...
        elif ':' in field:
            return slice(*map(int_or_none, field.split(':')))
        elif int_or_none(field) is not None:
            return int(field)
        return field

    def _traverse_infodict(fields):
        # Split on dots, keeping each "{...}" group together as a single component
        fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                  for f in ([x] if x.startswith('{') else x.split('.'))]
        # Drop an empty first/last component left over by the split
        for i in (0, -1):
            if fields and not fields[i]:
                fields.pop(i)

        for i, f in enumerate(fields):
            if not f.startswith('{'):
                fields[i] = _from_user_input(f)
                continue
            assert f.endswith('}'), f'No closing brace for {f} in {fields}'
            # "{a,b.c}" becomes a dict mapping each listed field to its own traversal path
            fields[i] = {k: list(map(_from_user_input, k.split('.'))) for k in f[1:-1].split(',')}

        return traverse_obj(info_dict, fields, traverse_string=True)

    def get_value(mdict):
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            operator = None
            # Consume the maths string alternately: an operator, then its operand
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if operator is None:
                    operator = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                # The operand is either a literal number or another field
                offset = float_or_none(item)
                if offset is None:
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = operator(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    # A missing value/operand makes the whole expression undefined
                    return None
                operator = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
        if sanitize and value == '':
            value = None
        return value

    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
        return sanitize_filename(str(value), restricted=restricted, is_id=(
            bool(re.search(r'(^|[_.])id(\.|$)', key))
            if 'filename-sanitization' in self.params['compat_opts']
            else NO_DEFAULT))

    # `sanitize` may be a callable (backward compatibility); from here on it is only a flag
    sanitizer = sanitize if callable(sanitize) else filename_sanitizer
    sanitize = bool(sanitize)

    def _dumpjson_default(obj):
        # json.dumps fallback: sets and LazyList become lists, anything else its repr
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        return repr(obj)

    class _ReplacementFormatter(string.Formatter):
        # Any numeric field name resolves to the single value passed to format();
        # every other field name is rejected
        def get_field(self, field_name, args, kwargs):
            if field_name.isdigit():
                return args[0], -1
            raise ValueError('Unsupported field')

    replacement_formatter = _ReplacementFormatter()

    def create_key(outer_mobj):
        # Substitution callback: evaluate one %(...)X occurrence, store the value
        # in TMPL_DICT and return the rewritten placeholder
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        value, replacement, default, last_field = None, None, na, ''
        # Walk the comma-separated alternates until one yields a value
        while mobj:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else default
            value = get_value(mobj)
            last_field, replacement = mobj['fields'], mobj['replacement']
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
            else:
                break

        if None not in (value, replacement):
            try:
                value = replacement_formatter.format(replacement, value)
            except ValueError:
                value, default = None, na

        fmt = outer_mobj.group('format')
        if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int):
            fmt = f'0{field_size_compat_map[last_field]:d}d'

        flags = outer_mobj.group('conversion') or ''
        # Same flags/width as requested, but with a plain string conversion
        str_fmt = f'{fmt[:-1]}s'
        if value is None:
            value, fmt = default, 's'
        elif fmt[-1] == 'l':  # list
            delim = '\n' if '#' in flags else ', '
            value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
        elif fmt[-1] == 'j':  # json
            value, fmt = json.dumps(
                value, default=_dumpjson_default,
                indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
        elif fmt[-1] == 'h':  # html
            value, fmt = escapeHTML(str(value)), str_fmt
        elif fmt[-1] == 'q':  # quoted
            value = map(str, variadic(value) if '#' in flags else [value])
            value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
        elif fmt[-1] == 'B':  # bytes
            value = f'%{str_fmt}'.encode() % str(value).encode()
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif fmt[-1] == 'U':  # unicode normalized
            value, fmt = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                value), str_fmt
        elif fmt[-1] == 'D':  # decimal suffix
            num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
            value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                          factor=1024 if '#' in flags else 1000)
        elif fmt[-1] == 'S':  # filename sanitization
            value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
        elif fmt[-1] == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif fmt[-1] not in 'rsa':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            # If value is an object, sanitize might convert it to a string
            # So we convert it to repr first
            if fmt[-1] == 'r':
                value, fmt = repr(value), str_fmt
            elif fmt[-1] == 'a':
                value, fmt = ascii(value), str_fmt
            if fmt[-1] in 'csra':
                value = sanitizer(last_field, value)

        # The dict key is built from the field expression and the requested format
        key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

1385

1386

def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Evaluate an output template against info_dict and return the resulting string.

    Extra arguments are forwarded to `prepare_outtmpl`.
    """
    prepared_tmpl, values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    escaped = self.escape_outtmpl(prepared_tmpl)
    return escaped % values

1389

1390

def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):

1391

assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'

1392

if outtmpl is None:

1393

outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])

1394

try:

1395

outtmpl = self._outtmpl_expandpath(outtmpl)

1396

filename = self.evaluate_outtmpl(outtmpl, info_dict, True)

if not filename:

return None

if tmpl_type in ('', 'temp'):

1401

final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')

1402

if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):

1403

filename = replace_extension(filename, ext, final_ext)

1404

elif tmpl_type:

1405

force_ext = OUTTMPL_TYPES[tmpl_type]

1406

if force_ext:

1407

filename = replace_extension(filename, force_ext, info_dict.get('ext'))

1408

1409

# https://github.com/blackjack4494/youtube-dlc/issues/85

1410

trim_file_name = self.params.get('trim_file_name', False)

1411

if trim_file_name:

1412

no_ext, *ext = filename.rsplit('.', 2)

1413

filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

1414

1415

return filename

1416

except ValueError as err:

1417

self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')

1418

return None

1419

1420

def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
    """Generate the output filename.

    @param dir_type    Template/path type to use; mutually exclusive with outtmpl
    @param outtmpl     Explicit template to evaluate instead of a configured one
    @param warn        Whether to warn when the 'paths' param cannot be applied
    @return            The full output path; '-' for stdout; for an empty
                       result, whatever `_prepare_filename` gave (None or '')
                       when dir_type is '' or 'temp', else ''
    """
    if outtmpl:
        assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
        dir_type = None
    filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
    if not filename:
        # Return before the path checks below: os.path.isabs(None) raises
        # TypeError, which used to happen with warn=True and 'paths' set.
        return filename if dir_type in ('', 'temp') else ''

    if warn and self.params.get('paths'):
        if filename == '-':
            self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
        elif os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
    if filename == '-':
        return filename

    return self.get_output_path(dir_type, filename)

1440

1441

def _match_entry(self, info_dict, incomplete=False, silent=False):
    """Returns None if the file should be downloaded

    Otherwise returns the reason (a string) for skipping it. The reason is
    printed unless `silent` is set, and the matching break_* option, when
    enabled, turns the rejection into an exception.
    """
    _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
    assert incomplete or _type == 'video', 'Only video result can be considered complete'

    video_title = info_dict.get('title', info_dict.get('id', 'entry'))

    def check_filter():
        # Playlists are never filtered here, and neither are url results
        # that do not point at a single video
        if _type in ('playlist', 'multi_video'):
            return
        elif _type in ('url', 'url_transparent') and not try_call(
                lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
            return

        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        match_filter = self.params.get('match_filter')
        if match_filter is None:
            return None

        cancelled = None
        try:
            try:
                ret = match_filter(info_dict, incomplete=incomplete)
            except TypeError:
                # For backward compatibility
                ret = None if incomplete else match_filter(info_dict)
        except DownloadCancelled as err:
            # Only a cancellation carrying NO_DEFAULT is turned into an interactive prompt
            if err.msg is not NO_DEFAULT:
                raise
            ret, cancelled = err.msg, err

        if ret is NO_DEFAULT:
            # Ask the user; keep asking until the reply is y, n or empty
            while True:
                filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
                reply = input(self._format_screen(
                    f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
                if reply in {'y', ''}:
                    return None
                elif reply == 'n':
                    if cancelled:
                        # Re-raise the same kind of cancellation, now with a message
                        raise type(cancelled)(f'Skipping {video_title}')
                    return f'Skipping {video_title}'
        return ret

    # break_opt names the param that, when set, makes a rejection raise break_err
    if self.in_download_archive(info_dict):
        reason = ''.join((
            format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
            format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
            'has already been recorded in the archive'))
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        try:
            reason = check_filter()
        except DownloadCancelled as e:
            reason, break_opt, break_err = e.msg, 'match_filter', type(e)
        else:
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
    if reason is not None:
        if not silent:
            self.to_screen('[download] ' + reason)
        if self.params.get(break_opt, False):
            raise break_err()
    return reason

@staticmethod

def add_extra_info(info_dict, extra_info):

1534

'''Set the keys from extra_info in info dict if they are missing'''

1535

for key, value in extra_info.items():

1536

info_dict.setdefault(key, value)

1537

1538

def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract and return the information dictionary of the URL

    Arguments:
    @param url          URL to extract

    Keyword arguments:
    @param download     Whether to download videos
    @param process      Whether to resolve all unresolved references (URLs, playlist items).
                        Must be True for download to work
    @param ie_key       Use only the extractor with this key

    @param extra_info   Dictionary containing the extra values to add to the info (For internal use only)
    @param force_generic_extractor  Force using the generic extractor (Deprecated; use ie_key='Generic')
    """

    if extra_info is None:
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    # Restrict the candidates to the requested extractor, if it is registered at all
    if ie_key:
        ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
    else:
        ies = self._ies

    for key, ie in ies.items():
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = ie.get_temp_id(url)
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
            self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
                           'has already been recorded in the archive')
            if self.params.get('break_on_existing', False):
                raise ExistingVideoReached()
            # Leaving via break skips the else clause below; the method then returns None
            break
        # The first suitable extractor handles the URL
        return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
    else:
        # Reached only when no candidate extractor was suitable
        extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
        self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
                          tb=False if extractors_restricted else None)

1587

1588

def _handle_extraction_exceptions(func):
    """Decorator for extraction methods: report extraction errors and retry on ReExtractInfo.

    The wrapped method is called again whenever it raises ReExtractInfo.
    Cancellations and the IndexError types of LazyList/PagedList propagate
    untouched. Other errors are handed to `report_error`; if that call
    returns instead of raising, the wrapper returns None.
    """
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        while True:
            try:
                return func(self, *args, **kwargs)
            except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                raise
            except ReExtractInfo as e:
                if e.expected:
                    self.to_screen(f'{e}; Re-extracting data')
                else:
                    self.to_stderr('\r')
                    self.report_warning(f'{e}; Re-extracting data')
                # Run the extraction again
                continue
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(str(e), e.format_traceback())
            except Exception as e:
                if self.params.get('ignoreerrors'):
                    self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
            # Reached only after an error has been reported without raising
            break
    return wrapper

def _wait_for_video(self, ie_result={}):

1621

if (not self.params.get('wait_for_video')

1622

or ie_result.get('_type', 'video') != 'video'

1623

or ie_result.get('formats') or ie_result.get('url')):

1624

return

1625

1626

format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]

last_msg = ''

def progress(msg):

nonlocal last_msg

full_msg = f'{msg}\n'

1632

if not self.params.get('noprogress'):

1633

full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'

1634

elif last_msg:

1635

return

1636

self.to_screen(full_msg, skip_eol=True)

1637

last_msg = msg

1638

1639

min_wait, max_wait = self.params.get('wait_for_video')

1640

diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())

1641

if diff is None and ie_result.get('live_status') == 'is_upcoming':

1642

diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)

1643

self.report_warning('Release time of video is not known')

1644

elif ie_result and (diff or 0) <= 0:

1645

self.report_warning('Video should already be available according to extracted info')

1646

diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))

1647

self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

1648

1649

wait_till = time.time() + diff

1650

try:

1651

while True:

1652

diff = wait_till - time.time()

1653

if diff <= 0:

1654

progress('')

1655

raise ReExtractInfo('[wait] Wait period ended', expected=True)

1656

progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')

1657

time.sleep(1)

1658

except KeyboardInterrupt:

1659

progress('')

1660

raise ReExtractInfo('[wait] Interrupted by user', expected=True)

1661

except BaseException as e:

1662

if not isinstance(e, ReExtractInfo):

self.to_screen('')

raise

def _load_cookies(self, data, *, autoscope=True):
    """Load the cookies contained in a `Cookie` header string

    This tries to work around the security vulnerability of passing cookies to every domain.
    See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj

    @param data         The Cookie header as string to load the cookies from
    @param autoscope    If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains
                        If `True`, save cookies for later to be stored in the jar with a limited scope
                        If a URL, save cookies in the jar with the domain of the URL
    """
    for morsel in LenientSimpleCookie(data).values():
        # With autoscoping, the header may not carry any cookie attributes
        if autoscope and any(morsel.values()):
            raise ValueError('Invalid syntax in Cookie Header')

        domain = morsel.get('domain') or ''
        path = morsel.get('path')
        expires = morsel.get('expires')
        if expires == '':  # 0 is valid
            expires = None

        prepared_cookie = http.cookiejar.Cookie(
            version=morsel.get('version') or 0,
            name=morsel.key,
            value=morsel.value,
            port=None,
            port_specified=False,
            domain=domain,
            domain_specified=True,
            domain_initial_dot=True,
            path=path or '',
            path_specified=bool(path),
            secure=morsel.get('secure') or False,
            expires=expires,
            discard=False,
            comment=None,
            comment_url=None,
            rest={})

        if domain:
            self.cookiejar.set_cookie(prepared_cookie)
            continue

        if autoscope is True:
            # Keep the cookie aside; it gets its scope from _apply_header_cookies
            self.deprecated_feature(
                'Passing cookies as a header is a potential security risk; '
                'they will be scoped to the domain of the downloaded urls. '
                'Please consider loading cookies from a file or browser instead.')
            self.__header_cookies.append(prepared_cookie)
        elif autoscope:
            # `autoscope` is a URL here: scope the cookie to it right away
            self.report_warning(
                'The extractor result contains an unscoped cookie as an HTTP header. '
                f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
                only_once=True)
            self._apply_header_cookies(autoscope, [prepared_cookie])
        else:
            self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
                              tb=False, is_error=False)

1707

1708

def _apply_header_cookies(self, url, cookies=None):

1709

"""Applies stray header cookies to the provided url

1710

1711

This loads header cookies and scopes them to the domain provided in `url`.

1712

While this is not ideal, it helps reduce the risk of them being sent

1713

to an unintended destination while mostly maintaining compatibility.

1714

"""

1715

parsed = urllib.parse.urlparse(url)

1716

if not parsed.hostname:

1717

return

1718

1719

for cookie in map(copy.copy, cookies or self.__header_cookies):

1720

cookie.domain = f'.{parsed.hostname}'

1721

self.cookiejar.set_cookie(cookie)

1722

1723

@_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run the extractor `ie` on `url` and optionally process its result

    @param url         The URL to extract
    @param ie          The extractor instance; its `extract` method is called
    @param download    Passed on to process_ie_result
    @param extra_info  Dict of extra fields; its 'original_url' (if any) is
                       used as a default for the result
    @param process     If truthy, wait for the video if needed and return
                       the processed result; otherwise return the raw result

    Returns None when the extractor returned nothing.
    """
    self._apply_header_cookies(url)

    try:
        raw_result = ie.extract(url)
    except UserNotLive as err:
        if process:
            if self.params.get('wait_for_video'):
                self.report_warning(err)
            self._wait_for_video()
        raise

    if raw_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
        return

    if isinstance(raw_result, list):
        # Backwards compatibility: old IE result format
        raw_result = {'_type': 'compat_list', 'entries': raw_result}

    original_url = extra_info.get('original_url')
    if original_url:
        raw_result.setdefault('original_url', original_url)
    self.add_default_extra_info(raw_result, ie, url)

    if not process:
        return raw_result
    self._wait_for_video(raw_result)
    return self.process_ie_result(raw_result, download, extra_info)

def add_default_extra_info(self, ie_result, ie, url):
    """Add the URL- and extractor-derived fields to `ie_result` via add_extra_info

    @param ie_result  The info dict to extend
    @param ie         The extractor that produced it, or None to skip the
                      'extractor'/'extractor_key' fields
    @param url        The extracted URL, or None to skip the
                      'webpage_url'/'original_url' fields
    """
    if url is not None:
        self.add_extra_info(ie_result, dict(webpage_url=url, original_url=url))

    # Read back from the result: it may have carried a webpage_url already
    page_url = ie_result.get('webpage_url')
    if page_url:
        self.add_extra_info(ie_result, dict(
            webpage_url_basename=url_basename(page_url),
            webpage_url_domain=get_domain(page_url),
        ))

    if ie is not None:
        self.add_extra_info(ie_result, dict(
            extractor=ie.IE_NAME,
            extractor_key=ie.ie_key(),
        ))

1770

1771

def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    @param ie_result   The info dict returned by an extractor; its '_type'
                       (default 'video') selects how it is resolved
    @param download    Whether the resolved videos should be downloaded
    @param extra_info  Extra fields to propagate to the results

    Returns None for a playlist whose webpage_url is already being processed.
    Raises Exception for an unknown '_type'.
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(
            ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
        if ie_result.get('original_url') and not extra_info.get('original_url'):
            extra_info = {'original_url': ie_result['original_url'], **extra_info}

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: report on a copy and return the unresolved result
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            if ie and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            info_copy, _ = self.pre_process(info_copy)
            self._fill_common_fields(info_copy, False)
            self.__forced_printings(info_copy)
            self._raise_pending_errors(info_copy)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        self._raise_pending_errors(ie_result)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        exempted_fields = {'_type', 'url', 'ie_key'}
        if not ie_result.get('section_end') and ie_result.get('section_start') is None:
            # For video clips, the id etc of the clip extractor should be used
            exempted_fields |= {'id', 'extractor', 'extractor_key'}

        new_result = info.copy()
        new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result.get('webpage_url')  # Playlists may not have webpage_url
        if webpage_url and webpage_url in self._playlist_urls:
            # The parentheses matter: `%` binds tighter than `or`, so without
            # them the id fallback would never be used for untitled playlists
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._fill_common_fields(ie_result, False)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Forget the seen URLs once the outermost playlist is finished
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)

1904

1905

def _ensure_dir_exists(self, path):
    """Delegate to make_dir for `path`, handing it `self.report_error` as the error callback

    Returns whatever make_dir returns.
    """
    on_error = self.report_error
    return make_dir(path, on_error)

1907

1908

@staticmethod

1909

def _playlist_infodict(ie_result, strict=False, **kwargs):

1910

info = {

1911

'playlist_count': ie_result.get('playlist_count'),

1912

'playlist': ie_result.get('title') or ie_result.get('id'),

1913

'playlist_id': ie_result.get('id'),

1914

'playlist_title': ie_result.get('title'),

1915

'playlist_uploader': ie_result.get('uploader'),

1916

'playlist_uploader_id': ie_result.get('uploader_id'),

**kwargs,

}

if strict:

return info

if ie_result.get('webpage_url'):

1922

info.update({

1923

'webpage_url': ie_result['webpage_url'],

1924

'webpage_url_basename': url_basename(ie_result['webpage_url']),

1925

'webpage_url_domain': get_domain(ie_result['webpage_url']),

})

return {

**info,

'playlist_index': 0,

'__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),

1931

'extractor': ie_result['extractor'],

1932

'extractor_key': ie_result['extractor_key'],

1933

}

1934

1935

def __process_playlist(self, ie_result, download):
    """Process each entry in the playlist

    @param ie_result  Info dict of type 'playlist' or 'multi_video'; its
                      'entries', 'requested_entries' and 'playlist_count'
                      fields are rewritten here
    @param download   Passed on to the processing of each entry

    Returns the playlist info dict after the 'playlist' postprocessors ran,
    or None if the playlist was rejected by _match_entry or one of the
    playlist file writers returned None.
    """
    assert ie_result['_type'] in ('playlist', 'multi_video')

    common_info = self._playlist_infodict(ie_result, strict=True)
    title = common_info.get('playlist') or '<Untitled>'
    if self._match_entry(common_info, incomplete=True) is not None:
        return
    self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

    all_entries = PlaylistEntries(self, ie_result)
    entries = orderedSet(all_entries.get_requested_items(), lazy=True)

    lazy = self.params.get('lazy_playlist')
    if lazy:
        # Entries are consumed one by one in the loop below; count is unknown
        resolved_entries, n_entries = [], 'N/A'
        ie_result['requested_entries'], ie_result['entries'] = None, None
    else:
        # NB: `entries` and `resolved_entries` are the same list object from
        # here on, so reordering or item assignment through one affects both
        entries = resolved_entries = list(entries)
        n_entries = len(resolved_entries)
        ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
    if not ie_result.get('playlist_count'):
        # Better to do this after potentially exhausting entries
        ie_result['playlist_count'] = all_entries.get_full_count()

    extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
    # Lookups hit ie_result first and fall back to the playlist fields
    ie_copy = collections.ChainMap(ie_result, extra)

    _infojson_written = False
    write_playlist_files = self.params.get('allow_playlist_files', True)
    if write_playlist_files and self.params.get('list_thumbnails'):
        self.list_thumbnails(ie_result)
    if write_playlist_files and not self.params.get('simulate'):
        _infojson_written = self._write_info_json(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
        if _infojson_written is None:
            return
        if self._write_description('playlist', ie_result,
                                   self.prepare_filename(ie_copy, 'pl_description')) is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if lazy:
        if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
            self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
    elif self.params.get('playlistreverse'):
        entries.reverse()
    elif self.params.get('playlistrandom'):
        random.shuffle(entries)

    self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
                   f'{format_field(ie_result, "playlist_count", " of %s")}')

    keep_resolved_entries = self.params.get('extract_flat') != 'discard'
    if self.params.get('extract_flat') == 'discard_in_playlist':
        keep_resolved_entries = ie_result['_type'] != 'playlist'
    if keep_resolved_entries:
        self.write_debug('The information of all playlist entries will be held in memory')

    failures = 0
    # A falsy 'skip_playlist_after_errors' means there is no failure limit
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, (playlist_index, entry) in enumerate(entries):
        if lazy:
            # Keeps resolved_entries[i] valid for the assignments below
            resolved_entries.append((playlist_index, entry))
        if not entry:
            continue

        entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
        if not lazy and 'playlist-index' in self.params['compat_opts']:
            playlist_index = ie_result['requested_entries'][i]

        entry_copy = collections.ChainMap(entry, {
            **common_info,
            'n_entries': int_or_none(n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': i + 1,
        })

        if self._match_entry(entry_copy, incomplete=True) is not None:
            # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
            # NO_DEFAULT marks the entry for removal from the final lists
            resolved_entries[i] = (playlist_index, NO_DEFAULT)
            continue

        self.to_screen('[download] Downloading item %s of %s' % (
            self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))

        entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
            'playlist_index': playlist_index,
            'playlist_autonumber': i + 1,
        }, extra))
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
            break
        if keep_resolved_entries:
            resolved_entries[i] = (playlist_index, entry_result)

    # Update with processed data
    ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
    ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
    if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
        # Do not set for full playlist
        ie_result.pop('requested_entries')

    # Write the updated info to json
    if _infojson_written is True and self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
        return

    ie_result = self.run_all_pps('playlist', ie_result)
    self.to_screen(f'[download] Finished downloading playlist: {title}')
    return ie_result

2051

2052

@_handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Resolve one playlist entry via process_ie_result

    The decorator applies the shared extraction error handling to the
    call, so this exists only to give a single entry that handling.
    """
    resolved = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return resolved

2056

2057

def _build_format_filter(self, filter_spec):

2058

" Returns a function to filter the formats according to the filter_spec "

OPERATORS = {

'<': operator.lt,

'<=': operator.le,

'>': operator.gt,

'>=': operator.ge,

'=': operator.eq,

'!=': operator.ne,

}

operator_rex = re.compile(r'''(?x)\s*

2069

(?P<key>[\w.-]+)\s*

2070

(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*

2071

(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*

2072

''' % '|'.join(map(re.escape, OPERATORS.keys())))

2073

m = operator_rex.fullmatch(filter_spec)

2074

if m:

2075

try:

2076

comparison_value = int(m.group('value'))

2077

except ValueError:

2078

comparison_value = parse_filesize(m.group('value'))

2079

if comparison_value is None:

2080

comparison_value = parse_filesize(m.group('value') + 'B')

2081

if comparison_value is None:

2082

raise ValueError(

2083

'Invalid value %r in format specification %r' % (

2084

m.group('value'), filter_spec))

2085

op = OPERATORS[m.group('op')]

if not m:

STR_OPERATORS = {

'=': operator.eq,

'^=': lambda attr, value: attr.startswith(value),

2091

'$=': lambda attr, value: attr.endswith(value),

2092

'*=': lambda attr, value: value in attr,

2093

'~=': lambda attr, value: value.search(attr) is not None

2094

}

2095

str_operator_rex = re.compile(r'''(?x)\s*

2096

(?P<key>[a-zA-Z0-9._-]+)\s*

2097

(?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?

2098

(?P<quote>["'])?

2099

(?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))

2100

(?(quote)(?P=quote))\s*

2101

''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))

2102

m = str_operator_rex.fullmatch(filter_spec)

2103

if m:

2104

if m.group('op') == '~=':

2105

comparison_value = re.compile(m.group('value'))

2106

else:

2107

comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))

2108

str_op = STR_OPERATORS[m.group('op')]

2109

if m.group('negation'):

2110

op = lambda attr, value: not str_op(attr, value)

else:

op = str_op

if not m:

raise SyntaxError('Invalid filter specification %r' % filter_spec)

2116

2117

def _filter(f):

2118

actual_value = f.get(m.group('key'))

2119

if actual_value is None:

2120

return m.group('none_inclusive')

2121

return op(actual_value, comparison_value)

2122

return _filter

2123

2124

def _check_formats(self, formats):

2125

for f in formats:

2126

self.to_screen('[info] Testing format %s' % f['format_id'])

2127

path = self.get_output_path('temp')

2128

if not self._ensure_dir_exists(f'{path}/'):

2129

continue

2130

temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)

2131

temp_file.close()

2132

try:

2133

success, _ = self.dl(temp_file.name, f, test=True)

2134

except (DownloadError, OSError, ValueError) + network_exceptions:

2135

success = False

2136

finally:

2137

if os.path.exists(temp_file.name):

2138

try:

2139

os.remove(temp_file.name)

2140

except OSError:

2141

self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)

if success:

yield f

else:

self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

2146

2147

def _default_format_spec(self, info_dict, download=True):

2148

2149

def can_merge():

2150

merger = FFmpegMergerPP(self)

2151

return merger.available and merger.can_merge()

2152

2153

prefer_best = (

2154

not self.params.get('simulate')

and download

and (

not can_merge()

or info_dict.get('is_live') and not self.params.get('live_from_start')

2159

or self.params['outtmpl']['default'] == '-'))

2160

compat = (

2161

prefer_best

2162

or self.params.get('allow_multiple_audio_streams', False)

2163

or 'format-spec' in self.params['compat_opts'])

2164

2165

return (

2166

'best/bestvideo+bestaudio' if prefer_best

2167

else 'bestvideo*+bestaudio/best' if not compat

2168

else 'bestvideo+bestaudio/best')

2169

2170

def build_format_selector(self, format_spec):

2171

def syntax_error(note, start):

2172

message = (

2173

'Invalid format specification: '

2174

'{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))

2175

return SyntaxError(message)

2176

2177

PICKFIRST = 'PICKFIRST'

MERGE = 'MERGE'

SINGLE = 'SINGLE'

GROUP = 'GROUP'

FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

2182

2183

allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),

2184

'video': self.params.get('allow_multiple_video_streams', False)}

2185

2186

def _parse_filter(tokens):

2187

filter_parts = []

2188

for type, string_, start, _, _ in tokens:

2189

if type == tokenize.OP and string_ == ']':

2190

return ''.join(filter_parts)

2191

else:

2192

filter_parts.append(string_)

2193

2194

def _remove_unused_ops(tokens):

2195

# Remove operators that we don't use and join them with the surrounding strings.

2196

# E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'

2197

ALLOWED_OPS = ('/', '+', ',', '(', ')')

2198

last_string, last_start, last_end, last_line = None, None, None, None

2199

for type, string_, start, end, line in tokens:

2200

if type == tokenize.OP and string_ == '[':

2201

if last_string:

2202

yield tokenize.NAME, last_string, last_start, last_end, last_line

2203

last_string = None

2204

yield type, string_, start, end, line

2205

# everything inside brackets will be handled by _parse_filter

2206

for type, string_, start, end, line in tokens:

2207

yield type, string_, start, end, line

2208

if type == tokenize.OP and string_ == ']':

2209

break

2210

elif type == tokenize.OP and string_ in ALLOWED_OPS:

2211

if last_string:

2212

yield tokenize.NAME, last_string, last_start, last_end, last_line

2213

last_string = None

2214

yield type, string_, start, end, line

2215

elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:

2216

if not last_string:

2217

last_string = string_

last_start = start

last_end = end

else:

last_string += string_

2222

if last_string:

2223

yield tokenize.NAME, last_string, last_start, last_end, last_line

2224

2225

def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):

2226

selectors = []

2227

current_selector = None

2228

for type, string_, start, _, _ in tokens:

2229

# ENCODING is only defined in python 3.x

2230

if type == getattr(tokenize, 'ENCODING', None):

2231

continue

2232

elif type in [tokenize.NAME, tokenize.NUMBER]:

2233

current_selector = FormatSelector(SINGLE, string_, [])

2234

elif type == tokenize.OP:

2235

if string_ == ')':

2236

if not inside_group:

2237

# ')' will be handled by the parentheses group

2238

tokens.restore_last_token()

2239

break

2240

elif inside_merge and string_ in ['/', ',']:

2241

tokens.restore_last_token()

2242

break

2243

elif inside_choice and string_ == ',':

2244

tokens.restore_last_token()

2245

break

2246

elif string_ == ',':

2247

if not current_selector:

2248

raise syntax_error('"," must follow a format selector', start)

2249

selectors.append(current_selector)

2250

current_selector = None

2251

elif string_ == '/':

2252

if not current_selector:

2253

raise syntax_error('"/" must follow a format selector', start)

2254

first_choice = current_selector

2255

second_choice = _parse_format_selection(tokens, inside_choice=True)

2256

current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])

2257

elif string_ == '[':

2258

if not current_selector:

2259

current_selector = FormatSelector(SINGLE, 'best', [])

2260

format_filter = _parse_filter(tokens)

2261

current_selector.filters.append(format_filter)

2262

elif string_ == '(':

2263

if current_selector:

2264

raise syntax_error('Unexpected "("', start)

2265

group = _parse_format_selection(tokens, inside_group=True)

2266

current_selector = FormatSelector(GROUP, group, [])

2267

elif string_ == '+':

2268

if not current_selector:

2269

raise syntax_error('Unexpected "+"', start)

2270

selector_1 = current_selector

2271

selector_2 = _parse_format_selection(tokens, inside_merge=True)

2272

if not selector_2:

2273

raise syntax_error('Expected a selector', start)

2274

current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])

2275

else:

2276

raise syntax_error(f'Operator not recognized: "{string_}"', start)

2277

elif type == tokenize.ENDMARKER:

2278

break

2279

if current_selector:

2280

selectors.append(current_selector)

2281

return selectors

2282

2283

def _merge(formats_pair):

2284

format_1, format_2 = formats_pair

2285

2286

formats_info = []

2287

formats_info.extend(format_1.get('requested_formats', (format_1,)))

2288

formats_info.extend(format_2.get('requested_formats', (format_2,)))

2289

2290

if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:

2291

get_no_more = {'video': False, 'audio': False}

2292

for (i, fmt_info) in enumerate(formats_info):

2293

if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':

2294

formats_info.pop(i)

2295

continue

2296

for aud_vid in ['audio', 'video']:

2297

if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':

2298

if get_no_more[aud_vid]:

2299

formats_info.pop(i)

2300

break

2301

get_no_more[aud_vid] = True

2302

2303

if len(formats_info) == 1:

2304

return formats_info[0]

2305

2306

video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']

2307

audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

2308

2309

the_only_video = video_fmts[0] if len(video_fmts) == 1 else None

2310

the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

2311

2312

output_ext = get_compatible_ext(

2313

vcodecs=[f.get('vcodec') for f in video_fmts],

2314

acodecs=[f.get('acodec') for f in audio_fmts],

2315

vexts=[f['ext'] for f in video_fmts],

2316

aexts=[f['ext'] for f in audio_fmts],

2317

preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))

2318

or self.params.get('prefer_free_formats') and ('webm', 'mkv')))

2319

2320

filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

2321

2322

new_dict = {

2323

'requested_formats': formats_info,

2324

'format': '+'.join(filtered('format')),

2325

'format_id': '+'.join(filtered('format_id')),

2326

'ext': output_ext,

2327

'protocol': '+'.join(map(determine_protocol, formats_info)),

2328

'language': '+'.join(orderedSet(filtered('language'))) or None,

2329

'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,

2330

'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,

2331

'tbr': sum(filtered('tbr', 'vbr', 'abr')),

}

if the_only_video:

new_dict.update({

'width': the_only_video.get('width'),

2337

'height': the_only_video.get('height'),

2338

'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),

2339

'fps': the_only_video.get('fps'),

2340

'dynamic_range': the_only_video.get('dynamic_range'),

2341

'vcodec': the_only_video.get('vcodec'),

2342

'vbr': the_only_video.get('vbr'),

2343

'stretched_ratio': the_only_video.get('stretched_ratio'),

2344

'aspect_ratio': the_only_video.get('aspect_ratio'),

})

if the_only_audio:

new_dict.update({

'acodec': the_only_audio.get('acodec'),

2350

'abr': the_only_audio.get('abr'),

2351

'asr': the_only_audio.get('asr'),

2352

'audio_channels': the_only_audio.get('audio_channels')

})

return new_dict

def _check_formats(formats):

2358

if self.params.get('check_formats') == 'selected':

2359

yield from self._check_formats(formats)

2360

return

2361

elif (self.params.get('check_formats') is not None

2362

or self.params.get('allow_unplayable_formats')):

yield from formats

return

for f in formats:

if f.get('has_drm') or f.get('__needs_testing'):

2368

yield from self._check_formats([f])

else:

yield f

def _build_selector_function(selector):

2373

if isinstance(selector, list): # ,

2374

fs = [_build_selector_function(s) for s in selector]

2375

2376

def selector_function(ctx):

2377

for f in fs:

2378

yield from f(ctx)

2379

return selector_function

2380

2381

elif selector.type == GROUP: # ()

2382

selector_function = _build_selector_function(selector.selector)

2383

2384

elif selector.type == PICKFIRST: # /

2385

fs = [_build_selector_function(s) for s in selector.selector]

2386

2387

def selector_function(ctx):

2388

for f in fs:

2389

picked_formats = list(f(ctx))

2390

if picked_formats:

2391

return picked_formats

2392

return []

2393

2394

elif selector.type == MERGE: # +

2395

selector_1, selector_2 = map(_build_selector_function, selector.selector)

2396

2397

def selector_function(ctx):

2398

for pair in itertools.product(selector_1(ctx), selector_2(ctx)):

2399

yield _merge(pair)

2400

2401

elif selector.type == SINGLE: # atom

2402

format_spec = selector.selector or 'best'

2403

2404

# TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector

2405

if format_spec == 'all':

2406

def selector_function(ctx):

2407

yield from _check_formats(ctx['formats'][::-1])

2408

elif format_spec == 'mergeall':

2409

def selector_function(ctx):

2410

formats = list(_check_formats(

2411

f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))

2412

if not formats:

2413

return

2414

merged_format = formats[-1]

2415

for f in formats[-2::-1]:

2416

merged_format = _merge((merged_format, f))

yield merged_format

else:

format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1

mobj = re.match(

format_spec)

if mobj is not None:

format_idx = int_or_none(mobj.group('n'), default=1)

2426

format_reverse = mobj.group('bw')[0] == 'b'

2427

format_type = (mobj.group('type') or [None])[0]

2428

not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)

2429

format_modified = mobj.group('mod') is not None

2430

2431

format_fallback = not format_type and not format_modified # for b, w

2432

_filter_f = (

2433

(lambda f: f.get('%scodec' % format_type) != 'none')

2434

if format_type and format_modified # bv*, ba*, wv*, wa*

2435

else (lambda f: f.get('%scodec' % not_format_type) == 'none')

2436

if format_type # bv, ba, wv, wa

2437

else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')

2438

if not format_modified # b, w

2439

else lambda f: True) # b*, w*

2440

filter_f = lambda f: _filter_f(f) and (

2441

f.get('vcodec') != 'none' or f.get('acodec') != 'none')

2442

else:

2443

if format_spec in self._format_selection_exts['audio']:

2444

filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'

2445

elif format_spec in self._format_selection_exts['video']:

2446

filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'

2447

seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'

2448

elif format_spec in self._format_selection_exts['storyboards']:

2449

filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'

2450

else:

2451

filter_f = lambda f: f.get('format_id') == format_spec # id

2452

2453

def selector_function(ctx):

2454

formats = list(ctx['formats'])

2455

matches = list(filter(filter_f, formats)) if filter_f is not None else formats

2456

if not matches:

2457

if format_fallback and ctx['incomplete_formats']:

2458

# for extractors with incomplete formats (audio only (soundcloud)

2459

# or video only (imgur)) best/worst will fallback to

2460

# best/worst {video,audio}-only format

2461

matches = list(filter(lambda f: f.get('vcodec') != 'none' or f.get('acodec') != 'none', formats))

2462

elif seperate_fallback and not ctx['has_merged_format']:

2463

# for compatibility with youtube-dl when there is no pre-merged format

2464

matches = list(filter(seperate_fallback, formats))

2465

matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))

2466

try:

2467

yield matches[format_idx - 1]

2468

except LazyList.IndexError:

2469

return

2470

2471

filters = [self._build_format_filter(f) for f in selector.filters]

2472

2473

def final_selector(ctx):

2474

ctx_copy = dict(ctx)

2475

for _filter in filters:

2476

ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))

2477

return selector_function(ctx_copy)

2478

return final_selector

2479

2480

# HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid

2481

# Prefix numbers with random letters to avoid it being classified as a number

2482

# See: https://github.com/yt-dlp/yt-dlp/pull/8797

2483

# TODO: Implement parser not reliant on tokenize.tokenize

2484

prefix = ''.join(random.choices(string.ascii_letters, k=32))

2485

stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())

2486

try:

2487

tokens = list(_remove_unused_ops(

2488

token._replace(string=token.string.replace(prefix, ''))

2489

for token in tokenize.tokenize(stream.readline)))

2490

except tokenize.TokenError:

2491

raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

2492

2493

class TokenIterator:

2494

def __init__(self, tokens):

self.tokens = tokens

self.counter = 0

def __iter__(self):

return self

def __next__(self):

if self.counter >= len(self.tokens):

2503

raise StopIteration()

2504

value = self.tokens[self.counter]

self.counter += 1

return value

next = __next__

def restore_last_token(self):

2511

self.counter -= 1

2512

2513

parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))

2514

return _build_selector_function(parsed_selector)

2515

2516

def _calc_headers(self, info_dict, load_cookies=False):
    """
    Build the HTTP headers to be used for the URL of info_dict

    The global 'http_headers' param is combined with info_dict's own
    'http_headers'. Any 'Cookie' header is dropped from the result; instead,
    the cookies the cookiejar holds for info_dict['url'] are serialized into
    info_dict['cookies'] (info_dict is modified in-place).

    @param info_dict     Mapping with at least 'url'
    @param load_cookies  Whether to first load the 'Cookie' header and the
                         'cookies' field of info_dict into the cookiejar
                         (for --load-info-json)
    @returns             The computed headers
    """
    headers = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
    clean_headers(headers)

    if load_cookies:  # For --load-info-json
        # compat: the header carries no scope, so it is scoped to the URL
        self._load_cookies(headers.get('Cookie'), autoscope=info_dict['url'])
        self._load_cookies(info_dict.get('cookies'), autoscope=False)

    # The `Cookie` header is removed to prevent leaks and unscoped cookies.
    # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
    headers.pop('Cookie', None)

    cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
    if cookies:
        encoder = LenientSimpleCookie()

        def cookie_parts(cookie):
            # Yields "name=value" followed by whichever attributes are set
            _, value = encoder.value_encode(cookie.value)
            yield f'{cookie.name}={value}'
            if cookie.domain:
                yield f'Domain={cookie.domain}'
            if cookie.path:
                yield f'Path={cookie.path}'
            if cookie.secure:
                yield 'Secure'
            if cookie.expires:
                yield f'Expires={cookie.expires}'
            if cookie.version:
                yield f'Version={cookie.version}'

        info_dict['cookies'] = '; '.join(
            itertools.chain.from_iterable(map(cookie_parts, cookies)))

    if 'X-Forwarded-For' not in headers and (
            x_forwarded_for_ip := info_dict.get('__x_forwarded_for_ip')):
        headers['X-Forwarded-For'] = x_forwarded_for_ip

    return headers

def _calc_cookies(self, url):

2553

self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')

2554

return self.cookiejar.get_cookie_header(url)

2555

2556

def _sort_thumbnails(self, thumbnails):

2557

thumbnails.sort(key=lambda t: (

2558

t.get('preference') if t.get('preference') is not None else -1,

2559

t.get('width') if t.get('width') is not None else -1,

2560

t.get('height') if t.get('height') is not None else -1,

2561

t.get('id') if t.get('id') is not None else '',

2562

t.get('url')))

2563

2564

def _sanitize_thumbnails(self, info_dict):

2565

thumbnails = info_dict.get('thumbnails')

2566

if thumbnails is None:

2567

thumbnail = info_dict.get('thumbnail')

2568

if thumbnail:

2569

info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]

if not thumbnails:

return

def check_thumbnails(thumbnails):

2574

for t in thumbnails:

2575

self.to_screen(f'[info] Testing thumbnail {t["id"]}')

2576

try:

2577

self.urlopen(HEADRequest(t['url']))

2578

except network_exceptions as err:

2579

self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')

continue

yield t

self._sort_thumbnails(thumbnails)

2584

for i, t in enumerate(thumbnails):

2585

if t.get('id') is None:

2586

t['id'] = '%d' % i

2587

if t.get('width') and t.get('height'):

2588

t['resolution'] = '%dx%d' % (t['width'], t['height'])

2589

t['url'] = sanitize_url(t['url'])

2590

2591

if self.params.get('check_formats') is True:

2592

info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)

2593

else:

2594

info_dict['thumbnails'] = thumbnails

2595

2596

def _fill_common_fields(self, info_dict, final=True):

2597

# TODO: move sanitization here

2598

if final:

2599

title = info_dict['fulltitle'] = info_dict.get('title')

2600

if not title:

2601

if title == '':

2602

self.write_debug('Extractor gave empty title. Creating a generic title')

2603

else:

2604

self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')

2605

info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'

2606

2607

if info_dict.get('duration') is not None:

2608

info_dict['duration_string'] = formatSeconds(info_dict['duration'])

2609

2610

for ts_key, date_key in (

2611

('timestamp', 'upload_date'),

2612

('release_timestamp', 'release_date'),

2613

('modified_timestamp', 'modified_date'),

2614

):

2615

if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:

2616

# Working around out-of-range timestamp values (e.g. negative ones on Windows,

2617

# see http://bugs.python.org/issue1646728)

2618

with contextlib.suppress(ValueError, OverflowError, OSError):

2619

upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)

2620

info_dict[date_key] = upload_date.strftime('%Y%m%d')

2621

2622

if not info_dict.get('release_year'):

2623

info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])}))

2624

2625

live_keys = ('is_live', 'was_live')

2626

live_status = info_dict.get('live_status')

2627

if live_status is None:

2628

for key in live_keys:

2629

if info_dict.get(key) is False:

2630

continue

2631

if info_dict.get(key):

2632

live_status = key

2633

break

2634

if all(info_dict.get(key) is False for key in live_keys):

2635

live_status = 'not_live'

2636

if live_status:

2637

info_dict['live_status'] = live_status

2638

for key in live_keys:

2639

if info_dict.get(key) is None:

2640

info_dict[key] = (live_status == key)

2641

if live_status == 'post_live':

2642

info_dict['was_live'] = True

2643

2644

# Auto generate title fields corresponding to the *_number fields when missing

2645

# in order to always have clean titles. This is very common for TV series.

2646

for field in ('chapter', 'season', 'episode'):

2647

if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):

2648

info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

2649

2650

for old_key, new_key in self._deprecated_multivalue_fields.items():

2651

if new_key in info_dict and old_key in info_dict:

2652

if '_version' not in info_dict: # HACK: Do not warn when using --load-info-json

2653

self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')

2654

elif old_value := info_dict.get(old_key):

2655

info_dict[new_key] = old_value.split(', ')

2656

elif new_value := info_dict.get(new_key):

2657

info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)

2658

2659

def _raise_pending_errors(self, info):

2660

err = info.pop('__pending_error', None)

2661

if err:

2662

self.report_error(err, tb=False)

2663

2664

def sort_formats(self, info_dict):
    """
    Sort the formats of info_dict in-place, ordered by the preference that
    FormatSorter calculates from info_dict['_format_sort_fields']
    """
    formats = self._get_formats(info_dict)
    sort_fields = info_dict.get('_format_sort_fields') or []
    sorter = FormatSorter(self, sort_fields)
    formats.sort(key=sorter.calculate_preference)

2668

2669

def process_video_result(self, info_dict, download=True):
    """
    Process a single video result: sanitize its fields, select the
    formats to download and (if download is set) process each of them

    @param info_dict  The video result; its '_type' must be 'video' (or unset).
                      It is modified in-place
    @param download   Passed on to _default_format_spec; when falsy, the
                      selected formats are not handed to process_info
    @returns          The processed info_dict
    Raises ExtractorError when 'id' is missing/empty or when the requested
    format is unavailable (unless 'ignore_no_formats_error' is set), and
    MaxDownloadsReached once process_info has signalled it
    """
    assert info_dict.get('_type', 'video') == 'video'
    self._num_videos += 1

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
    elif not info_dict.get('id'):
        raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

    def report_force_conversion(field, field_not, conversion):
        # Warn that an extractor returned a field of the wrong type
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        # Coerce info[string_field] to str (with a warning) unless it is None or already a str
        field = info.get(string_field)
        if field is None or isinstance(field, str):
            return
        report_force_conversion(string_field, 'a string', 'string')
        info[string_field] = str(field)

    def sanitize_numeric_fields(info):
        # Coerce every non-numeric value among self._NUMERIC_FIELDS using int_or_none (with a warning)
        for numeric_field in self._NUMERIC_FIELDS:
            field = info.get(numeric_field)
            if field is None or isinstance(field, (int, float)):
                continue
            report_force_conversion(numeric_field, 'numeric', 'int')
            info[numeric_field] = int_or_none(field)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)
    # When a section is given, the duration is that of the section
    if info_dict.get('section_end') and info_dict.get('section_start') is not None:
        info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
    # A non-positive duration is removed; the warning is only shown when the removed value is truthy
    if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
        self.report_warning('"duration" field is negative, there is an error in extractor')

    chapters = info_dict.get('chapters') or []
    # Make the chapter list start at 0
    if chapters and chapters[0].get('start_time'):
        chapters.insert(0, {'start_time': 0})

    # Sentinel standing in for the neighbour of the first and the last chapter
    dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
    for idx, (prev, current, next_) in enumerate(zip(
            (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
        # Missing boundaries are taken from the neighbouring chapters
        if current.get('start_time') is None:
            current['start_time'] = prev.get('end_time')
        if not current.get('end_time'):
            current['end_time'] = next_.get('start_time')
        if not current.get('title'):
            current['title'] = f'<Untitled Chapter {idx}>'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    self._sanitize_thumbnails(info_dict)

    thumbnail = info_dict.get('thumbnail')
    thumbnails = info_dict.get('thumbnails')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        # Fall back to the last entry of the thumbnails list
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if info_dict.get('display_id') is None and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    self._fill_common_fields(info_dict)

    # Sanitize the URL of every subtitle format and fill in a missing 'ext'
    for cc_kind in ('subtitles', 'automatic_captions'):
        cc = info_dict.get(cc_kind)
        if cc:
            for _, subtitle in cc.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    formats = self._get_formats(info_dict)

    # Backward compatibility with InfoExtractor._sort_formats
    field_preference = (formats or [{}])[0].pop('__sort_fields', None)
    if field_preference:
        info_dict['_format_sort_fields'] = field_preference

    info_dict['_has_drm'] = any(  # or None ensures --clean-infojson removes it
        f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
    # Formats with has_drm == 'maybe' are kept even when unplayable formats are not allowed
    if not self.params.get('allow_unplayable_formats'):
        formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']

    if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
        self.report_warning(
            f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
            'only images are available for download. Use --list-formats to see them'.capitalize())

    get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
    if not get_from_start:
        # The current local date and time is appended to the title
        info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
    if info_dict.get('is_live') and formats:
        # Keep only the formats whose 'is_from_start' agrees with what was requested
        formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
        if get_from_start and not formats:
            self.raise_no_formats(info_dict, msg=(
                '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
                'If you want to download from the current time, use --no-live-from-start'))

    def is_wellformed(f):
        # A format is usable only if it has a non-empty 'url'; a bytes URL is converted to str
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = list(filter(is_wellformed, formats or []))

    if not formats:
        self.raise_no_formats(info_dict)

    # Fill in the fields of each format that can be derived
    for format in formats:
        sanitize_string_field(format, 'format_id')
        sanitize_numeric_fields(format)
        format['url'] = sanitize_url(format['url'])
        if format.get('ext') is None:
            format['ext'] = determine_ext(format['url']).lower()
        if format.get('protocol') is None:
            format['protocol'] = determine_protocol(format)
        if format.get('resolution') is None:
            format['resolution'] = self.format_resolution(format, default=None)
        if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
            format['dynamic_range'] = 'SDR'
        if format.get('aspect_ratio') is None:
            format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
        # For fragmented formats, "tbr" is often max bitrate and not average
        if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url'))
                and info_dict.get('duration') and format.get('tbr')
                and not format.get('filesize') and not format.get('filesize_approx')):
            format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
        # The ChainMap lets the format's own fields take precedence over those of info_dict
        format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)

    # Safeguard against old/insecure infojson when using --load-info-json
    if info_dict.get('http_headers'):
        info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers'])
        info_dict['http_headers'].pop('Cookie', None)

    # This is copied to http_headers by the above _calc_headers and can now be removed
    if '__x_forwarded_for_ip' in info_dict:
        del info_dict['__x_forwarded_for_ip']

    self.sort_formats({
        'formats': formats,
        '_format_sort_fields': info_dict.get('_format_sort_fields')
    })

    # Sanitize and group by format_id
    formats_dict = {}
    for i, format in enumerate(formats):
        if not format.get('format_id'):
            format['format_id'] = str(i)
        else:
            # Sanitize format_id from characters used in format selector expression
            format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
        formats_dict.setdefault(format['format_id'], []).append(format)

    # Make sure all formats have unique format_id
    common_exts = set(itertools.chain(*self._format_selection_exts.values()))
    for format_id, ambiguous_formats in formats_dict.items():
        ambigious_id = len(ambiguous_formats) > 1
        for i, format in enumerate(ambiguous_formats):
            if ambigious_id:
                format['format_id'] = '%s-%d' % (format_id, i)
            # Ensure there is no conflict between id and ext in format selection
            # See https://github.com/yt-dlp/yt-dlp/issues/1282
            if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                format['format_id'] = 'f%s' % format['format_id']

            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )

    if self.params.get('check_formats') is True:
        # The formats are checked lazily, starting from the last one in the sorted list
        formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

    if not formats or formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats

    info_dict, _ = self.pre_process(info_dict)

    # Any result other than None from _match_entry ends processing here
    if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
        return info_dict

    self.post_extract(info_dict)
    info_dict, _ = self.pre_process(info_dict, 'after_filter')

    # The pre-processors may have modified the formats
    formats = self._get_formats(info_dict)

    list_only = self.params.get('simulate') == 'list_only'
    # A format_selector of '-' means the selector is asked for interactively
    interactive_format_selection = not list_only and self.format_selector == '-'
    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
    if self.params.get('listsubtitles'):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
    if self.params.get('listformats') or interactive_format_selection:
        self.list_formats(info_dict)
    if list_only:
        # Without this printing, -F --print-json will not work
        self.__forced_printings(info_dict)
        return info_dict

    format_selector = self.format_selector
    # Loops only in interactive mode, until a valid selector yields at least one format
    while True:
        if interactive_format_selection:
            req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
                               + '(Press ENTER for default, or Ctrl+C to quit)'
                               + self._format_screen(': ', self.Styles.EMPHASIS))
            try:
                format_selector = self.build_format_selector(req_format) if req_format else None
            except SyntaxError as err:
                self.report_error(err, tb=False, is_error=False)
                continue

        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug(f'Default format spec: {req_format}')
            format_selector = self.build_format_selector(req_format)

        formats_to_download = list(format_selector({
            'formats': formats,
            'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
            'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)  # No formats with video
                                   or all(f.get('acodec') == 'none' for f in formats)),  # OR, No formats with audio
        }))
        if interactive_format_selection and not formats_to_download:
            self.report_error('Requested format is not available', tb=False, is_error=False)
            continue
        break

    if not formats_to_download:
        if not self.params.get('ignore_no_formats_error'):
            raise ExtractorError(
                'Requested format is not available. Use --list-formats for a list of available formats',
                expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
        self.report_warning('Requested format is not available')
        # Process what we can, even without any available formats.
        formats_to_download = [{}]

    # Without a 'download_ranges' param, a single empty range ({}) is used
    requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
    best_format, downloaded_formats = formats_to_download[-1], []
    if download:
        if best_format and requested_ranges:
            def to_screen(*msg):
                self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')

            to_screen(f'Downloading {len(formats_to_download)} format(s):',
                      (f['format_id'] for f in formats_to_download))
            if requested_ranges != ({}, ):
                to_screen(f'Downloading {len(requested_ranges)} time ranges:',
                          (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
        max_downloads_reached = False

        # Every selected format is processed once per requested range
        for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
            new_info = self._copy_infodict(info_dict)
            new_info.update(fmt)
            offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
            end_time = offset + min(chapter.get('end_time', duration), duration)
            # duration may not be accurate. So allow deviations <1sec
            if end_time == float('inf') or end_time > offset + duration + 1:
                end_time = None
            if chapter or offset:
                new_info.update({
                    'section_start': offset + chapter.get('start_time', 0),
                    'section_end': end_time,
                    'section_title': chapter.get('title'),
                    'section_number': chapter.get('index'),
                })
            downloaded_formats.append(new_info)
            try:
                self.process_info(new_info)
            except MaxDownloadsReached:
                # Re-raised only after the clean-up and the 'after_video' postprocessors below
                max_downloads_reached = True
            self._raise_pending_errors(new_info)
            # Remove copied info
            for key, val in tuple(new_info.items()):
                if info_dict.get(key) == val:
                    new_info.pop(key)
            if max_downloads_reached:
                break

        # Record in the archive only if some download asked for it and none returned False
        write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
        assert write_archive.issubset({True, False, 'ignore'})
        if True in write_archive and False not in write_archive:
            self.record_download_archive(info_dict)

        info_dict['requested_downloads'] = downloaded_formats
        info_dict = self.run_all_pps('after_video', info_dict)
        if max_downloads_reached:
            raise MaxDownloadsReached()

    # We update the info dict with the selected best quality format (backwards compatibility)
    info_dict.update(best_format)
    return info_dict

2991

2992

def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """
    Select the requested subtitles and their format

    @returns  None when subtitles are not wanted or none are available;
              otherwise a dict mapping each requested language to the
              chosen subtitle format
    """
    available_subs = {}
    normal_sub_langs = []
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
        normal_sub_langs = tuple(normal_subtitles.keys())
    if automatic_captions and self.params.get('writeautomaticsub'):
        # Automatic captions never replace normal subtitles of the same language
        for lang, cap_info in automatic_captions.items():
            available_subs.setdefault(lang, cap_info)

    wants_subs = self.params.get('writesubtitles') or self.params.get('writeautomaticsub')
    if not (available_subs and wants_subs):
        return None

    all_sub_langs = tuple(available_subs.keys())
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        try:
            requested_langs = orderedSet_from_options(
                self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}')
    else:
        # Default: the single most preferred language, favouring
        # normal subtitles and languages starting with "en"
        def english(langs):
            return filter(lambda f: f.startswith('en'), langs)

        requested_langs = LazyList(itertools.chain(
            ['en'] if 'en' in normal_sub_langs else [],
            english(normal_sub_langs),
            ['en'] if 'en' in all_sub_langs else [],
            english(all_sub_langs),
            normal_sub_langs, all_sub_langs,
        ))[:1]
    if requested_langs:
        self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []

    def pick_format(formats, lang):
        # The first preference that is satisfied wins
        for ext in formats_preference:
            if ext == 'best':
                return formats[-1]
            matches = [fmt for fmt in formats if fmt['ext'] == ext]
            if matches:
                return matches[-1]
        # No preference matched: fall back to the last format, with a warning
        f = formats[-1]
        self.report_warning(
            'No subtitle format found matching "%s" for language %s, '
            'using %s' % (formats_query, lang, f['ext']))
        return f

    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        if formats is None:
            self.report_warning(f'{lang} subtitles not available for {video_id}')
            continue
        subs[lang] = pick_format(formats, lang)
    return subs

def _forceprint(self, key, info_dict):

3053

if info_dict is None:

3054

return

3055

info_copy = info_dict.copy()

3056

info_copy.setdefault('filename', self.prepare_filename(info_dict))

3057

if info_dict.get('requested_formats') is not None:

3058

# For RTMP URLs, also include the playpath

3059

info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])

3060

elif info_dict.get('url'):

3061

info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')

3062

info_copy['formats_table'] = self.render_formats_table(info_dict)

3063

info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)

3064

info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))

3065

info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))

3066

3067

def format_tmpl(tmpl):

3068

mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)

if not mobj:

return tmpl

fmt = '%({})s'

if tmpl.startswith('{'):

3074

tmpl, fmt = f'.{tmpl}', '%({})j'

3075

if tmpl.endswith('='):

3076

tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'

3077

return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))

3078

3079

for tmpl in self.params['forceprint'].get(key, []):

3080

self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))

3081

3082

for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):

3083

filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)

3084

tmpl = format_tmpl(tmpl)

3085

self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')

3086

if self._ensure_dir_exists(filename):

3087

with open(filename, 'a', encoding='utf-8', newline='') as f:

3088

f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)

return info_copy

def __forced_printings(self, info_dict, filename=None, incomplete=True):

3093

if (self.params.get('forcejson')

3094

or self.params['forceprint'].get('video')

3095

or self.params['print_to_file'].get('video')):

3096

self.post_extract(info_dict)

3097

if filename:

3098

info_dict['filename'] = filename

3099

info_copy = self._forceprint('video', info_dict)

3100

3101

def print_field(field, actual_field=None, optional=False):

3102

if actual_field is None:

3103

actual_field = field

3104

if self.params.get(f'force{field}') and (

3105

info_copy.get(field) is not None or (not optional and not incomplete)):

3106

self.to_stdout(info_copy[actual_field])

print_field('title')

print_field('id')

print_field('url', 'urls')

3111

print_field('thumbnail', optional=True)

3112

print_field('description', optional=True)

3113

print_field('filename')

3114

if self.params.get('forceduration') and info_copy.get('duration') is not None:

3115

self.to_stdout(formatSeconds(info_copy['duration']))

3116

print_field('format')

3117

3118

if self.params.get('forcejson'):

3119

self.to_stdout(json.dumps(self.sanitize_info(info_dict)))

3120

3121

def dl(self, name, info, subtitle=False, test=False):
    """
    Download info to name with a suitable downloader

    @param name      Destination; '-' makes the downloader be chosen for stdout
    @param subtitle  Passed through to the downloader
    @param test      Use a fixed set of test params instead of self.params,
                     without progress hooks
    @returns         Whatever the downloader's download() returns
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    if not test:
        params = self.params
    else:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_class = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_class(self, params)
    if not test:
        for ph in self._progress_hooks:
            fd.add_progress_hook(ph)

        def printable(url):
            # The payload of data: URLs is not written to the log
            if url.startswith('data:'):
                return url.split(',')[0] + ',<data>'
            return url

        urls = '", "'.join(
            printable(f['url']) for f in info.get('requested_formats', []) or [info])
        self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')

    # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
    # But it may contain objects that are not deep-copyable
    new_info = self._copy_infodict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)

3155

3156

def existing_file(self, filepaths, *, default_overwrite=True):
    """Find an already existing file among `filepaths`, honouring 'overwrites'.

    If overwriting is disabled (the 'overwrites' param, falling back to
    `default_overwrite`), the first existing path is returned. Otherwise
    every existing path is reported and removed, and None is returned.
    None is also returned when none of the paths exist.
    """
    found = [path for path in orderedSet(filepaths) if os.path.exists(path)]
    if not found:
        return None

    if not self.params.get('overwrites', default_overwrite):
        return found[0]

    for path in found:
        self.report_file_delete(path)
        os.remove(path)
    return None

def process_info(self, info_dict):
    """Process a single resolved IE result. (Modifies it in-place)

    In order, this: applies the match filter, runs the 'video'
    pre-processors, prepares the output filenames, performs the forced
    printings, writes the side files (description, subtitles, thumbnails,
    info json, annotations, internet shortcuts), runs the 'before_dl'
    pre-processors, downloads (and schedules merging of) the requested
    format(s), schedules ffmpeg fixups, runs the post-processors and the
    post hooks, and finally decides whether the entry should be written
    to the download archive ('__write_download_archive').

    A bare `return` is used throughout to abort processing of this entry
    after a failure has already been reported.

    @param info_dict  Info dict with '_type' of 'video'
    @raises MaxDownloadsReached    When the 'max_downloads' limit is hit
    @raises UnavailableVideoError  When the download fails with an OSError
    """

    assert info_dict.get('_type', 'video') == 'video'
    original_infodict = info_dict

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    if self._match_entry(info_dict) is not None:
        info_dict['__write_download_archive'] = 'ignore'
        return

    # Does nothing under normal operation - for backward compatibility of process_info
    self.post_extract(info_dict)

    def replace_info_dict(new_info):
        # Copy the new contents into the *same* dict object so that the
        # caller's reference keeps seeing the updates (see the assert below)
        nonlocal info_dict
        if new_info == info_dict:
            return
        info_dict.clear()
        info_dict.update(new_info)

    new_info, _ = self.pre_process(info_dict, 'video')
    replace_info_dict(new_info)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    def check_max_downloads():
        if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
            raise MaxDownloadsReached()

    if self.params.get('simulate'):
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
        check_max_downloads()
        return

    if full_filename is None:
        return
    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self._write_description('video', info_dict,
                               self.prepare_filename(info_dict, 'description')) is None:
        return

    sub_files = self._write_subtitles(info_dict, temp_filename)
    if sub_files is None:
        return
    files_to_move.update(dict(sub_files))

    thumb_files = self._write_thumbnails(
        'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
    if thumb_files is None:
        return
    files_to_move.update(dict(thumb_files))

    infofn = self.prepare_filename(info_dict, 'infojson')
    _infojson_written = self._write_info_json('video', info_dict, infofn)
    if _infojson_written:
        info_dict['infojson_filename'] = infofn
        # For backward compatibility, even though it was a private field
        info_dict['__infojson_filename'] = infofn
    elif _infojson_written is None:
        return

    # Note: Annotations are deprecated
    annofn = None
    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
    if annofn:
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except OSError:
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    # Write internet shortcut files
    def _write_link_file(link_type):
        """Write one shortcut file; return False only on a hard failure"""
        url = try_get(info_dict['webpage_url'], iri_to_uri)
        if not url:
            self.report_warning(
                f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
            return True
        linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
        if not self._ensure_dir_exists(encodeFilename(linkfn)):
            return False
        # Keep an existing shortcut only when overwriting is disabled.
        # (The check used to be inverted: it skipped the write when overwrites
        # were enabled and rewrote the file when they were disabled, unlike
        # the annotations handling above)
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
            return True
        try:
            self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
            with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                      newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                template_vars = {'url': url}
                if link_type == 'desktop':
                    # Strip the trailing ".<link_type>" from the filename
                    template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
        except OSError:
            self.report_error(f'Cannot write internet shortcut {linkfn}')
            return False
        return True

    write_links = {
        'url': self.params.get('writeurllink'),
        'webloc': self.params.get('writewebloclink'),
        'desktop': self.params.get('writedesktoplink'),
    }
    if self.params.get('writelink'):
        # 'writelink' selects the shortcut type from the current platform
        link_type = ('webloc' if sys.platform == 'darwin'
                     else 'desktop' if sys.platform.startswith('linux')
                     else 'url')
        write_links[link_type] = True

    if any(should_write and not _write_link_file(link_type)
           for link_type, should_write in write_links.items()):
        return

    new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    replace_info_dict(new_info)

    if self.params.get('skip_download'):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_video_file(*filepaths):
                # Check each path both with the converted ('final_ext') extension and as given
                ext = info_dict.get('ext')
                converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
                file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
                                          default_overwrite=False)
                if file:
                    info_dict['ext'] = os.path.splitext(file)[1][1:]
                return file

            fd, success = None, True
            if info_dict.get('protocol') or info_dict.get('url'):
                fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
                        info_dict.get('section_start') or info_dict.get('section_end')):
                    msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
                           else 'You have requested downloading the video partially, but ffmpeg is not installed')
                    self.report_error(f'{msg}. Aborting')
                    return

            if info_dict.get('requested_formats') is not None:
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if (info_dict['ext'] == 'webm'
                            and info_dict.get('thumbnails')
                            # check with type instead of pp_key, __name__, or isinstance
                            # since we dont want any custom PPs to trigger this
                            and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return f'{filename_wo_ext}.{ext}'

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_video_file(full_filename, temp_filename)

                info_dict['__real_download'] = False
                # NOTE: Copy so that original format dicts are not modified
                info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))

                merger = FFmpegMergerPP(self)
                downloaded = []
                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif fd:
                    # A single downloader handles all the formats in one go
                    for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
                        f['filepath'] = fname = prepend_extension(
                            correct_ext(temp_filename, info_dict['ext']),
                            'f%s' % f['format_id'], info_dict['ext'])
                        downloaded.append(fname)
                    info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
                        if not self.params.get('ignoreerrors'):
                            self.report_error(f'{msg}. Aborting due to --abort-on-error')
                            return
                        self.report_warning(f'{msg}. The formats won\'t be merged')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                        fname = temp_filename
                    # Download each requested format separately
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            f['filepath'] = fname
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success

                if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
                    info_dict['__postprocessors'].append(merger)
                    info_dict['__files_to_merge'] = downloaded
                    # Even if there were no downloads, it is being merged only now
                    info_dict['__real_download'] = True
                else:
                    for file in downloaded:
                        files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_video_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except OSError as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
            return

        self._raise_pending_errors(info_dict)
        if success and full_filename != '-':

            def fixup():
                """Schedule (or only warn about) the ffmpeg fixups that apply"""
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = 'warn'
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    if not (do_fixup and cndn):
                        return
                    elif do_fixup == 'warn':
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(stretched_ratio not in (1, None),
                             f'Non-uniform pixel ratio {stretched_ratio}',
                             FFmpegFixupStretchedPP)

                downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                downloader = downloader.FD_NAME if downloader else None

                ext = info_dict.get('ext')
                postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
                    isinstance(pp, FFmpegVideoConvertorPP)
                    and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
                ) for pp in self._pps['post_process'])

                if not postprocessed_by_ffmpeg:
                    ffmpeg_fixup(fd != FFmpegFD and ext == 'm4a'
                                 and info_dict.get('container') == 'm4a_dash',
                                 'writing DASH m4a. Only some players support this container',
                                 FFmpegFixupM4aPP)
                    ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
                                 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
                                 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                 FFmpegFixupM3u8PP)
                    ffmpeg_fixup(downloader == 'dashsegments'
                                 and (info_dict.get('is_live') or info_dict.get('is_dash_periods')),
                                 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

                ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            info_dict['__write_download_archive'] = True

    assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
    if self.params.get('force_write_download_archive'):
        info_dict['__write_download_archive'] = True
    check_max_downloads()

3527

3528

def __download_wrapper(self, func):
    """Wrap `func` with the error handling shared by the download entry points.

    The returned wrapper reports UnavailableVideoError instead of raising it.
    DownloadCancelled is printed and re-raised unless 'break_per_url' is set,
    in which case the download counter is reset instead. When `func`
    succeeds and 'dump_single_json' is set, its result is dumped to stdout
    as JSON. The wrapper itself always returns None.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except UnavailableVideoError as err:
            self.report_error(err)
            return
        except DownloadCancelled as err:
            self.to_screen(f'[info] {err}')
            if not self.params.get('break_per_url'):
                raise
            self._num_downloads = 0
            return

        if not self.params.get('dump_single_json', False):
            return
        self.post_extract(result)
        self.to_stdout(json.dumps(self.sanitize_info(result)))

    return wrapper

3545

3546

def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one fixed
    output name. Returns self._download_retcode.
    """
    url_list = variadic(url_list)  # Passing a single URL is a common mistake
    outtmpl = self.params['outtmpl']['default']

    # A template without any '%' field gives every download the same filename
    is_fixed_name = outtmpl != '-' and '%' not in outtmpl
    if len(url_list) > 1 and is_fixed_name and self.params.get('max_downloads') != 1:
        raise SameFileError(outtmpl)

    for url in url_list:
        extract = self.__download_wrapper(self.extract_info)
        extract(url, force_generic_extractor=self.params.get('force_generic_extractor', False))

    return self._download_retcode

3561

3562

def download_with_info_file(self, info_filename):
    """Download using the info dict(s) stored as JSON in `info_filename`.

    If processing a stored info dict fails with a download-related error,
    the download is retried from its 'webpage_url'; the error is re-raised
    when no such URL is stored. Returns self._download_retcode.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        stored = json.loads('\n'.join(lines))
        infos = [
            self.sanitize_info(entry, self.params.get('clean_infojson', True))
            for entry in variadic(stored)]

    for info in infos:
        try:
            self.__download_wrapper(self.process_ie_result)(info, download=True)
        except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as err:
            if not isinstance(err, EntryNotInPlaylist):
                self.to_stderr('\r')
            fallback_url = info.get('webpage_url')
            if fallback_url is None:
                raise
            self.report_warning(f'The info failed to download: {err}; trying with URL {fallback_url}')
            self.download([fallback_url])
        except ExtractorError as err:
            self.report_error(err)
    return self._download_retcode

3583

3584

@staticmethod
def sanitize_info(info_dict, remove_private_keys=False):
    """Sanitize the infodict for converting to json

    Fills in 'epoch', '_type' and '_version' on `info_dict` itself if they
    are missing, then returns a filtered copy: sequences become lists and
    anything that is not a dict, sequence, str, number, bool or None is
    replaced by its repr(). With `remove_private_keys`, None values, keys
    starting with '__' and a fixed set of internal keys are dropped.
    None is passed through unchanged.
    """
    if info_dict is None:
        return info_dict

    info_dict.setdefault('epoch', int(time.time()))
    info_dict.setdefault('_type', 'video')
    info_dict.setdefault('_version', {
        'version': __version__,
        'current_git_head': current_git_head(),
        'release_git_head': RELEASE_GIT_HEAD,
        'repository': ORIGIN,
    })

    internal_keys = {
        'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
        'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
        'playlist_autonumber',
    }

    def is_rejected(key, value):
        if not remove_private_keys:
            return False
        return value is None or key.startswith('__') or key in internal_keys

    def sanitize(obj):
        if isinstance(obj, dict):
            return {key: sanitize(value) for key, value in obj.items() if not is_rejected(key, value)}
        if isinstance(obj, (list, tuple, set, LazyList)):
            return [sanitize(item) for item in obj]
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        return repr(obj)

    return sanitize(info_dict)

3618

3619

@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    """Alias of sanitize_info for backward compatibility

    `actually_filter` is forwarded as sanitize_info's `remove_private_keys`.
    """
    sanitized = YoutubeDL.sanitize_info(info_dict, actually_filter)
    return sanitized

3623

3624

def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
    """Remove the given files from disk and from info['__files_to_move'].

    Falsy and duplicate filenames are ignored. `msg`, if given, is a
    %-template that is printed with each filename before it is removed.
    A file that cannot be removed only produces a warning.
    """
    unique_names = {name for name in files_to_delete if name}
    for name in unique_names:
        if msg:
            self.to_screen(msg % name)
        try:
            os.remove(name)
        except OSError:
            self.report_warning(f'Unable to delete file {name}')

        pending_moves = info.get('__files_to_move', [])
        if name in pending_moves:  # NB: Delete even if None
            del info['__files_to_move'][name]

3634

3635

@staticmethod
def post_extract(info_dict):
    """Run and remove the pending '__post_extractor' callables, in-place.

    For 'playlist' and 'multi_video' results only the entries are
    processed (recursively); for anything else the dict is updated with
    the return value of its '__post_extractor', if it has one.
    """
    def resolve(node):
        if node.get('_type') in ('playlist', 'multi_video'):
            for entry in node.get('entries', {}):
                resolve(entry or {})
            return

        pending = node.pop('__post_extractor', None)
        extra = pending() if pending else {}
        node.update(extra)

    resolve(info_dict or {})

3647

3648

def run_pp(self, pp, infodict):
    """Run a single postprocessor and deal with the files it wants deleted.

    A PostProcessingError is reported instead of raised only when
    'ignoreerrors' is exactly True. With 'keepvideo', the files the
    postprocessor asked to delete are added to '__files_to_move' instead
    of being deleted. Returns the resulting info dict.
    """
    infodict.setdefault('__files_to_move', {})
    try:
        obsolete_files, infodict = pp.run(infodict)
    except PostProcessingError as err:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is True:
            self.report_error(err)
            return infodict
        raise

    if obsolete_files:
        if self.params.get('keepvideo', False):
            for path in obsolete_files:
                infodict['__files_to_move'].setdefault(path, '')
        else:
            self._delete_downloaded_files(
                *obsolete_files, info=infodict, msg='Deleting original file %s (pass -k to keep)')
    return infodict

3670

3671

def run_all_pps(self, key, info, *, additional_pps=None):
    """Run `additional_pps` followed by all postprocessors registered under `key`.

    For every key except 'video' the forced printing for that key is done
    first. Returns the info dict produced by the last postprocessor.
    """
    if key != 'video':
        self._forceprint(key, info)

    extra = additional_pps if additional_pps else []
    pipeline = extra + self._pps[key]
    result = info
    for processor in pipeline:
        result = self.run_pp(processor, result)
    return result

3677

3678

def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered under `key` on a copy of `ie_info`.

    A PostProcessingError is not raised here; it is reported and recorded
    under '__pending_error' in the returned info dict.
    Returns a tuple (info, files_to_move).
    """
    info = dict(ie_info)
    info['__files_to_move'] = files_to_move or {}
    try:
        info = self.run_all_pps(key, info)
    except PostProcessingError as err:
        message = f'Preprocessing: {err}'
        info.setdefault('__pending_error', message)
        self.report_error(message, is_error=False)

    pending_moves = info.pop('__files_to_move', None)
    return info, pending_moves

3688

3689

def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Runs the entry's own '__postprocessors' and the 'post_process' ones,
    then moves the files, and finally runs the 'after_move' ones.
    Returns the resulting info dict.
    """
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move or {}

    processed = self.run_all_pps(
        'post_process', info, additional_pps=info.get('__postprocessors'))
    moved = self.run_pp(MoveFilesAfterDownloadPP(self), processed)
    del moved['__files_to_move']
    return self.run_all_pps('after_move', moved)

3697

3698

def _make_archive_id(self, info_dict):
    """Build the download-archive ID of `info_dict`.

    Returns None when there is no video id, or when no extractor key is
    stored and none of the known extractors is suitable for the 'url'.
    """
    video_id = info_dict.get('id')
    if not video_id:
        return

    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        url = str_or_none(info_dict.get('url'))
        if not url:
            return
        # Try to find matching extractor for the URL and take its ie_key
        suitable_keys = (ie_key for ie_key, ie in self._ies.items() if ie.suitable(url))
        extractor = next(suitable_keys, None)
        if extractor is None:
            return
    return make_archive_id(extractor, video_id)

3717

3718

def in_download_archive(self, info_dict):
    """Tell whether the entry, under its current or any old ID, is in the archive."""
    if not self.archive:
        return False

    current_id = self._make_archive_id(info_dict)
    old_ids = info_dict.get('_old_archive_ids') or []
    return any(candidate in self.archive for candidate in itertools.chain([current_id], old_ids))

3725

3726

def record_download_archive(self, info_dict):
    """Add the entry's archive ID to the download archive.

    Does nothing when the 'download_archive' param is not set. The ID is
    appended to the archive file only if the param is path-like; it is
    added to the in-memory archive in either case.
    """
    archive_target = self.params.get('download_archive')
    if archive_target is None:
        return

    archive_id = self._make_archive_id(info_dict)
    assert archive_id

    self.write_debug(f'Adding to archive: {archive_id}')
    if is_path_like(archive_target):
        with locked_file(archive_target, 'a', encoding='utf-8') as archive_file:
            archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)

3738

3739

@staticmethod
def format_resolution(format, default='unknown'):
    """Describe the resolution of a format dict as a short string.

    Gives 'audio only' for formats with audio but no video, otherwise the
    explicit 'resolution' if set, otherwise a string built from 'width'
    and/or 'height', and `default` when none of these is available.
    """
    if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width, height = format.get('width'), format.get('height')
    if width and height:
        return '%dx%d' % (width, height)
    if height:
        return '%sp' % height
    if width:
        return '%dx?' % width
    return default

3752

3753

def _list_format_headers(self, *headers):
    """Return the table headers, styled unless 'listformats_table' is False.

    The styled result is a list; the unstyled one is the `headers` tuple.
    """
    if self.params.get('listformats_table', True) is False:
        return headers
    return [self._format_out(title, self.Styles.HEADERS) for title in headers]

3757

3758

def _format_note(self, fdict):
    """Build the free-form 'note' string describing the format dict `fdict`.

    The note is assembled, in this order, from: unsupported marker,
    language, format note, total bitrate, container, video codec and
    bitrate, fps, audio codec and bitrate, sampling rate and file size.
    """
    note = ''

    def add(text, sep=', '):
        # Append `text`, preceded by `sep` unless the note is still empty
        nonlocal note
        note += (sep if note else '') + text

    get = fdict.get

    if get('ext') in ['f4f', 'f4m']:
        note += '(unsupported)'
    if get('language'):
        add('[%s]' % fdict['language'], sep=' ')
    if get('format_note') is not None:
        add(fdict['format_note'], sep=' ')
    if get('tbr') is not None:
        add('%4dk' % fdict['tbr'])
    if get('container') is not None:
        add('%s container' % fdict['container'])

    vcodec, vbr, abr = get('vcodec'), get('vbr'), get('abr')
    if vcodec is not None and vcodec != 'none':
        add(vcodec)
        if vbr is not None:
            note += '@'
    elif vbr is not None and abr is not None:
        note += 'video@'
    if vbr is not None:
        note += '%4dk' % vbr

    if get('fps') is not None:
        add('%sfps' % fdict['fps'])

    acodec = get('acodec')
    if acodec is not None:
        add('video only' if acodec == 'none' else '%-5s' % acodec)
    elif abr is not None:
        add('audio')
    if abr is not None:
        note += '@%3dk' % abr
    if get('asr') is not None:
        note += ' (%5dHz)' % fdict['asr']

    if get('filesize') is not None:
        add(format_bytes(fdict['filesize']))
    elif get('filesize_approx') is not None:
        add('~' + format_bytes(fdict['filesize_approx']))
    return note

3817

3818

def _get_formats(self, info_dict):
    """Return the formats of `info_dict`.

    Without a 'formats' list, a video result that has a 'url' is treated
    as its own single format; anything else has no formats.
    """
    formats = info_dict.get('formats')
    if formats is not None:
        return formats

    is_single_format = info_dict.get('url') and info_dict.get('_type', 'video') == 'video'
    return [info_dict] if is_single_format else []

3824

3825

def render_formats_table(self, info_dict):
    """Render the table of available formats as a string.

    Returns None when there are no formats. The simple four-column
    layout is used only if 'listformats_table' is explicitly False.
    Formats with a preference below -1000 are left out of either table.
    """
    formats = self._get_formats(info_dict)
    if not formats:
        return

    if self.params.get('listformats_table', True) is False:
        def legacy_row(f):
            return [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f),
            ]

        legacy_rows = [legacy_row(f) for f in formats if (f.get('preference') or 0) >= -1000]
        return render_table(['format code', 'extension', 'resolution', 'note'], legacy_rows, extra_gap=1)

    def simplified_codec(f, field):
        assert field in ('acodec', 'vcodec')
        codec = f.get(field)
        if not codec:
            return 'unknown'
        if codec != 'none':
            # Keep at most the first four dot-separated parts of the codec string
            return '.'.join(codec.split('.')[:4])

        if field == 'vcodec':
            if f.get('acodec') == 'none':
                return 'images'
            label = 'audio only'
        else:
            if field == 'acodec' and f.get('vcodec') == 'none':
                return ''
            label = 'video only'
        return self._format_out(label, self.Styles.SUPPRESS)

    delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)

    def size_column(f):
        # Exact size, else approximate size, else an estimate from duration and bitrate
        return (
            format_field(f, 'filesize', ' \t%s', func=format_bytes)
            or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
            or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
                            None, self._format_out('~\t%s', self.Styles.SUPPRESS)))

    def more_info_column(f):
        if f.get('ext') in ('f4f', 'f4m'):
            unsupported = self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT)
        else:
            unsupported = None

        if f.get('has_drm') == 'maybe':
            drm = self._format_out('Maybe DRM', self.Styles.WARNING)
        elif f.get('has_drm'):
            drm = self._format_out('DRM', self.Styles.BAD_FORMAT)
        else:
            drm = None

        details = join_nonempty(
            unsupported,
            drm,
            format_field(f, 'format_note'),
            format_field(f, 'container', ignore=(None, f.get('ext'))),
            delim=', ')
        return join_nonempty(format_field(f, 'language', '[%s]'), details, delim=' ')

    def row(f):
        return [
            self._format_out(format_field(f, 'format_id'), self.Styles.ID),
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d', func=round),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            format_field(f, 'audio_channels', '\t%s'),
            delim,
            size_column(f),
            format_field(f, 'tbr', '\t%dk', func=round),
            shorten_protocol_name(f.get('protocol', '')),
            delim,
            simplified_codec(f, 'vcodec'),
            format_field(f, 'vbr', '\t%dk', func=round),
            simplified_codec(f, 'acodec'),
            format_field(f, 'abr', '\t%dk', func=round),
            format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
            more_info_column(f),
        ]

    table = [row(f) for f in formats if f.get('preference') is None or f['preference'] >= -1000]
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
        delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    return render_table(
        header_line, table, hide_empty=True,
        delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))

3891

3892

def render_thumbnails_table(self, info_dict):
    """Render the table of available thumbnails; None if there are none."""
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        return None

    def row(thumb):
        return [
            thumb.get('id'),
            thumb.get('width') or 'unknown',
            thumb.get('height') or 'unknown',
            thumb['url'],
        ]

    headers = self._list_format_headers('ID', 'Width', 'Height', 'URL')
    return render_table(headers, [row(thumb) for thumb in thumbnails])

3899

3900

def render_subtitles_table(self, video_id, subtitles):
    """Render the table of available subtitles; None if there are none.

    `subtitles` maps each language to its list of subtitle formats.
    """
    if not subtitles:
        return None

    def _row(lang, formats):
        pairs = ((fmt['ext'], fmt.get('name') or 'unknown') for fmt in reversed(formats))
        exts, names = zip(*pairs)
        # When all formats share one name, show it once (or not at all if it is unknown)
        if len(set(names)) == 1:
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    headers = self._list_format_headers('Language', 'Name', 'Formats')
    rows = [_row(lang, formats) for lang, formats in subtitles.items()]
    return render_table(headers, rows, hide_empty=True)

3913

3914

def __list_table(self, video_id, name, func, *args):
    """Print the table produced by func(*args), or a notice if it is empty.

    `name` is what is being listed (e.g. 'formats'); it is only used in
    the printed messages.
    """
    table = func(*args)
    if table:
        self.to_screen(f'[info] Available {name} for {video_id}:')
        self.to_stdout(table)
    else:
        self.to_screen(f'{video_id} has no {name}')

3921

3922

def list_formats(self, info_dict):
    """Print the table of available formats of the given info dict."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'formats', self.render_formats_table, info_dict)

3924

3925

def list_thumbnails(self, info_dict):
    """Print the table of available thumbnails of the given info dict."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'thumbnails', self.render_thumbnails_table, info_dict)

3927

3928

def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the table of the given subtitles, labelled with `name`."""
    renderer = self.render_subtitles_table
    self.__list_table(video_id, name, renderer, video_id, subtitles)

3930

3931

def print_debug_header(self):
    """Write the "[debug]" header lines; does nothing unless the 'verbose' param is set

    The lines cover, in order: stream encodings, program version and variant,
    params (when not run from the CLI), lazy-loading and compat options,
    git HEAD, system identifier, external program versions, optional
    libraries, proxy map, request handlers, plugins and plugin directories.
    """
    if not self.params.get('verbose'):
        return

    from . import _IN_CLI  # Must be delayed import

    # These imports can be slow. So import them only as needed
    from .extractor.extractors import _LAZY_LOADER
    from .extractor.extractors import (
        _PLUGIN_CLASSES as plugin_ies,
        _PLUGIN_OVERRIDES as plugin_ie_overrides
    )

    def get_encoding(stream):
        # Describe the stream's encoding, plus notes about its terminal capabilities
        ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
        additional_info = []
        if os.environ.get('TERM', '').lower() == 'dumb':
            additional_info.append('dumb')
        if not supports_terminal_sequences(stream):
            from .utils import WINDOWS_VT_MODE  # Must be imported locally
            additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
        if additional_info:
            ret = f'{ret} ({",".join(additional_info)})'
        return ret

    encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        self.get_encoding(),
        ', '.join(
            f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
            if stream is not None and key != 'console')
    )

    # Route the header through the user-supplied logger if there is one;
    # otherwise write it directly
    logger = self.params.get('logger')
    if logger:
        write_debug = lambda msg: logger.debug(f'[debug] {msg}')
        write_debug(encoding_str)
    else:
        # The encodings line is written with encoding=None, unlike the lines after it
        write_string(f'[debug] {encoding_str}\n', encoding=None)
        write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    if VARIANT not in (None, 'pip'):
        source += '*'
    klass = type(self)
    write_debug(join_nonempty(
        f'{REPOSITORY.rpartition("/")[2]} version',
        _make_label(ORIGIN, CHANNEL.partition('@')[2] or __version__, __version__),
        f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        # When used as a library, note it; subclasses are identified by module and qualname
        '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
        delim=' '))

    if not _IN_CLI:
        write_debug(f'params: {self.params}')

    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if self.params['compat_opts']:
        write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

    if current_git_head():
        write_debug(f'Git HEAD: {current_git_head()}')
    write_debug(system_identifier())

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    # Keep only the names of the features whose value is truthy
    ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
    if ffmpeg_features:
        exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    # Programs with a falsy version are left out of the list
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .compat.compat_utils import get_package_info
    from .dependencies import available_dependencies

    write_debug('Optional libraries: %s' % (', '.join(sorted({
        join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
    })) or 'none'))

    write_debug(f'Proxy map: {self.proxies}')
    write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
    for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
        # Show "ClassName", or "ClassName as name" when it is registered under another name
        display_list = ['%s%s' % (
            klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in plugins.items()]
        if plugin_type == 'Extractor':
            # Overrides are listed using the last entry registered for each parent class
            display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
                                for parent, plugins in plugin_ie_overrides.items())
        if not display_list:
            continue
        write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')

    plugin_dirs = plugin_directories()
    if plugin_dirs:
        write_debug(f'Plugin directories: {plugin_dirs}')

    # Not implemented
    # NOTE: the "False and" below makes this whole branch unreachable
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
        write_debug('Public IP address: %s' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode()
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)

4047

4048

@functools.cached_property
def proxies(self):
    """Global proxy configuration

    A 'proxy' param that is not None is used for the 'all' key, with the
    empty string replaced by '__noproxy__'. Otherwise the mapping comes
    from urllib.request.getproxies().
    """
    configured = self.params.get('proxy')
    if configured is not None:
        return {'all': '__noproxy__' if configured == '' else configured}

    detected = urllib.request.getproxies()
    # compat. Set HTTPS_PROXY to __noproxy__ to revert
    if 'http' in detected:
        detected.setdefault('https', detected['http'])
    return detected

@functools.cached_property
def cookiejar(self):
    """Global cookiejar instance, built from the 'cookiefile' and 'cookiesfrombrowser' params"""
    cookie_file = self.params.get('cookiefile')
    browser_spec = self.params.get('cookiesfrombrowser')
    return load_cookies(cookie_file, browser_spec, self)

@property
def _opener(self):
    """
    Deprecated: return the instance obtained from the 'Urllib' request handler.

    Emits a deprecation warning on every access.
    """
    self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
    urllib_handler = self._request_director.handlers['Urllib']
    instance_options = {'cookiejar': self.cookiejar, 'proxies': self.proxies}
    return urllib_handler._get_instance(**instance_options)

4078

4079

def urlopen(self, req):
    """ Start an HTTP download

    @param req  A URL string, a Request, or (deprecated) a urllib.request.Request
    @returns    Whatever self._request_director.send() returns for the request

    Some NoSupportingHandlers and SSLError failures are re-raised as
    RequestError with a message that suggests a remedy.
    """
    if isinstance(req, str):
        req = Request(req)
    elif isinstance(req, urllib.request.Request):
        self.deprecation_warning(
            'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
            'Use yt_dlp.networking.common.Request instead.')
        req = urllib_req_to_req(req)
    assert isinstance(req, Request)

    # compat: Assume user:pass url params are basic auth
    url, basic_auth_header = extract_basic_auth(req.url)
    if basic_auth_header:
        req.headers['Authorization'] = basic_auth_header
    req.url = sanitize_url(url)

    # Note that the request object passed in (or built above) is modified in place
    clean_proxies(proxies=req.proxies, headers=req.headers)
    clean_headers(req.headers)

    try:
        return self._request_director.send(req)
    except NoSupportingHandlers as e:
        for ue in e.unsupported_errors:
            # FIXME: This depends on the order of errors.
            if not (ue.handler and ue.msg):
                continue
            if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
                raise RequestError(
                    'file:// URLs are disabled by default in yt-dlp for security reasons. '
                    'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
            if 'unsupported proxy type: "https"' in ue.msg.lower():
                raise RequestError(
                    'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests')

            elif (
                re.match(r'unsupported url scheme: "wss?"', ue.msg.lower())
                and 'websockets' not in self._request_director.handlers
            ):
                raise RequestError(
                    'This request requires WebSocket support. '
                    'Ensure one of the following dependencies are installed: websockets',
                    cause=ue) from ue
        # None of the unsupported errors matched a known case: re-raise the original
        raise
    except SSLError as e:
        if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
            raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
        elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
            raise RequestError(
                'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
                'Try using --legacy-server-connect', cause=e) from e
        raise

4131

4132

def build_request_director(self, handlers, preferences=None):
    """Create a RequestDirector holding one instance of every given handler

    @param handlers     Iterable of callables (handler classes) to instantiate
    @param preferences  Optional iterable of preferences added to the director
    @returns            The populated RequestDirector
    """
    ydl_logger = _YDLLogger(self)
    base_headers = self.params['http_headers'].copy()
    base_proxies = self.proxies.copy()
    clean_headers(base_headers)
    clean_proxies(base_proxies, base_headers)

    director = RequestDirector(logger=ydl_logger, verbose=self.params.get('debug_printtraffic'))
    for make_handler in handlers:
        # Rebuilt for every handler so that each one receives its own option objects
        handler_options = {
            'logger': ydl_logger,
            'headers': base_headers,
            'cookiejar': self.cookiejar,
            'proxies': base_proxies,
            'prefer_system_certs': 'no-certifi' in self.params['compat_opts'],
            'verify': not self.params.get('nocheckcertificate'),
        }
        # Handler keyword -> name of the param it is read from
        handler_options.update(traverse_obj(self.params, {
            'verbose': 'debug_printtraffic',
            'source_address': 'source_address',
            'timeout': 'socket_timeout',
            'legacy_ssl_support': 'legacyserverconnect',
            'enable_file_urls': 'enable_file_urls',
            'client_cert': {
                'client_certificate': 'client_certificate',
                'client_certificate_key': 'client_certificate_key',
                'client_certificate_password': 'client_certificate_password',
            },
        }))
        director.add_handler(make_handler(**handler_options))

    director.preferences.update(preferences or [])
    if 'prefer-legacy-http-handler' in self.params['compat_opts']:
        director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
    return director

4165

4166

@functools.cached_property
def _request_director(self):
    """Request director built from _REQUEST_HANDLERS and _RH_PREFERENCES"""
    handler_classes = _REQUEST_HANDLERS.values()
    return self.build_request_director(handler_classes, _RH_PREFERENCES)

4169

4170

def encode(self, s):
    """Encode s using self.get_encoding(); bytes are returned unchanged

    @raises UnicodeEncodeError  With a hint appended to its reason
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s

4179

4180

def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is not set"""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured

4185

4186

def _write_info_json(self, label, ie_result, infofn, overwrite=None):

4187

''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''

4188

if overwrite is None:

4189

overwrite = self.params.get('overwrites', True)

4190

if not self.params.get('writeinfojson'):

4191

return False

4192

elif not infofn:

4193

self.write_debug(f'Skipping writing {label} infojson')

4194

return False

4195

elif not self._ensure_dir_exists(infofn):

4196

return None

4197

elif not overwrite and os.path.exists(infofn):

4198

self.to_screen(f'[info] {label.title()} metadata is already present')

4199

return 'exists'

4200

4201

self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')

4202

try:

4203

write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)

4204

return True

4205

except OSError:

4206

self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')

4207

return None

4208

4209

def _write_description(self, label, ie_result, descfn):

4210

''' Write description and returns True = written, False = skip, None = error '''

4211

if not self.params.get('writedescription'):

4212

return False

4213

elif not descfn:

4214

self.write_debug(f'Skipping writing {label} description')

4215

return False

4216

elif not self._ensure_dir_exists(descfn):

4217

return None

4218

elif not self.params.get('overwrites', True) and os.path.exists(descfn):

4219

self.to_screen(f'[info] {label.title()} description is already present')

4220

elif ie_result.get('description') is None:

4221

self.to_screen(f'[info] There\'s no {label} description to write')

return False

else:

try:

self.to_screen(f'[info] Writing {label} description to: {descfn}')

4226

with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:

4227

descfile.write(ie_result['description'])

4228

except OSError:

4229

self.report_error(f'Cannot write {label} description file {descfn}')

return None

return True

def _write_subtitles(self, info_dict, filename):
    ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error

    The subtitles written are those in info_dict['requested_subtitles'].
    Each subtitle's info dict gets a 'filepath' key once its file is available.
    An empty list is returned when there is nothing to write. '''
    ret = []
    subtitles = info_dict.get('requested_subtitles')
    if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        return ret
    elif not subtitles:
        self.to_screen('[info] There are no subtitles for the requested languages')
        return ret
    sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
    if not sub_filename_base:
        self.to_screen('[info] Skipping writing video subtitles')
        return ret

    for sub_lang, sub_info in subtitles.items():
        sub_format = sub_info['ext']
        # sub_filename is where the file is written now; sub_filename_final is
        # the name derived from the 'subtitle' output filename
        sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
        sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
        existing_sub = self.existing_file((sub_filename_final, sub_filename))
        if existing_sub:
            self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
            sub_info['filepath'] = existing_sub
            ret.append((existing_sub, sub_filename_final))
            continue

        self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
        # Subtitle content that is already in memory is written out directly
        if sub_info.get('data') is not None:
            try:
                # Use newline='' to prevent conversion of newline characters
                # See https://github.com/ytdl-org/youtube-dl/issues/10268
                with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                    subfile.write(sub_info['data'])
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
                continue
            except OSError:
                self.report_error(f'Cannot write video subtitles file {sub_filename}')
                return None

        # Otherwise the subtitle has to be downloaded
        try:
            # Work on a copy so the added http_headers do not end up in sub_info
            sub_copy = sub_info.copy()
            sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
            self.dl(sub_filename, sub_copy, subtitle=True)
            sub_info['filepath'] = sub_filename
            ret.append((sub_filename, sub_filename_final))
        except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
            msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
            if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
                # The error is reported here only when ignoreerrors is falsy
                if not self.params.get('ignoreerrors'):
                    self.report_error(msg)
                raise DownloadError(msg)
            # ignoreerrors is True: warn and carry on with the next subtitle
            self.report_warning(msg)
    return ret

4288

4289

def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):

4290

''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error '''

4291

write_all = self.params.get('write_all_thumbnails', False)

4292

thumbnails, ret = [], []

4293

if write_all or self.params.get('writethumbnail', False):

4294

thumbnails = info_dict.get('thumbnails') or []

4295

if not thumbnails:

4296

self.to_screen(f'[info] There are no {label} thumbnails to download')

4297

return ret

4298

multiple = write_all and len(thumbnails) > 1

4299

4300

if thumb_filename_base is None:

4301

thumb_filename_base = filename

4302

if thumbnails and not thumb_filename_base:

4303

self.write_debug(f'Skipping writing {label} thumbnail')

4304

return ret

4305

4306

if thumbnails and not self._ensure_dir_exists(filename):

4307

return None

4308

4309

for idx, t in list(enumerate(thumbnails))[::-1]:

4310

thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')

4311

thumb_display_id = f'{label} thumbnail {t["id"]}'

4312

thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))

4313

thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

4314

4315

existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))

4316

if existing_thumb:

4317

self.to_screen('[info] %s is already present' % (

4318

thumb_display_id if multiple else f'{label} thumbnail').capitalize())

4319

t['filepath'] = existing_thumb

4320

ret.append((existing_thumb, thumb_filename_final))

4321

else:

4322

self.to_screen(f'[info] Downloading {thumb_display_id} ...')

4323

try:

4324

uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))

4325

self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')

4326

with open(encodeFilename(thumb_filename), 'wb') as thumbf:

4327

shutil.copyfileobj(uf, thumbf)

4328

ret.append((thumb_filename, thumb_filename_final))

4329

t['filepath'] = thumb_filename

4330

except network_exceptions as err:

4331

if isinstance(err, HTTPError) and err.status == 404:

4332

self.to_screen(f'[info] {thumb_display_id.title()} does not exist')

4333

else:

4334

self.report_warning(f'Unable to download {thumb_display_id}: {err}')

4335

thumbnails.pop(idx)

4336

if ret and not write_all:

4337

break

4338

return ret