jfr.im git - yt-dlp.git/blame_incremental

Commit	Line	Data
	1	import collections
	2	import contextlib
	3	import copy
	4	import datetime as dt
	5	import errno
	6	import fileinput
	7	import functools
	8	import http.cookiejar
	9	import io
	10	import itertools
	11	import json
	12	import locale
	13	import operator
	14	import os
	15	import random
	16	import re
	17	import shutil
	18	import string
	19	import subprocess
	20	import sys
	21	import tempfile
	22	import time
	23	import tokenize
	24	import traceback
	25	import unicodedata
	26
	27	from .cache import Cache
	28	from .compat import urllib # isort: split
	29	from .compat import compat_os_name, urllib_req_to_req
	30	from .cookies import LenientSimpleCookie, load_cookies
	31	from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
	32	from .downloader.rtmp import rtmpdump_version
	33	from .extractor import gen_extractor_classes, get_info_extractor
	34	from .extractor.common import UnsupportedURLIE
	35	from .extractor.openload import PhantomJSwrapper
	36	from .minicurses import format_text
	37	from .networking import HEADRequest, Request, RequestDirector
	38	from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES
	39	from .networking.exceptions import (
	40	HTTPError,
	41	NoSupportingHandlers,
	42	RequestError,
	43	SSLError,
	44	network_exceptions,
	45	)
	46	from .networking.impersonate import ImpersonateRequestHandler
	47	from .plugins import directories as plugin_directories
	48	from .postprocessor import _PLUGIN_CLASSES as plugin_pps
	49	from .postprocessor import (
	50	EmbedThumbnailPP,
	51	FFmpegFixupDuplicateMoovPP,
	52	FFmpegFixupDurationPP,
	53	FFmpegFixupM3u8PP,
	54	FFmpegFixupM4aPP,
	55	FFmpegFixupStretchedPP,
	56	FFmpegFixupTimestampPP,
	57	FFmpegMergerPP,
	58	FFmpegPostProcessor,
	59	FFmpegVideoConvertorPP,
	60	MoveFilesAfterDownloadPP,
	61	get_postprocessor,
	62	)
	63	from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
	64	from .update import (
	65	REPOSITORY,
	66	_get_system_deprecation,
	67	_make_label,
	68	current_git_head,
	69	detect_variant,
	70	)
	71	from .utils import (
	72	DEFAULT_OUTTMPL,
	73	IDENTITY,
	74	LINK_TEMPLATES,
	75	MEDIA_EXTENSIONS,
	76	NO_DEFAULT,
	77	NUMBER_RE,
	78	OUTTMPL_TYPES,
	79	POSTPROCESS_WHEN,
	80	STR_FORMAT_RE_TMPL,
	81	STR_FORMAT_TYPES,
	82	ContentTooShortError,
	83	DateRange,
	84	DownloadCancelled,
	85	DownloadError,
	86	EntryNotInPlaylist,
	87	ExistingVideoReached,
	88	ExtractorError,
	89	FormatSorter,
	90	GeoRestrictedError,
	91	ISO3166Utils,
	92	LazyList,
	93	MaxDownloadsReached,
	94	Namespace,
	95	PagedList,
	96	PlaylistEntries,
	97	Popen,
	98	PostProcessingError,
	99	ReExtractInfo,
	100	RejectedVideoReached,
	101	SameFileError,
	102	UnavailableVideoError,
	103	UserNotLive,
	104	YoutubeDLError,
	105	age_restricted,
	106	bug_reports_message,
	107	date_from_str,
	108	deprecation_warning,
	109	determine_ext,
	110	determine_protocol,
	111	encode_compat_str,
	112	encodeFilename,
	113	escapeHTML,
	114	expand_path,
	115	extract_basic_auth,
	116	filter_dict,
	117	float_or_none,
	118	format_bytes,
	119	format_decimal_suffix,
	120	format_field,
	121	formatSeconds,
	122	get_compatible_ext,
	123	get_domain,
	124	int_or_none,
	125	iri_to_uri,
	126	is_path_like,
	127	join_nonempty,
	128	locked_file,
	129	make_archive_id,
	130	make_dir,
	131	number_of_digits,
	132	orderedSet,
	133	orderedSet_from_options,
	134	parse_filesize,
	135	preferredencoding,
	136	prepend_extension,
	137	remove_terminal_sequences,
	138	render_table,
	139	replace_extension,
	140	sanitize_filename,
	141	sanitize_path,
	142	sanitize_url,
	143	shell_quote,
	144	str_or_none,
	145	strftime_or_none,
	146	subtitles_filename,
	147	supports_terminal_sequences,
	148	system_identifier,
	149	filesize_from_tbr,
	150	timetuple_from_msec,
	151	to_high_limit_path,
	152	traverse_obj,
	153	try_call,
	154	try_get,
	155	url_basename,
	156	variadic,
	157	version_tuple,
	158	windows_enable_vt_mode,
	159	write_json_file,
	160	write_string,
	161	)
	162	from .utils._utils import _UnsafeExtensionError, _YDLLogger
	163	from .utils.networking import (
	164	HTTPHeaderDict,
	165	clean_headers,
	166	clean_proxies,
	167	std_headers,
	168	)
	169	from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__
	170
	171	if compat_os_name == 'nt':
	172	import ctypes
	173
	174
	175	def _catch_unsafe_extension_error(func):
	176	@functools.wraps(func)
	177	def wrapper(self, args, *kwargs):
	178	try:
	179	return func(self, args, *kwargs)
	180	except _UnsafeExtensionError as error:
	181	self.report_error(
	182	f'The extracted extension ({error.extension!r}) is unusual '
	183	'and will be skipped for safety reasons. '
	184	f'If you believe this is an error{bug_reports_message(",")}')
	185
	186	return wrapper
	187
	188
	189	class YoutubeDL:
	190	"""YoutubeDL class.
	191
	192	YoutubeDL objects are the ones responsible for downloading the
	193	actual video file and writing it to disk if the user has requested
	194	it, among some other tasks. In most cases there should be one per
	195	program. As, given a video URL, the downloader doesn't know how to
	196	extract all the needed information, task that InfoExtractors do, it
	197	has to pass the URL to one of them.
	198
	199	For this, YoutubeDL objects have a method that allows
	200	InfoExtractors to be registered in a given order. When it is passed
	201	a URL, the YoutubeDL object hands it to the first InfoExtractor it
	202	finds that reports being able to handle it. The InfoExtractor extracts
	203	all the information about the video or videos the URL refers to, and
	204	YoutubeDL processes the extracted information, possibly using a File
	205	Downloader to download the video.
	206
	207	YoutubeDL objects accept a lot of parameters. In order not to saturate
	208	the object constructor with arguments, it receives a dictionary of
	209	options instead. These options are available through the params
	210	attribute for the InfoExtractors to use. The YoutubeDL also
	211	registers itself as the downloader in charge for the InfoExtractors
	212	that are added to it, so this is a "mutual registration".
	213
	214	Available options:
	215
	216	username: Username for authentication purposes.
	217	password: Password for authentication purposes.
	218	videopassword: Password for accessing a video.
	219	ap_mso: Adobe Pass multiple-system operator identifier.
	220	ap_username: Multiple-system operator account username.
	221	ap_password: Multiple-system operator account password.
	222	usenetrc: Use netrc for authentication instead.
	223	netrc_location: Location of the netrc file. Defaults to ~/.netrc.
	224	netrc_cmd: Use a shell command to get credentials
	225	verbose: Print additional info to stdout.
	226	quiet: Do not print messages to stdout.
	227	no_warnings: Do not print out anything for warnings.
	228	forceprint: A dict with keys WHEN mapped to a list of templates to
	229	print to stdout. The allowed keys are video or any of the
	230	items in utils.POSTPROCESS_WHEN.
	231	For compatibility, a single list is also accepted
	232	print_to_file: A dict with keys WHEN (same as forceprint) mapped to
	233	a list of tuples with (template, filename)
	234	forcejson: Force printing info_dict as JSON.
	235	dump_single_json: Force printing the info_dict of the whole playlist
	236	(or video) as a single JSON line.
	237	force_write_download_archive: Force writing download archive regardless
	238	of 'skip_download' or 'simulate'.
	239	simulate: Do not download the video files. If unset (or None),
	240	simulate only if listsubtitles, listformats or list_thumbnails is used
	241	format: Video format code. see "FORMAT SELECTION" for more details.
	242	You can also pass a function. The function takes 'ctx' as
	243	argument and returns the formats to download.
	244	See "build_format_selector" for an implementation
	245	allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
	246	ignore_no_formats_error: Ignore "No video formats" error. Useful for
	247	extracting metadata even if the video is not actually
	248	available for download (experimental)
	249	format_sort: A list of fields by which to sort the video formats.
	250	See "Sorting Formats" for more details.
	251	format_sort_force: Force the given format_sort. see "Sorting Formats"
	252	for more details.
	253	prefer_free_formats: Whether to prefer video formats with free containers
	254	over non-free ones of same quality.
	255	allow_multiple_video_streams: Allow multiple video streams to be merged
	256	into a single file
	257	allow_multiple_audio_streams: Allow multiple audio streams to be merged
	258	into a single file
	259	check_formats: Whether to test if the formats are downloadable.
	260	Can be True (check all), False (check none),
	261	'selected' (check selected formats),
	262	or None (check only if requested by extractor)
	263	paths: Dictionary of output paths. The allowed keys are 'home'
	264	'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
	265	outtmpl: Dictionary of templates for output names. Allowed keys
	266	are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).
	267	For compatibility with youtube-dl, a single string can also be used
	268	outtmpl_na_placeholder: Placeholder for unavailable meta fields.
	269	restrictfilenames: Do not allow "&" and spaces in file names
	270	trim_file_name: Limit length of filename (extension excluded)
	271	windowsfilenames: Force the filenames to be windows compatible
	272	ignoreerrors: Do not stop on download/postprocessing errors.
	273	Can be 'only_download' to ignore only download errors.
	274	Default is 'only_download' for CLI, but False for API
	275	skip_playlist_after_errors: Number of allowed failures until the rest of
	276	the playlist is skipped
	277	allowed_extractors: List of regexes to match against extractor names that are allowed
	278	overwrites: Overwrite all video and metadata files if True,
	279	overwrite only non-video files if None
	280	and don't overwrite any file if False
	281	playlist_items: Specific indices of playlist to download.
	282	playlistrandom: Download playlist items in random order.
	283	lazy_playlist: Process playlist entries as they are received.
	284	matchtitle: Download only matching titles.
	285	rejecttitle: Reject downloads for matching titles.
	286	logger: Log messages to a logging.Logger instance.
	287	logtostderr: Print everything to stderr instead of stdout.
	288	consoletitle: Display progress in console window's titlebar.
	289	writedescription: Write the video description to a .description file
	290	writeinfojson: Write the video metadata to a .info.json file
	291	clean_infojson: Remove internal metadata from the infojson
	292	getcomments: Extract video comments. This will not be written to disk
	293	unless writeinfojson is also given
	294	writeannotations: Write the video annotations to a .annotations.xml file
	295	writethumbnail: Write the thumbnail image to a file
	296	allow_playlist_files: Whether to write playlists' description, infojson etc
	297	also to disk when using the 'write*' options
	298	write_all_thumbnails: Write all thumbnail formats to files
	299	writelink: Write an internet shortcut file, depending on the
	300	current platform (.url/.webloc/.desktop)
	301	writeurllink: Write a Windows internet shortcut file (.url)
	302	writewebloclink: Write a macOS internet shortcut file (.webloc)
	303	writedesktoplink: Write a Linux internet shortcut file (.desktop)
	304	writesubtitles: Write the video subtitles to a file
	305	writeautomaticsub: Write the automatically generated subtitles to a file
	306	listsubtitles: Lists all available subtitles for the video
	307	subtitlesformat: The format code for subtitles
	308	subtitleslangs: List of languages of the subtitles to download (can be regex).
	309	The list may contain "all" to refer to all the available
	310	subtitles. The language can be prefixed with a "-" to
	311	exclude it from the requested languages, e.g. ['all', '-live_chat']
	312	keepvideo: Keep the video file after post-processing
	313	daterange: A utils.DateRange object, download only if the upload_date is in the range.
	314	skip_download: Skip the actual download of the video file
	315	cachedir: Location of the cache files in the filesystem.
	316	False to disable filesystem cache.
	317	noplaylist: Download single video instead of a playlist if in doubt.
	318	age_limit: An integer representing the user's age in years.
	319	Unsuitable videos for the given age are skipped.
	320	min_views: An integer representing the minimum view count the video
	321	must have in order to not be skipped.
	322	Videos without view count information are always
	323	downloaded. None for no limit.
	324	max_views: An integer representing the maximum view count.
	325	Videos that are more popular than that are not
	326	downloaded.
	327	Videos without view count information are always
	328	downloaded. None for no limit.
	329	download_archive: A set, or the name of a file where all downloads are recorded.
	330	Videos already present in the file are not downloaded again.
	331	break_on_existing: Stop the download process after attempting to download a
	332	file that is in the archive.
	333	break_per_url: Whether break_on_reject and break_on_existing
	334	should act on each input URL as opposed to for the entire queue
	335	cookiefile: File name or text stream from where cookies should be read and dumped to
	336	cookiesfrombrowser: A tuple containing the name of the browser, the profile
	337	name/path from where cookies are loaded, the name of the keyring,
	338	and the container name, e.g. ('chrome', ) or
	339	('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
	340	legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
	341	support RFC 5746 secure renegotiation
	342	nocheckcertificate: Do not verify SSL certificates
	343	client_certificate: Path to client certificate file in PEM format. May include the private key
	344	client_certificate_key: Path to private key file for client certificate
	345	client_certificate_password: Password for client certificate private key, if encrypted.
	346	If not provided and the key is encrypted, yt-dlp will ask interactively
	347	prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
	348	(Only supported by some extractors)
	349	enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons.
	350	http_headers: A dictionary of custom headers to be used for all requests
	351	proxy: URL of the proxy server to use
	352	geo_verification_proxy: URL of the proxy to use for IP address verification
	353	on geo-restricted sites.
	354	socket_timeout: Time to wait for unresponsive hosts, in seconds
	355	bidi_workaround: Work around buggy terminals without bidirectional text
	356	support, using fribidi
	357	debug_printtraffic:Print out sent and received HTTP traffic
	358	default_search: Prepend this string if an input url is not valid.
	359	'auto' for elaborate guessing
	360	encoding: Use this encoding instead of the system-specified.
	361	extract_flat: Whether to resolve and process url_results further
	362	* False: Always process. Default for API
	363	* True: Never process
	364	* 'in_playlist': Do not process inside playlist/multi_video
	365	* 'discard': Always process, but don't return the result
	366	from inside playlist/multi_video
	367	* 'discard_in_playlist': Same as "discard", but only for
	368	playlists (not multi_video). Default for CLI
	369	wait_for_video: If given, wait for scheduled streams to become available.
	370	The value should be a tuple containing the range
	371	(min_secs, max_secs) to wait between retries
	372	postprocessors: A list of dictionaries, each with an entry
	373	* key: The name of the postprocessor. See
	374	yt_dlp/postprocessor/__init__.py for a list.
	375	* when: When to run the postprocessor. Allowed values are
	376	the entries of utils.POSTPROCESS_WHEN
	377	Assumed to be 'post_process' if not given
	378	progress_hooks: A list of functions that get called on download
	379	progress, with a dictionary with the entries
	380	* status: One of "downloading", "error", or "finished".
	381	Check this first and ignore unknown values.
	382	* info_dict: The extracted info_dict
	383
	384	If status is one of "downloading", or "finished", the
	385	following properties may also be present:
	386	* filename: The final filename (always present)
	387	* tmpfilename: The filename we're currently writing to
	388	* downloaded_bytes: Bytes on disk
	389	* total_bytes: Size of the whole file, None if unknown
	390	* total_bytes_estimate: Guess of the eventual file size,
	391	None if unavailable.
	392	* elapsed: The number of seconds since download started.
	393	* eta: The estimated time in seconds, None if unknown
	394	* speed: The download speed in bytes/second, None if
	395	unknown
	396	* fragment_index: The counter of the currently
	397	downloaded video fragment.
	398	* fragment_count: The number of fragments (= individual
	399	files that will be merged)
	400
	401	Progress hooks are guaranteed to be called at least once
	402	(with status "finished") if the download is successful.
	403	postprocessor_hooks: A list of functions that get called on postprocessing
	404	progress, with a dictionary with the entries
	405	* status: One of "started", "processing", or "finished".
	406	Check this first and ignore unknown values.
	407	* postprocessor: Name of the postprocessor
	408	* info_dict: The extracted info_dict
	409
	410	Postprocessor hooks are guaranteed to be called at least twice
	411	(with status "started" and "finished") if the processing is successful.
	412	merge_output_format: "/" separated list of extensions to use when merging formats.
	413	final_ext: Expected final extension; used to detect when the file was
	414	already downloaded and converted
	415	fixup: Automatically correct known faults of the file.
	416	One of:
	417	- "never": do nothing
	418	- "warn": only emit a warning
	419	- "detect_or_warn": check whether we can do anything
	420	about it, warn otherwise (default)
	421	source_address: Client-side IP address to bind to.
	422	impersonate: Client to impersonate for requests.
	423	An ImpersonateTarget (from yt_dlp.networking.impersonate)
	424	sleep_interval_requests: Number of seconds to sleep between requests
	425	during extraction
	426	sleep_interval: Number of seconds to sleep before each download when
	427	used alone or a lower bound of a range for randomized
	428	sleep before each download (minimum possible number
	429	of seconds to sleep) when used along with
	430	max_sleep_interval.
	431	max_sleep_interval:Upper bound of a range for randomized sleep before each
	432	download (maximum possible number of seconds to sleep).
	433	Must only be used along with sleep_interval.
	434	Actual sleep time will be a random float from range
	435	[sleep_interval; max_sleep_interval].
	436	sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
	437	listformats: Print an overview of available video formats and exit.
	438	list_thumbnails: Print a table of all thumbnails and exit.
	439	match_filter: A function that gets called for every video with the signature
	440	(info_dict, *, incomplete: bool) -> Optional[str]
	441	For backward compatibility with youtube-dl, the signature
	442	(info_dict) -> Optional[str] is also allowed.
	443	- If it returns a message, the video is ignored.
	444	- If it returns None, the video is downloaded.
	445	- If it returns utils.NO_DEFAULT, the user is interactively
	446	asked whether to download the video.
	447	- Raise utils.DownloadCancelled(msg) to abort remaining
	448	downloads when a video is rejected.
	449	match_filter_func in utils/_utils.py is one example for this.
	450	color: A Dictionary with output stream names as keys
	451	and their respective color policy as values.
	452	Can also just be a single color policy,
	453	in which case it applies to all outputs.
	454	Valid stream names are 'stdout' and 'stderr'.
	455	Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
	456	geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
	457	HTTP header
	458	geo_bypass_country:
	459	Two-letter ISO 3166-1 alpha-2 country code that will be used for
	460	explicit geographic restriction bypassing via faking
	461	X-Forwarded-For HTTP header
	462	geo_bypass_ip_block:
	463	IP range in CIDR notation that will be used similarly to
	464	geo_bypass_country
	465	external_downloader: A dictionary of protocol keys and the executable of the
	466	external downloader to use for it. The allowed protocols
	467	are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
	468	Set the value to 'native' to use the native downloader
	469	compat_opts: Compatibility options. See "Differences in default behavior".
	470	The following options do not work when used through the API:
	471	filename, abort-on-error, multistreams, no-live-chat,
	472	format-sort, no-clean-infojson, no-playlist-metafiles,
	473	no-keep-subs, no-attach-info-json, allow-unsafe-ext.
	474	Refer __init__.py for their implementation
	475	progress_template: Dictionary of templates for progress outputs.
	476	Allowed keys are 'download', 'postprocess',
	477	'download-title' (console title) and 'postprocess-title'.
	478	The template is mapped on a dictionary with keys 'progress' and 'info'
	479	retry_sleep_functions: Dictionary of functions that takes the number of attempts
	480	as argument and returns the time to sleep in seconds.
	481	Allowed keys are 'http', 'fragment', 'file_access'
	482	download_ranges: A callback function that gets called for every video with
	483	the signature (info_dict, ydl) -> Iterable[Section].
	484	Only the returned sections will be downloaded.
	485	Each Section is a dict with the following keys:
	486	* start_time: Start time of the section in seconds
	487	* end_time: End time of the section in seconds
	488	* title: Section title (Optional)
	489	* index: Section number (Optional)
	490	force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
	491	noprogress: Do not print the progress bar
	492	live_from_start: Whether to download livestreams videos from the start
	493
	494	The following parameters are not used by YoutubeDL itself, they are used by
	495	the downloader (see yt_dlp/downloader/common.py):
	496	nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
	497	max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
	498	continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
	499	external_downloader_args, concurrent_fragment_downloads, progress_delta.
	500

1

import collections

2

import contextlib

3

import copy

4

import datetime as dt

import errno

import fileinput

import functools

import http.cookiejar

import io

import itertools

import json

import locale

import operator

import os

import random

import re

import shutil

import string

import subprocess

import sys

import tempfile

import time

import tokenize

import traceback

import unicodedata

from .cache import Cache

28

from .compat import urllib # isort: split

29

from .compat import compat_os_name, urllib_req_to_req

30

from .cookies import LenientSimpleCookie, load_cookies

31

from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name

32

from .downloader.rtmp import rtmpdump_version

33

from .extractor import gen_extractor_classes, get_info_extractor

34

from .extractor.common import UnsupportedURLIE

35

from .extractor.openload import PhantomJSwrapper

36

from .minicurses import format_text

37

from .networking import HEADRequest, Request, RequestDirector

38

from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES

39

from .networking.exceptions import (

40

HTTPError,

41

NoSupportingHandlers,

RequestError,

SSLError,

network_exceptions,

)

from .networking.impersonate import ImpersonateRequestHandler

47

from .plugins import directories as plugin_directories

48

from .postprocessor import _PLUGIN_CLASSES as plugin_pps

49

from .postprocessor import (

50

EmbedThumbnailPP,

51

FFmpegFixupDuplicateMoovPP,

52

FFmpegFixupDurationPP,

53

FFmpegFixupM3u8PP,

54

FFmpegFixupM4aPP,

55

FFmpegFixupStretchedPP,

56

FFmpegFixupTimestampPP,

57

FFmpegMergerPP,

58

FFmpegPostProcessor,

59

FFmpegVideoConvertorPP,

60

MoveFilesAfterDownloadPP,

61

get_postprocessor,

62

)

63

from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping

64

from .update import (

65

REPOSITORY,

66

_get_system_deprecation,

_make_label,

current_git_head,

detect_variant,

)

from .utils import (

DEFAULT_OUTTMPL,

IDENTITY,

LINK_TEMPLATES,

MEDIA_EXTENSIONS,

NO_DEFAULT,

NUMBER_RE,

OUTTMPL_TYPES,

POSTPROCESS_WHEN,

STR_FORMAT_RE_TMPL,

STR_FORMAT_TYPES,

ContentTooShortError,

DateRange,

DownloadCancelled,

DownloadError,

EntryNotInPlaylist,

ExistingVideoReached,

ExtractorError,

FormatSorter,

GeoRestrictedError,

ISO3166Utils,

LazyList,

MaxDownloadsReached,

Namespace,

PagedList,

PlaylistEntries,

Popen,

PostProcessingError,

ReExtractInfo,

RejectedVideoReached,

101

SameFileError,

102

UnavailableVideoError,

UserNotLive,

YoutubeDLError,

age_restricted,

bug_reports_message,

date_from_str,

deprecation_warning,

determine_ext,

determine_protocol,

encode_compat_str,

encodeFilename,

escapeHTML,

expand_path,

extract_basic_auth,

filter_dict,

float_or_none,

format_bytes,

format_decimal_suffix,

format_field,

formatSeconds,

get_compatible_ext,

get_domain,

int_or_none,

iri_to_uri,

is_path_like,

join_nonempty,

locked_file,

make_archive_id,

make_dir,

number_of_digits,

orderedSet,

orderedSet_from_options,

parse_filesize,

preferredencoding,

prepend_extension,

remove_terminal_sequences,

render_table,

replace_extension,

sanitize_filename,

sanitize_path,

sanitize_url,

shell_quote,

str_or_none,

strftime_or_none,

subtitles_filename,

supports_terminal_sequences,

system_identifier,

filesize_from_tbr,

timetuple_from_msec,

to_high_limit_path,

traverse_obj,

try_call,

try_get,

url_basename,

variadic,

version_tuple,

windows_enable_vt_mode,

write_json_file,

write_string,

)

from .utils._utils import _UnsafeExtensionError, _YDLLogger

163

from .utils.networking import (

HTTPHeaderDict,

clean_headers,

clean_proxies,

std_headers,

)

from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__

170

171

if compat_os_name == 'nt':

import ctypes

def _catch_unsafe_extension_error(func):

176

@functools.wraps(func)

177

def wrapper(self, *args, **kwargs):

178

try:

179

return func(self, *args, **kwargs)

180

except _UnsafeExtensionError as error:

181

self.report_error(

182

f'The extracted extension ({error.extension!r}) is unusual '

183

'and will be skipped for safety reasons. '

184

f'If you believe this is an error{bug_reports_message(",")}')

return wrapper

class YoutubeDL:

"""YoutubeDL class.

YoutubeDL objects are the ones responsible for downloading the

193

actual video file and writing it to disk if the user has requested

194

it, among some other tasks. In most cases there should be one per

195

program. As, given a video URL, the downloader doesn't know how to

196

extract all the needed information, task that InfoExtractors do, it

197

has to pass the URL to one of them.

198

199

For this, YoutubeDL objects have a method that allows

200

InfoExtractors to be registered in a given order. When it is passed

201

a URL, the YoutubeDL object handles it to the first InfoExtractor it

202

finds that reports being able to handle it. The InfoExtractor extracts

203

all the information about the video or videos the URL refers to, and

204

YoutubeDL processes the extracted information, possibly using a File

205

Downloader to download the video.

206

207

YoutubeDL objects accept a lot of parameters. In order not to saturate

208

the object constructor with arguments, it receives a dictionary of

209

options instead. These options are available through the params

210

attribute for the InfoExtractors to use. The YoutubeDL also

211

registers itself as the downloader in charge for the InfoExtractors

212

that are added to it, so this is a "mutual registration".

Available options:

username: Username for authentication purposes.

217

password: Password for authentication purposes.

218

videopassword: Password for accessing a video.

219

ap_mso: Adobe Pass multiple-system operator identifier.

220

ap_username: Multiple-system operator account username.

221

ap_password: Multiple-system operator account password.

222

usenetrc: Use netrc for authentication instead.

223

netrc_location: Location of the netrc file. Defaults to ~/.netrc.

224

netrc_cmd: Use a shell command to get credentials

225

verbose: Print additional info to stdout.

226

quiet: Do not print messages to stdout.

227

no_warnings: Do not print out anything for warnings.

228

forceprint: A dict with keys WHEN mapped to a list of templates to

229

print to stdout. The allowed keys are video or any of the

230

items in utils.POSTPROCESS_WHEN.

231

For compatibility, a single list is also accepted

232

print_to_file: A dict with keys WHEN (same as forceprint) mapped to

233

a list of tuples with (template, filename)

234

forcejson: Force printing info_dict as JSON.

235

dump_single_json: Force printing the info_dict of the whole playlist

236

(or video) as a single JSON line.

237

force_write_download_archive: Force writing download archive regardless

238

of 'skip_download' or 'simulate'.

239

simulate: Do not download the video files. If unset (or None),

240

simulate only if listsubtitles, listformats or list_thumbnails is used

241

format: Video format code. see "FORMAT SELECTION" for more details.

242

You can also pass a function. The function takes 'ctx' as

243

argument and returns the formats to download.

244

See "build_format_selector" for an implementation

245

allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.

246

ignore_no_formats_error: Ignore "No video formats" error. Useful for

247

extracting metadata even if the video is not actually

248

available for download (experimental)

249

format_sort: A list of fields by which to sort the video formats.

250

See "Sorting Formats" for more details.

251

format_sort_force: Force the given format_sort. see "Sorting Formats"

252

for more details.

253

prefer_free_formats: Whether to prefer video formats with free containers

254

over non-free ones of same quality.

255

allow_multiple_video_streams: Allow multiple video streams to be merged

256

into a single file

257

allow_multiple_audio_streams: Allow multiple audio streams to be merged

258

into a single file

259

check_formats: Whether to test if the formats are downloadable.

260

Can be True (check all), False (check none),

261

'selected' (check selected formats),

262

or None (check only if requested by extractor)

263

paths: Dictionary of output paths. The allowed keys are 'home'

264

'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)

265

outtmpl: Dictionary of templates for output names. Allowed keys

266

are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).

267

For compatibility with youtube-dl, a single string can also be used

268

outtmpl_na_placeholder: Placeholder for unavailable meta fields.

269

restrictfilenames: Do not allow "&" and spaces in file names

270

trim_file_name: Limit length of filename (extension excluded)

271

windowsfilenames: Force the filenames to be windows compatible

272

ignoreerrors: Do not stop on download/postprocessing errors.

273

Can be 'only_download' to ignore only download errors.

274

Default is 'only_download' for CLI, but False for API

275

skip_playlist_after_errors: Number of allowed failures until the rest of

276

the playlist is skipped

277

allowed_extractors: List of regexes to match against extractor names that are allowed

278

overwrites: Overwrite all video and metadata files if True,

279

overwrite only non-video files if None

280

and don't overwrite any file if False

281

playlist_items: Specific indices of playlist to download.

282

playlistrandom: Download playlist items in random order.

283

lazy_playlist: Process playlist entries as they are received.

284

matchtitle: Download only matching titles.

285

rejecttitle: Reject downloads for matching titles.

286

logger: Log messages to a logging.Logger instance.

287

logtostderr: Print everything to stderr instead of stdout.

288

consoletitle: Display progress in console window's titlebar.

289

writedescription: Write the video description to a .description file

290

writeinfojson: Write the video metadata to a .info.json file

291

clean_infojson: Remove internal metadata from the infojson

292

getcomments: Extract video comments. This will not be written to disk

293

unless writeinfojson is also given

294

writeannotations: Write the video annotations to a .annotations.xml file

295

writethumbnail: Write the thumbnail image to a file

296

allow_playlist_files: Whether to write playlists' description, infojson etc

297

also to disk when using the 'write*' options

298

write_all_thumbnails: Write all thumbnail formats to files

299

writelink: Write an internet shortcut file, depending on the

300

current platform (.url/.webloc/.desktop)

301

writeurllink: Write a Windows internet shortcut file (.url)

302

writewebloclink: Write a macOS internet shortcut file (.webloc)

303

writedesktoplink: Write a Linux internet shortcut file (.desktop)

304

writesubtitles: Write the video subtitles to a file

305

writeautomaticsub: Write the automatically generated subtitles to a file

306

listsubtitles: Lists all available subtitles for the video

307

subtitlesformat: The format code for subtitles

308

subtitleslangs: List of languages of the subtitles to download (can be regex).

309

The list may contain "all" to refer to all the available

310

subtitles. The language can be prefixed with a "-" to

311

exclude it from the requested languages, e.g. ['all', '-live_chat']

312

keepvideo: Keep the video file after post-processing

313

daterange: A utils.DateRange object, download only if the upload_date is in the range.

314

skip_download: Skip the actual download of the video file

315

cachedir: Location of the cache files in the filesystem.

316

False to disable filesystem cache.

317

noplaylist: Download single video instead of a playlist if in doubt.

318

age_limit: An integer representing the user's age in years.

319

Unsuitable videos for the given age are skipped.

320

min_views: An integer representing the minimum view count the video

321

must have in order to not be skipped.

322

Videos without view count information are always

323

downloaded. None for no limit.

324

max_views: An integer representing the maximum view count.

325

Videos that are more popular than that are not

326

downloaded.

327

Videos without view count information are always

328

downloaded. None for no limit.

329

download_archive: A set, or the name of a file where all downloads are recorded.

330

Videos already present in the file are not downloaded again.

331

break_on_existing: Stop the download process after attempting to download a

332

file that is in the archive.

333

break_per_url: Whether break_on_reject and break_on_existing

334

should act on each input URL as opposed to for the entire queue

335

cookiefile: File name or text stream from where cookies should be read and dumped to

336

cookiesfrombrowser: A tuple containing the name of the browser, the profile

337

name/path from where cookies are loaded, the name of the keyring,

338

and the container name, e.g. ('chrome', ) or

339

('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')

340

legacyserverconnect: Explicitly allow HTTPS connection to servers that do not

341

support RFC 5746 secure renegotiation

342

nocheckcertificate: Do not verify SSL certificates

343

client_certificate: Path to client certificate file in PEM format. May include the private key

344

client_certificate_key: Path to private key file for client certificate

345

client_certificate_password: Password for client certificate private key, if encrypted.

346

If not provided and the key is encrypted, yt-dlp will ask interactively

347

prefer_insecure: Use HTTP instead of HTTPS to retrieve information.

348

(Only supported by some extractors)

349

enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons.

350

http_headers: A dictionary of custom headers to be used for all requests

351

proxy: URL of the proxy server to use

352

geo_verification_proxy: URL of the proxy to use for IP address verification

353

on geo-restricted sites.

354

socket_timeout: Time to wait for unresponsive hosts, in seconds

355

bidi_workaround: Work around buggy terminals without bidirectional text

356

support, using fribidi

357

debug_printtraffic:Print out sent and received HTTP traffic

358

default_search: Prepend this string if an input url is not valid.

359

'auto' for elaborate guessing

360

encoding: Use this encoding instead of the system-specified.

361

extract_flat: Whether to resolve and process url_results further

362

* False: Always process. Default for API

363

* True: Never process

364

* 'in_playlist': Do not process inside playlist/multi_video

365

* 'discard': Always process, but don't return the result

366

from inside playlist/multi_video

367

* 'discard_in_playlist': Same as "discard", but only for

368

playlists (not multi_video). Default for CLI

369

wait_for_video: If given, wait for scheduled streams to become available.

370

The value should be a tuple containing the range

371

(min_secs, max_secs) to wait between retries

372

postprocessors: A list of dictionaries, each with an entry

373

* key: The name of the postprocessor. See

374

yt_dlp/postprocessor/__init__.py for a list.

375

* when: When to run the postprocessor. Allowed values are

376

the entries of utils.POSTPROCESS_WHEN

377

Assumed to be 'post_process' if not given

378

progress_hooks: A list of functions that get called on download

379

progress, with a dictionary with the entries

380

* status: One of "downloading", "error", or "finished".

381

Check this first and ignore unknown values.

382

* info_dict: The extracted info_dict

383

384

If status is one of "downloading", or "finished", the

385

following properties may also be present:

386

* filename: The final filename (always present)

387

* tmpfilename: The filename we're currently writing to

388

* downloaded_bytes: Bytes on disk

389

* total_bytes: Size of the whole file, None if unknown

390

* total_bytes_estimate: Guess of the eventual file size,

391

None if unavailable.

392

* elapsed: The number of seconds since download started.

393

* eta: The estimated time in seconds, None if unknown

394

* speed: The download speed in bytes/second, None if

395

unknown

396

* fragment_index: The counter of the currently

397

downloaded video fragment.

398

* fragment_count: The number of fragments (= individual

399

files that will be merged)

400

401

Progress hooks are guaranteed to be called at least once

402

(with status "finished") if the download is successful.

403

postprocessor_hooks: A list of functions that get called on postprocessing

404

progress, with a dictionary with the entries

405

* status: One of "started", "processing", or "finished".

406

Check this first and ignore unknown values.

407

* postprocessor: Name of the postprocessor

408

* info_dict: The extracted info_dict

409

410

Progress hooks are guaranteed to be called at least twice

411

(with status "started" and "finished") if the processing is successful.

412

merge_output_format: "/" separated list of extensions to use when merging formats.

413

final_ext: Expected final extension; used to detect when the file was

414

already downloaded and converted

415

fixup: Automatically correct known faults of the file.

416

One of:

417

- "never": do nothing

418

- "warn": only emit a warning

419

- "detect_or_warn": check whether we can do anything

420

about it, warn otherwise (default)

421

source_address: Client-side IP address to bind to.

422

impersonate: Client to impersonate for requests.

423

An ImpersonateTarget (from yt_dlp.networking.impersonate)

424

sleep_interval_requests: Number of seconds to sleep between requests

425

during extraction

426

sleep_interval: Number of seconds to sleep before each download when

427

used alone or a lower bound of a range for randomized

428

sleep before each download (minimum possible number

429

of seconds to sleep) when used along with

430

max_sleep_interval.

431

max_sleep_interval:Upper bound of a range for randomized sleep before each

432

download (maximum possible number of seconds to sleep).

433

Must only be used along with sleep_interval.

434

Actual sleep time will be a random float from range

435

[sleep_interval; max_sleep_interval].

436

sleep_interval_subtitles: Number of seconds to sleep before each subtitle download

437

listformats: Print an overview of available video formats and exit.

438

list_thumbnails: Print a table of all thumbnails and exit.

439

match_filter: A function that gets called for every video with the signature

440

(info_dict, *, incomplete: bool) -> Optional[str]

441

For backward compatibility with youtube-dl, the signature

442

(info_dict) -> Optional[str] is also allowed.

443

- If it returns a message, the video is ignored.

444

- If it returns None, the video is downloaded.

445

- If it returns utils.NO_DEFAULT, the user is interactively

446

asked whether to download the video.

447

- Raise utils.DownloadCancelled(msg) to abort remaining

448

downloads when a video is rejected.

449

match_filter_func in utils/_utils.py is one example for this.

450

color: A Dictionary with output stream names as keys

451

and their respective color policy as values.

452

Can also just be a single color policy,

453

in which case it applies to all outputs.

454

Valid stream names are 'stdout' and 'stderr'.

455

Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.

456

geo_bypass: Bypass geographic restriction via faking X-Forwarded-For

457

HTTP header

458

geo_bypass_country:

459

Two-letter ISO 3166-1 alpha-2 country code that will be used for

460

explicit geographic restriction bypassing via faking

461

X-Forwarded-For HTTP header

462

geo_bypass_ip_block:

463

IP range in CIDR notation that will be used similarly to

464

geo_bypass_country

465

external_downloader: A dictionary of protocol keys and the executable of the

466

external downloader to use for it. The allowed protocols
are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.

467

468

Set the value to 'native' to use the native downloader

469

compat_opts: Compatibility options. See "Differences in default behavior".

470

The following options do not work when used through the API:

471

filename, abort-on-error, multistreams, no-live-chat,

472

format-sort, no-clean-infojson, no-playlist-metafiles,

473

no-keep-subs, no-attach-info-json, allow-unsafe-ext.

474

Refer __init__.py for their implementation

475

progress_template: Dictionary of templates for progress outputs.

476

Allowed keys are 'download', 'postprocess',

477

'download-title' (console title) and 'postprocess-title'.

478

The template is mapped on a dictionary with keys 'progress' and 'info'

479

retry_sleep_functions: Dictionary of functions that takes the number of attempts

480

as argument and returns the time to sleep in seconds.

481

Allowed keys are 'http', 'fragment', 'file_access'

482

download_ranges: A callback function that gets called for every video with

483

the signature (info_dict, ydl) -> Iterable[Section].

484

Only the returned sections will be downloaded.

485

Each Section is a dict with the following keys:

486

* start_time: Start time of the section in seconds

487

* end_time: End time of the section in seconds

488

* title: Section title (Optional)

489

* index: Section number (Optional)

490

force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts

491

noprogress: Do not print the progress bar

492

live_from_start: Whether to download livestreams videos from the start

493

494

The following parameters are not used by YoutubeDL itself, they are used by

495

the downloader (see yt_dlp/downloader/common.py):

496

nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,

497

max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,

498

continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,

499

external_downloader_args, concurrent_fragment_downloads, progress_delta.

500

501

The following options are used by the post processors:

502

ffmpeg_location: Location of the ffmpeg/avconv binary; either the path

503

to the binary or its containing directory.

504

postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)

505

and a list of additional command-line arguments for the

506

postprocessor/executable. The dict can also have "PP+EXE" keys

507

which are used when the given exe is used by the given PP.

508

Use 'default' as the name for arguments to be passed to all PP

509

For compatibility with youtube-dl, a single list of args

510

can also be used

511

512

The following options are used by the extractors:

513

extractor_retries: Number of times to retry for known errors (default: 3)

514

dynamic_mpd: Whether to process dynamic DASH manifests (default: True)

515

hls_split_discontinuity: Split HLS playlists to different formats at

516

discontinuities such as ad breaks (default: False)

517

extractor_args: A dictionary of arguments to be passed to the extractors.

518

See "EXTRACTOR ARGUMENTS" for details.

519

E.g. {'youtube': {'skip': ['dash', 'hls']}}

520

mark_watched: Mark videos watched (even with --simulate). Only for YouTube

521

522

The following options are deprecated and may be removed in the future:

523

524

break_on_reject: Stop the download process when encountering a video that

525

has been filtered out.

526

- `raise DownloadCancelled(msg)` in match_filter instead

527

force_generic_extractor: Force downloader to use the generic extractor

528

- Use allowed_extractors = ['generic', 'default']

529

playliststart: - Use playlist_items

530

Playlist item to start at.

531

playlistend: - Use playlist_items

532

Playlist item to end at.

533

playlistreverse: - Use playlist_items

534

Download playlist items in reverse order.

535

forceurl: - Use forceprint

536

Force printing final URL.

537

forcetitle: - Use forceprint

538

Force printing title.

539

forceid: - Use forceprint

540

Force printing ID.

541

forcethumbnail: - Use forceprint

542

Force printing thumbnail URL.

543

forcedescription: - Use forceprint

544

Force printing description.

545

forcefilename: - Use forceprint

546

Force printing final filename.

547

forceduration: - Use forceprint

548

Force printing duration.

549

allsubtitles: - Use subtitleslangs = ['all']

550

Downloads all the subtitles of the video

551

(requires writesubtitles or writeautomaticsub)

552

include_ads: - Doesn't work

553

Download ads as well

554

call_home: - Not implemented

555

Boolean, true iff we are allowed to contact the

556

yt-dlp servers for debugging.

557

post_hooks: - Register a custom postprocessor

558

A list of functions that get called as the final step

559

for each video file, after all postprocessors have been

560

called. The filename will be passed as the only argument.

561

hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.

562

Use the native HLS downloader instead of ffmpeg/avconv

563

if True, otherwise use ffmpeg/avconv if False, otherwise

564

use downloader suggested by extractor if None.

565

prefer_ffmpeg: - avconv support is deprecated

566

If False, use avconv instead of ffmpeg if both are available,

567

otherwise prefer ffmpeg.

568

youtube_include_dash_manifest: - Use extractor_args

569

If True (default), DASH manifests and related

570

data will be downloaded and processed by extractor.

571

You can reduce network I/O by disabling it if you don't

572

care about DASH. (only for youtube)

573

youtube_include_hls_manifest: - Use extractor_args

574

If True (default), HLS manifests and related

575

data will be downloaded and processed by extractor.

576

You can reduce network I/O by disabling it if you don't

577

care about HLS. (only for youtube)

578

no_color: Same as `color='no_color'`

579

no_overwrites: Same as `overwrites=False`

"""

# Names of fields treated as numeric (going by the attribute name; the code
# that consumes this set is outside this part of the file)
_NUMERIC_FIELDS = {
    'width', 'height', 'asr', 'audio_channels', 'fps',
    'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
    'timestamp', 'release_timestamp',
    'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
    'average_rating', 'comment_count', 'age_limit',
    'start_time', 'end_time',
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number', 'release_year',
}

# Names of the keys that belong to a single format entry
_format_fields = {
    # NB: Keep in sync with the docstring of extractor/common.py
    'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
    'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
    'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
    'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data',
    'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
    'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'extra_param_to_key_url',
    'hls_aes', 'downloader_options', 'page_url', 'app', 'play_path', 'tc_url', 'flash_version',
    'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
}

604

# Maps each deprecated singular field name to its plural replacement
_deprecated_multivalue_fields = {
    'album_artist': 'album_artists',
    'artist': 'artists',
    'composer': 'composers',
    'creator': 'creators',
    'genre': 'genres',
}

611

# Sets of file extensions, grouped by media kind and built from MEDIA_EXTENSIONS
_format_selection_exts = {
    'audio': set(MEDIA_EXTENSIONS.common_audio),
    # '3gp' is added on top of the common video extensions
    'video': {*MEDIA_EXTENSIONS.common_video, '3gp'},
    'storyboards': set(MEDIA_EXTENSIONS.storyboards),
}

616

617

def __init__(self, params=None, auto_init=True):
    """Create a FileDownloader object with the given options.

    @param params       Dictionary of options; an empty one is used if None is given.
                        The dictionary is stored as self.params without copying
                        and is modified in place during initialization
    @param auto_init    Whether to load the default extractors and print header (if verbose).
                        Set to 'no_verbose_header' to not print the header
    """
    if params is None:
        params = {}
    self.params = params
    self._ies = {}
    self._ies_instances = {}
    self._pps = {k: [] for k in POSTPROCESS_WHEN}
    self._printed_messages = set()
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._postprocessor_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._num_videos = 0
    self._playlist_level = 0
    self._playlist_urls = set()
    self.cache = Cache(self)
    self.__header_cookies = []

    # Regular output goes to stderr instead of stdout when 'logtostderr' is set
    stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
    self._out_files = Namespace(
        out=stdout,
        error=sys.stderr,
        screen=sys.stderr if self.params.get('quiet') else stdout,
        # First of stderr/stdout that supports terminal sequences; never set on Windows
        console=None if compat_os_name == 'nt' else next(
            filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None),
    )

    # Best effort: a failure here is only logged
    try:
        windows_enable_vt_mode()
    except Exception as e:
        self.write_debug(f'Failed to enable VT mode: {e}')

    # The 'no_color' option overrides any value given for 'color'
    if self.params.get('no_color'):
        if self.params.get('color') is not None:
            self.params.setdefault('_warnings', []).append(
                'Overwriting params from "color" with "no_color"')
        self.params['color'] = 'no_color'

    term_allow_color = os.getenv('TERM', '').lower() != 'dumb'
    no_color = bool(os.getenv('NO_COLOR'))

    def process_color_policy(stream):
        """Resolve the color policy of a stream to True, False or 'no_color'"""
        stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
        policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
        if policy in ('auto', None):
            if term_allow_color and supports_terminal_sequences(stream):
                return 'no_color' if no_color else True
            return False
        assert policy in ('always', 'never', 'no_color'), policy
        return {'always': True, 'never': False}.get(policy, policy)

    self._allow_colors = Namespace(**{
        name: process_color_policy(stream)
        for name, stream in self._out_files.items_ if name != 'console'
    })

    system_deprecation = _get_system_deprecation()
    if system_deprecation:
        # NOTE(review): whitespace inside the replacement literal may have been
        # collapsed when this file was extracted - verify the continuation indent
        self.deprecated_feature(system_deprecation.replace('\n', '\n '))

    if self.params.get('allow_unplayable_formats'):
        # NOTE(review): the leading whitespace of the third literal may have been
        # collapsed when this file was extracted - verify
        self.report_warning(
            f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
            'This is a developer option intended for debugging. \n'
            ' If you experience any issues while using this option, '
            f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

    if self.params.get('bidi_workaround', False):
        try:
            # Imported here so that pty is only needed when the workaround is requested
            import pty
            master, slave = pty.openpty()
            width = shutil.get_terminal_size().columns
            width_args = [] if width is None else ['-w', str(width)]
            sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
            # Try bidiv first and fall back to fribidi
            try:
                self._output_process = Popen(['bidiv', *width_args], **sp_kwargs)
            except OSError:
                self._output_process = Popen(['fribidi', '-c', 'UTF-8', *width_args], **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # A missing executable only disables the workaround; anything else is re-raised
            if ose.errno == errno.ENOENT:
                self.report_warning(
                    'Could not find fribidi executable, ignoring --bidi-workaround. '
                    'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
    self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
    # A 'Cookie' header is handed to the cookie loader and then removed from the headers
    self._load_cookies(self.params['http_headers'].get('Cookie'))  # compat
    self.params['http_headers'].pop('Cookie', None)

    if auto_init and auto_init != 'no_verbose_header':
        self.print_debug_header()

    def check_deprecated(param, option, suggestion):
        """Warn if a deprecated param is set; return whether it was set"""
        if self.params.get(param) is not None:
            self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
            return True
        return False

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # The deprecated value is only used when the new option is not given
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    # Emit the messages queued in the params
    for msg in self.params.get('_warnings', []):
        self.report_warning(msg)
    for msg in self.params.get('_deprecation_warnings', []):
        self.deprecated_feature(msg)

    if impersonate_target := self.params.get('impersonate'):
        if not self._impersonate_target_available(impersonate_target):
            raise YoutubeDLError(
                f'Impersonate target "{impersonate_target}" is not available. '
                f'Use --list-impersonate-targets to see available targets. '
                f'You may be missing dependencies required to support this target.')

    if 'list-formats' in self.params['compat_opts']:
        self.params['listformats_table'] = False

    if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
        # nooverwrites was unnecessarily changed to overwrites
        # in 0c3d0f51778b153f65c21906031c2e091fcfb641
        # This ensures compatibility with both keys
        self.params['overwrites'] = not self.params['nooverwrites']
    elif self.params.get('overwrites') is None:
        self.params.pop('overwrites', None)
    else:
        self.params['nooverwrites'] = not self.params['overwrites']

    # Any of the list options implies simulation unless 'simulate' was set explicitly
    if self.params.get('simulate') is None and any((
        self.params.get('list_thumbnails'),
        self.params.get('listformats'),
        self.params.get('listsubtitles'),
    )):
        self.params['simulate'] = 'list_only'

    self.params.setdefault('forceprint', {})
    self.params.setdefault('print_to_file', {})

    # Compatibility with older syntax
    if not isinstance(params['forceprint'], dict):
        self.params['forceprint'] = {'video': params['forceprint']}

    if auto_init:
        self.add_default_info_extractors()

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self._parse_outtmpl()

    # Creating format selector here allows us to catch syntax errors before the extraction
    self.format_selector = (
        self.params.get('format') if self.params.get('format') in (None, '-')
        else self.params['format'] if callable(self.params['format'])
        else self.build_format_selector(self.params['format']))

    # Register the hooks given in the params through the matching add_* method
    hooks = {
        'post_hooks': self.add_post_hook,
        'progress_hooks': self.add_progress_hook,
        'postprocessor_hooks': self.add_postprocessor_hook,
    }
    for opt, fn in hooks.items():
        for ph in self.params.get(opt, []):
            fn(ph)

    for pp_def_raw in self.params.get('postprocessors', []):
        # Work on a copy so that popping 'when' and 'key' leaves the params untouched
        pp_def = dict(pp_def_raw)
        when = pp_def.pop('when', 'post_process')
        self.add_post_processor(
            get_postprocessor(pp_def.pop('key'))(self, **pp_def),
            when=when)

    def preload_download_archive(fn):
        """Preload the archive, if any is specified"""
        archive = set()
        if fn is None:
            return archive
        elif not is_path_like(fn):
            # Anything that is not path-like is used as the archive itself
            return fn

        self.write_debug(f'Loading archive file {fn!r}')
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    archive.add(line.strip())
        except OSError as ioe:
            # A missing archive file gives an empty archive; other errors are re-raised
            if ioe.errno != errno.ENOENT:
                raise
        return archive

    self.archive = preload_download_archive(self.params.get('download_archive'))

827

828

def warn_if_short_id(self, argv):
    """Warn when an argument looks like a short ID that starts with a dash.

    An argument made of a dash followed by exactly ten ID characters is
    flagged, and a corrected command line that moves those arguments behind
    ``--`` is suggested in the warning.
    """
    # short YouTube ID starting with dash?
    suspect_positions = [
        pos for pos, arg in enumerate(argv)
        if re.match(r'^-[0-9A-Za-z_-]{10}$', arg)]
    if not suspect_positions:
        return

    regular_args = [arg for pos, arg in enumerate(argv) if pos not in suspect_positions]
    dashed_ids = [argv[pos] for pos in suspect_positions]
    correct_argv = ['yt-dlp', *regular_args, '--', *dashed_ids]
    self.report_warning(
        'Long argument string detected. '
        f'Use -- to separate parameters and URLs, like this:\n{shell_quote(correct_argv)}')

842

843

def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list.

    The extractor is stored under its ``ie_key()``. Anything that is not a
    class is additionally cached as an instance and bound to this downloader.
    """
    key = ie.ie_key()
    self._ies[key] = ie
    if isinstance(ie, type):
        return
    self._ies_instances[key] = ie
    ie.set_downloader(self)

850

851

def get_info_extractor(self, ie_key):
    """
    Return the cached IE instance registered under ie_key. When no instance
    is cached yet, a new one is created, added to the extractor list and
    returned.
    """
    cached = self._ies_instances.get(ie_key)
    if cached is not None:
        return cached
    ie = get_info_extractor(ie_key)()
    self.add_info_extractor(ie)
    return ie

862

863

def add_default_info_extractors(self):
    """
    Add the InfoExtractors selected by the 'allowed_extractors' param
    (default: ['default']) to the end of the list.

    Raises ValueError when an entry of 'allowed_extractors' is an invalid regex.
    """
    all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
    all_ies['end'] = UnsupportedURLIE()

    requested = self.params.get('allowed_extractors', ['default'])
    # Aliases usable in 'allowed_extractors'
    aliases = {
        'all': list(all_ies),
        'default': [name for name, ie in all_ies.items() if ie._ENABLED],
    }
    try:
        ie_names = orderedSet_from_options(requested, aliases, use_regex=True)
    except re.error as e:
        raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')

    for name in ie_names:
        self.add_info_extractor(all_ies[name])
    self.write_debug(f'Loaded {len(ie_names)} extractors')

880

881

def add_post_processor(self, pp, when='post_process'):
    """Add a PostProcessor object to the end of the chain for stage `when`."""
    assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
    chain = self._pps[when]
    chain.append(pp)
    pp.set_downloader(self)

886

887

def add_post_hook(self, ph):
    """Register `ph` as a post hook (appended to the list of post hooks)."""
    registered = self._post_hooks
    registered.append(ph)

890

891

def add_progress_hook(self, ph):
    """Register `ph` as a download progress hook (appended to the list)."""
    registered = self._progress_hooks
    registered.append(ph)

894

895

def add_postprocessor_hook(self, ph):
    """Add the postprocessing progress hook.

    The hook is stored on the downloader and also attached to every
    postprocessor that is already registered.
    """
    self._postprocessor_hooks.append(ph)
    for pp in itertools.chain.from_iterable(self._pps.values()):
        pp.add_progress_hook(ph)

901

902

def _bidi_workaround(self, message):

903

if not hasattr(self, '_output_channel'):

904

return message

905

906

assert hasattr(self, '_output_process')

907

assert isinstance(message, str)

908

line_count = message.count('\n') + 1

909

self._output_process.stdin.write((message + '\n').encode())

910

self._output_process.stdin.flush()

911

res = ''.join(self._output_channel.readline().decode()

912

for _ in range(line_count))

913

return res[:-len('\n')]

914

915

def _write_string(self, message, out=None, only_once=False):

916

if only_once:

917

if message in self._printed_messages:

918

return

919

self._printed_messages.add(message)

920

write_string(message, out=out, encoding=self.params.get('encoding'))

921

922

def to_stdout(self, message, skip_eol=False, quiet=None):
    """Print message to stdout

    `quiet` and `skip_eol` are no longer honoured; passing either one only
    triggers a deprecation warning.
    """
    if quiet is not None:
        self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                 'Use "YoutubeDL.to_screen" instead')
    if skip_eol is not False:
        self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                 'Use "YoutubeDL.to_screen" instead')
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.out)

931

932

def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
    """Print message to screen if not in quiet mode

    When a 'logger' param is set, the message goes to its debug() instead.
    An explicit `quiet` argument overrides the 'quiet' param; 'verbose'
    always forces output.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return

    suppressed = self.params.get('quiet') if quiet is None else quiet
    if suppressed and not self.params.get('verbose'):
        return

    eol = '' if skip_eol else '\n'
    self._write_string(
        f'{self._bidi_workaround(message)}{eol}',
        self._out_files.screen, only_once=only_once)

942

943

def to_stderr(self, message, only_once=False):
    """Print message to stderr

    When a 'logger' param is set, the message goes to its error() instead.
    """
    assert isinstance(message, str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.error, only_once=only_once)

950

951

def _send_console_code(self, code):
    """Write the control sequence `code` to the console handle.

    Nothing is written when compat_os_name is 'nt' or when there is no
    console handle.
    """
    if compat_os_name != 'nt' and self._out_files.console:
        self._write_string(code, self._out_files.console)

955

956

def to_console_title(self, message):
    """Set the console window title to `message`.

    Does nothing unless the 'consoletitle' param is truthy. Terminal
    sequences are removed from the message first. When compat_os_name is
    'nt' the title is set through kernel32 (only if a console window
    exists); otherwise a title escape sequence is sent to the console.
    """
    if not self.params.get('consoletitle', False):
        return
    message = remove_terminal_sequences(message)
    if compat_os_name == 'nt':
        # Imported here so this branch never depends on a module-level
        # `ctypes` import being present
        import ctypes

        if ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    else:
        self._send_console_code(f'\033]0;{message}\007')

967

968

def save_console_title(self):
    """Push the current console title on the terminal's title stack.

    Only done when 'consoletitle' is enabled and 'simulate' is not set.
    """
    if self.params.get('consoletitle') and not self.params.get('simulate'):
        self._send_console_code('\033[22;0t')  # Save the title on stack

972

973

def restore_console_title(self):
    """Pop the console title back from the terminal's title stack.

    Only done when 'consoletitle' is enabled and 'simulate' is not set.
    """
    if self.params.get('consoletitle') and not self.params.get('simulate'):
        self._send_console_code('\033[23;0t')  # Restore the title from stack

977

978

def __enter__(self):

979

self.save_console_title()

980

return self

981

982

def save_cookies(self):
    """Save the cookie jar, but only when a 'cookiefile' param is set."""
    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save()

985

986

def __exit__(self, *args):

987

self.restore_console_title()

self.close()

def close(self):
    """Save cookies and dispose of the request director, if one was created."""
    self.save_cookies()
    # Looked up in __dict__ so that the check itself cannot create the attribute
    if '_request_director' not in self.__dict__:
        return
    self._request_director.close()
    del self._request_director

995

996

def trouble(self, message=None, tb=None, is_error=True):
    """Determine action to take when a download problem appears.

    The message (if any) is printed to stderr first. Then, depending on
    whether the downloader has been configured to ignore download errors,
    this method either raises DownloadError or records a non-zero return
    code.

    @param tb          If given, is additional traceback information;
                       it is only printed in verbose mode
    @param is_error    Whether to raise error according to ignorerrors
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            exc_type, exc_value = sys.exc_info()[:2]
            if exc_type:  # .trouble has been called from an except block
                tb = ''
                if hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
                    tb += ''.join(traceback.format_exception(*exc_value.exc_info))
                tb += encode_compat_str(traceback.format_exc())
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if not is_error:
        return

    if not self.params.get('ignoreerrors'):
        current = sys.exc_info()
        # Prefer the exc_info carried by the active exception, when it has one
        if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
            exc_info = current[1].exc_info
        else:
            exc_info = current
        raise DownloadError(message, exc_info)
    self._download_retcode = 1

# Named text styles; members are passed as the style argument of the `_format_*` helpers
Styles = Namespace(
    HEADERS='yellow',
    EMPHASIS='light blue',
    FILENAME='green',
    ID='green',
    DELIM='blue',
    ERROR='red',
    BAD_FORMAT='light red',
    WARNING='yellow',
    SUPPRESS='light black',
)

1041

1042

def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):

text = str(text)

if test_encoding:

original_text = text

# handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711

1047

encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'

1048

text = text.encode(encoding, 'ignore').decode(encoding)

1049

if fallback is not None and text != original_text:

1050

text = fallback

1051

return format_text(text, f) if allow_colors is True else text if fallback is None else fallback

1052

1053

def _format_out(self, *args, **kwargs):

1054

return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

1055

1056

def _format_screen(self, *args, **kwargs):

1057

return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

1058

1059

def _format_err(self, *args, **kwargs):

1060

return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

1061

1062

def report_warning(self, message, only_once=False):
    """
    Print the message to stderr, prefixed with 'WARNING:' (styled with
    Styles.WARNING). With a 'logger' param the bare message is sent to
    its warning() instead; otherwise 'no_warnings' suppresses the output.
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

1073

1074

def deprecation_warning(self, message, *, stacklevel=0):
    """Forward `message` to the module-level deprecation_warning().

    The stack level is raised by one to account for this wrapper frame, and
    the message is printed through report_error() as a non-error.
    """
    caller_level = stacklevel + 1
    deprecation_warning(
        message, stacklevel=caller_level, printer=self.report_error, is_error=False)

1077

1078

def deprecated_feature(self, message):
    """Report the use of a deprecated feature.

    With a 'logger' param the message is sent to its warning(). Otherwise it
    is printed once to stderr, prefixed with 'Deprecated Feature:' (styled
    with Styles.ERROR).
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(f'Deprecated Feature: {message}')
        # Return here, as report_warning() does: to_stderr() forwards to the
        # logger as well, which used to report the same message a second
        # time via logger.error()
        return
    self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)

1082

1083

def report_error(self, message, *args, **kwargs):
    """
    Do the same as trouble, but prefix the message with 'ERROR:' (styled
    with Styles.ERROR). Extra arguments are passed through to trouble().
    """
    prefix = self._format_err('ERROR:', self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', *args, **kwargs)

1089

1090

def write_debug(self, message, only_once=False):
    """Log debug message or Print message to stderr

    Nothing happens unless the 'verbose' param is set. The message is
    prefixed with '[debug] '.
    """
    if not self.params.get('verbose', False):
        return
    message = f'[debug] {message}'
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    self.to_stderr(message, only_once)

1099

1100

def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    detailed = f'[download] {file_name} has already been downloaded'
    try:
        self.to_screen(detailed)
    except UnicodeEncodeError:
        # The file name could not be encoded; fall back to a generic message
        self.to_screen('[download] The file has already been downloaded')

1106

1107

def report_file_delete(self, file_name):
    """Report that existing file will be deleted."""
    detailed = f'Deleting existing file {file_name}'
    try:
        self.to_screen(detailed)
    except UnicodeEncodeError:
        # The file name could not be encoded; fall back to a generic message
        self.to_screen('Deleting existing file')

1113

1114

def raise_no_formats(self, info, forced=False, *, msg=None):
    """Raise ExtractorError, or only warn, because `info` has no formats.

    Without an explicit `msg`, the message depends on info['_has_drm'].
    The error is raised when `forced` is set or when the
    'ignore_no_formats_error' param is not; otherwise the message is only
    reported as a warning.
    """
    has_drm = info.get('_has_drm')
    ignored = self.params.get('ignore_no_formats_error')
    expected = bool(msg)
    if not msg:
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'

    if not forced and ignored:
        self.report_warning(msg)
        return
    raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                         expected=has_drm or ignored or expected)

1123

1124

def parse_outtmpl(self):
    """Deprecated public wrapper: normalise the 'outtmpl' param and return it."""
    self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
    self._parse_outtmpl()
    templates = self.params['outtmpl']
    return templates

1128

1129

def _parse_outtmpl(self):
    """Normalise the 'outtmpl' param into a dict and fill in missing defaults.

    A non-dict value becomes the 'default' template. Every key of
    DEFAULT_OUTTMPL that is missing (or None) is filled in from it.
    """
    sanitize = IDENTITY
    if self.params.get('restrictfilenames'):  # Remove spaces in the default template
        def sanitize(template):
            return template.replace(' - ', ' ').replace(' ', '-')

    outtmpl = self.params.setdefault('outtmpl', {})
    if not isinstance(outtmpl, dict):
        outtmpl = {'default': outtmpl}
        self.params['outtmpl'] = outtmpl

    missing = {
        tmpl_type: sanitize(template)
        for tmpl_type, template in DEFAULT_OUTTMPL.items()
        if outtmpl.get(tmpl_type) is None}
    outtmpl.update(missing)

1138

1139

def get_output_path(self, dir_type='', filename=None):
    """Join the configured 'home' path, the path for `dir_type` and `filename`.

    Both directories come from the 'paths' param (a dict) and are stripped
    and expanded first. The joined path is passed through sanitize_path().
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    path = os.path.join(home_dir, type_dir, filename or '')
    return sanitize_path(path, force=self.params.get('windowsfilenames'))

1147

1148

@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Run expand_path() on the template while keeping '%%' and '$$' intact."""
    # expand_path translates '%%' into '%' and '$$' into '$'
    # correspondingly that is not what we want since we need to keep
    # '%%' intact for template dict substitution step. Working around
    # with boundary-alike separator hack.
    sep = ''.join(random.choices(string.ascii_letters, k=32))
    for char in ('%', '$'):
        outtmpl = outtmpl.replace(char * 2, f'{char}{sep}{char}')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    expanded = expand_path(outtmpl)
    return expanded.replace(sep, '')

1162

1163

@staticmethod
def escape_outtmpl(outtmpl):
    """ Escape any remaining strings like %s, %abc% etc. """
    def _escape(mobj):
        # Matches that carry a key are kept as they are;
        # every other match gets an extra '%' prepended
        prefix = '' if mobj.group('has_key') else '%'
        return prefix + mobj.group(0)

    pattern = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')
    return re.sub(pattern, _escape, outtmpl)

@classmethod
def validate_outtmpl(cls, outtmpl):
    """ @return None or Exception object """
    expanded = cls._outtmpl_expandpath(outtmpl)
    # Swap the custom conversion types for 's' so that '%' formatting accepts them
    outtmpl = re.sub(
        STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
        lambda mobj: f'{mobj.group(0)[:-1]}s',
        expanded)
    try:
        # Trial substitution; only the ValueError of a malformed template matters
        cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None

@staticmethod

def _copy_infodict(info_dict):

1186

info_dict = dict(info_dict)

1187

info_dict.pop('__postprocessors', None)

1188

info_dict.pop('__pending_error', None)

1189

return info_dict

1190

1191

def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

    Returns a (template, values) tuple: each keyed field of the template is
    rewritten to a generated key, and the evaluated value is stored under
    that key in the returned dict.

    @param sanitize    Whether to sanitize the output as a filename.
                       For backward compatibility, a function can also be passed
    """

    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    # Everything below works on a copy; only 'epoch' is written to the caller's dict
    info_dict = self._copy_infodict(info_dict)
    if info_dict.get('duration', None) is not None:
        # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        info_dict['duration_string'] = formatSeconds(info_dict['duration'], '-' if sanitize else ':')
    else:
        info_dict['duration_string'] = None
    info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
    info_dict['video_autonumber'] = self._num_videos
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
        'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
        '*': float.__mul__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int, slice or "{field, ...}"
    FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}  # noqa: UP031
    FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {  # noqa: UP031
        'inner': FIELD_INNER_RE,
        'field': rf'\w*(?:\.{FIELD_INNER_RE})*',
    }
    MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
    MATH_OPERATORS_RE = r'(?:{})'.format('|'.join(map(re.escape, MATH_FUNCTIONS.keys())))
    INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
        (?P<negate>-)?
        (?P<fields>{FIELD_RE})
        (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
        (?:>(?P<strf_format>.+?))?
        (?P<remaining>
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
        )$''')

    def _from_user_input(field):
        # Turn one textual path component into the key used for traversal
        if field == ':':
            return ...
        if ':' in field:
            return slice(*map(int_or_none, field.split(':')))
        if int_or_none(field) is not None:
            return int(field)
        return field

    def _traverse_infodict(fields):
        # Split on '.', but keep every "{...}" group together as one component
        fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                  for f in ([x] if x.startswith('{') else x.split('.'))]
        # Drop an empty component at either end
        for i in (0, -1):
            if fields and not fields[i]:
                fields.pop(i)

        for i, f in enumerate(fields):
            if f.startswith('{'):
                assert f.endswith('}'), f'No closing brace for {f} in {fields}'
                fields[i] = {k: list(map(_from_user_input, k.split('.'))) for k in f[1:-1].split(',')}
            else:
                fields[i] = _from_user_input(f)

        return traverse_obj(info_dict, fields, traverse_string=True)

    def get_value(mdict):
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        pending = mdict['maths']
        if pending:
            value = float_or_none(value)
            math_op = None
            # Tokens alternate: operator, operand, operator, operand, ...
            while pending:
                token_re = MATH_FIELD_RE if math_op else MATH_OPERATORS_RE
                item = re.match(token_re, pending).group(0)
                pending = pending[len(item):]
                if math_op is None:
                    math_op = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                offset = float_or_none(item)
                if offset is None:
                    # Not a number literal, so treat the operand as a field
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = math_op(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                math_op = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
        if sanitize and value == '':
            value = None
        return value

    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
        if 'filename-sanitization' in self.params['compat_opts']:
            is_id = bool(re.search(r'(^|[_.])id(\.|$)', key))
        else:
            is_id = NO_DEFAULT
        return sanitize_filename(str(value), restricted=restricted, is_id=is_id)

    sanitizer = sanitize if callable(sanitize) else filename_sanitizer
    sanitize = bool(sanitize)

    def _dumpjson_default(obj):
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        return repr(obj)

    class _ReplacementFormatter(string.Formatter):
        # Only numeric fields are supported, and all of them resolve to the first argument
        def get_field(self, field_name, args, kwargs):
            if field_name.isdigit():
                return args[0], -1
            raise ValueError('Unsupported field')

    replacement_formatter = _ReplacementFormatter()

    def create_key(outer_mobj):
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        value, replacement, default, last_field = None, None, na, ''
        match = re.match(INTERNAL_FORMAT_RE, key)
        # Walk through the comma-separated alternates until one yields a value
        while match:
            parsed = match.groupdict()
            if parsed['default'] is not None:
                default = parsed['default']
            value = get_value(parsed)
            last_field, replacement = parsed['fields'], parsed['replacement']
            if value is not None or not parsed['alternate']:
                break
            match = re.match(INTERNAL_FORMAT_RE, parsed['remaining'][1:])

        if None not in (value, replacement):
            try:
                value = replacement_formatter.format(replacement, value)
            except ValueError:
                value, default = None, na

        fmt = outer_mobj.group('format')
        if fmt == 's' and last_field in field_size_compat_map and isinstance(value, int):
            fmt = f'0{field_size_compat_map[last_field]:d}d'

        flags = outer_mobj.group('conversion') or ''
        str_fmt = f'{fmt[:-1]}s'
        if value is None:
            value, fmt = default, 's'
        elif fmt[-1] == 'l':  # list
            delim = '\n' if '#' in flags else ', '
            value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
        elif fmt[-1] == 'j':  # json
            value, fmt = json.dumps(
                value, default=_dumpjson_default,
                indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
        elif fmt[-1] == 'h':  # html
            value, fmt = escapeHTML(str(value)), str_fmt
        elif fmt[-1] == 'q':  # quoted
            value = map(str, variadic(value) if '#' in flags else [value])
            value, fmt = shell_quote(value, shell=True), str_fmt
        elif fmt[-1] == 'B':  # bytes
            value = f'%{str_fmt}'.encode() % str(value).encode()
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif fmt[-1] == 'U':  # unicode normalized
            value, fmt = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF{}{}'.format('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                value), str_fmt
        elif fmt[-1] == 'D':  # decimal suffix
            num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
            value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                          factor=1024 if '#' in flags else 1000)
        elif fmt[-1] == 'S':  # filename sanitization
            value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
        elif fmt[-1] == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif fmt[-1] not in 'rsa':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            # If value is an object, sanitize might convert it to a string
            # So we convert it to repr first
            if fmt[-1] == 'r':
                value, fmt = repr(value), str_fmt
            elif fmt[-1] == 'a':
                value, fmt = ascii(value), str_fmt
            if fmt[-1] in 'csra':
                value = sanitizer(last_field, value)

        key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

1413

1414

def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Evaluate `outtmpl` against `info_dict` and return the resulting string.

    Extra arguments are passed on to prepare_outtmpl().
    """
    prepared_tmpl, values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    return self.escape_outtmpl(prepared_tmpl) % values

1417

1418

@_catch_unsafe_extension_error
def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
    """Evaluate an output template into a filename (output paths are not applied).

    @param outtmpl     Template to use; mutually exclusive with tmpl_type
    @param tmpl_type   Key of the 'outtmpl' param to use (falls back to 'default')
    @return            The filename, or None when it is empty or the template is invalid
    """
    assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
    if outtmpl is None:
        templates = self.params['outtmpl']
        outtmpl = templates.get(tmpl_type or 'default', templates['default'])
    try:
        outtmpl = self._outtmpl_expandpath(outtmpl)
        filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
        if not filename:
            return None

        if tmpl_type in ('', 'temp'):
            final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
            if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
                filename = replace_extension(filename, ext, final_ext)
        elif tmpl_type:
            force_ext = OUTTMPL_TYPES[tmpl_type]
            if force_ext:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # Only the part before the last two dot-separated components is trimmed
            stem, *suffixes = filename.rsplit('.', 2)
            filename = join_nonempty(stem[:trim_file_name], *suffixes, delim='.')

        return filename
    except ValueError as err:
        self.report_error(f'Error in output template: {err!s} (encoding: {preferredencoding()!r})')
        return None

1448

1449

def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
    """Generate the output filename

    @param dir_type   Template/path type; mutually exclusive with outtmpl
    @param outtmpl    Explicit template to evaluate instead of a configured one
    @param warn       Whether to warn when the 'paths' param is going to be ignored
    """
    if outtmpl:
        assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
        dir_type = None
    filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
    if not filename and dir_type not in ('', 'temp'):
        return ''

    if warn and self.params.get('paths'):
        if filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        elif os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)

    # '-' (stdout) and empty results are returned without applying the output paths
    if filename == '-' or not filename:
        return filename
    return self.get_output_path(dir_type, filename)

1469

1470

def _match_entry(self, info_dict, incomplete=False, silent=False):
    """Returns None if the file should be downloaded

    Otherwise the reason for skipping the entry is returned (and printed,
    unless `silent`). When the matching break_* param is set, the
    corresponding exception is raised instead of returning.
    """
    if 'playlist-match-filter' in self.params['compat_opts']:
        _type = 'video'
    else:
        _type = info_dict.get('_type', 'video')
    assert incomplete or _type == 'video', 'Only video result can be considered complete'

    video_title = info_dict.get('title', info_dict.get('id', 'entry'))

    def check_filter():
        if _type in ('playlist', 'multi_video'):
            return
        elif _type in ('url', 'url_transparent') and not try_call(
                lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
            return

        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

        date = info_dict.get('upload_date')
        if date is not None:
            date_range = self.params.get('daterange', DateRange())
            if date not in date_range:
                return f'{date_from_str(date).isoformat()} upload date is not in range {date_range}'

        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)

        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return f'Skipping "{video_title}" because it is age restricted'

        match_filter = self.params.get('match_filter')
        if match_filter is None:
            return None

        cancelled = None
        try:
            try:
                ret = match_filter(info_dict, incomplete=incomplete)
            except TypeError:
                # For backward compatibility
                ret = None if incomplete else match_filter(info_dict)
        except DownloadCancelled as err:
            if err.msg is not NO_DEFAULT:
                raise
            ret, cancelled = err.msg, err

        if ret is not NO_DEFAULT:
            return ret

        # The filter gave no verdict (NO_DEFAULT): ask the user until the answer is y/n
        while True:
            filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
            reply = input(self._format_screen(
                f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
            if reply in {'y', ''}:
                return None
            elif reply == 'n':
                if cancelled:
                    raise type(cancelled)(f'Skipping {video_title}')
                return f'Skipping {video_title}'

    if self.in_download_archive(info_dict):
        id_part = format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: ')
        title_part = format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} ')
        reason = ''.join((id_part, title_part, 'has already been recorded in the archive'))
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        try:
            reason = check_filter()
        except DownloadCancelled as e:
            reason, break_opt, break_err = e.msg, 'match_filter', type(e)
        else:
            break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return reason
    if not silent:
        self.to_screen('[download] ' + reason)
    if self.params.get(break_opt, False):
        raise break_err()
    return reason

@staticmethod

def add_extra_info(info_dict, extra_info):

1563

"""Set the keys from extra_info in info dict if they are missing"""

1564

for key, value in extra_info.items():

1565

info_dict.setdefault(key, value)

1566

1567

def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract and return the information dictionary of the URL

    Arguments:
    @param url          URL to extract

    Keyword arguments:
    @param download     Whether to download videos
    @param process      Whether to resolve all unresolved references (URLs, playlist items).
                        Must be True for download to work
    @param ie_key       Use only the extractor with this key

    @param extra_info   Dictionary containing the extra values to add to the info (For internal use only)
    @force_generic_extractor  Force using the generic extractor (Deprecated; use ie_key='Generic')
    """

    extra_info = {} if extra_info is None else extra_info

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    if not ie_key:
        ies = self._ies
    elif ie_key in self._ies:
        ies = {ie_key: self._ies[ie_key]}
    else:
        ies = {}

    for key, ie in ies.items():
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = ie.get_temp_id(url)
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
            self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
                           'has already been recorded in the archive')
            if self.params.get('break_on_existing', False):
                raise ExistingVideoReached
            # Already archived: leave the loop without extracting (returns None)
            break
        return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
    else:
        # Reached only when no extractor was suitable for the URL
        extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
        self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
                          tb=False if extractors_restricted else None)

1616

1617

def _handle_extraction_exceptions(func):

1618

@functools.wraps(func)

1619

def wrapper(self, *args, **kwargs):

1620

while True:

1621

try:

1622

return func(self, *args, **kwargs)

1623

except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):

1624

raise

1625

except ReExtractInfo as e:

1626

if e.expected:

1627

self.to_screen(f'{e}; Re-extracting data')

1628

else:

1629

self.to_stderr('\r')

1630

self.report_warning(f'{e}; Re-extracting data')

1631

continue

1632

except GeoRestrictedError as e:

1633

msg = e.msg

1634

if e.countries:

1635

msg += '\nThis video is available in {}.'.format(', '.join(

1636

map(ISO3166Utils.short2full, e.countries)))

1637

msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'

1638

self.report_error(msg)

1639

except ExtractorError as e: # An error we somewhat expected

1640

self.report_error(str(e), e.format_traceback())

1641

except Exception as e:

1642

if self.params.get('ignoreerrors'):

1643

self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))

else:

raise

break

return wrapper

def _wait_for_video(self, ie_result={}):

1650

if (not self.params.get('wait_for_video')

1651

or ie_result.get('_type', 'video') != 'video'

1652

or ie_result.get('formats') or ie_result.get('url')):

1653

return

1654

1655

format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]

last_msg = ''

def progress(msg):

nonlocal last_msg

full_msg = f'{msg}\n'

1661

if not self.params.get('noprogress'):

1662

full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'

1663

elif last_msg:

1664

return

1665

self.to_screen(full_msg, skip_eol=True)

1666

last_msg = msg

1667

1668

min_wait, max_wait = self.params.get('wait_for_video')

1669

diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())

1670

if diff is None and ie_result.get('live_status') == 'is_upcoming':

1671

diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)

1672

self.report_warning('Release time of video is not known')

1673

elif ie_result and (diff or 0) <= 0:

1674

self.report_warning('Video should already be available according to extracted info')

1675

diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))

1676

self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

1677

1678

wait_till = time.time() + diff

1679

try:

1680

while True:

1681

diff = wait_till - time.time()

1682

if diff <= 0:

1683

progress('')

1684

raise ReExtractInfo('[wait] Wait period ended', expected=True)

1685

progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')

1686

time.sleep(1)

1687

except KeyboardInterrupt:

1688

progress('')

1689

raise ReExtractInfo('[wait] Interrupted by user', expected=True)

1690

except BaseException as e:

1691

if not isinstance(e, ReExtractInfo):

self.to_screen('')

raise

def _load_cookies(self, data, *, autoscope=True):
    """Loads cookies from a `Cookie` header

    This tries to work around the security vulnerability of passing cookies to every domain.
    See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj

    @param data         The Cookie header as string to load the cookies from
    @param autoscope    If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains
                        If `True`, save cookies for later to be stored in the jar with a limited scope
                        If a URL, save cookies in the jar with the domain of the URL
    """
    for morsel in LenientSimpleCookie(data).values():
        # A plain Cookie header carries no attributes, so any attribute value is a syntax error here
        if autoscope and any(morsel.values()):
            raise ValueError('Invalid syntax in Cookie Header')

        domain = morsel.get('domain') or ''
        expiry = morsel.get('expires')
        if expiry == '':  # 0 is valid
            expiry = None
        prepared_cookie = http.cookiejar.Cookie(
            version=morsel.get('version') or 0,
            name=morsel.key,
            value=morsel.value,
            port=None,
            port_specified=False,
            domain=domain,
            domain_specified=True,
            domain_initial_dot=True,
            path=morsel.get('path') or '',
            path_specified=bool(morsel.get('path')),
            secure=morsel.get('secure') or False,
            expires=expiry,
            discard=False,
            comment=None,
            comment_url=None,
            rest={})

        if domain:
            # Explicitly scoped cookies go straight into the jar
            self.cookiejar.set_cookie(prepared_cookie)
            continue

        if autoscope is True:
            # Keep the cookie aside; _apply_header_cookies scopes it to a URL later
            self.deprecated_feature(
                'Passing cookies as a header is a potential security risk; '
                'they will be scoped to the domain of the downloaded urls. '
                'Please consider loading cookies from a file or browser instead.')
            self.__header_cookies.append(prepared_cookie)
            continue

        if autoscope:
            # `autoscope` is a URL here: scope the cookie to its domain right away
            self.report_warning(
                'The extractor result contains an unscoped cookie as an HTTP header. '
                f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
                only_once=True)
            self._apply_header_cookies(autoscope, [prepared_cookie])
            continue

        self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
                          tb=False, is_error=False)

1736

1737

def _apply_header_cookies(self, url, cookies=None):

1738

"""Applies stray header cookies to the provided url

1739

1740

This loads header cookies and scopes them to the domain provided in `url`.

1741

While this is not ideal, it helps reduce the risk of them being sent

1742

to an unintended destination while mostly maintaining compatibility.

1743

"""

1744

parsed = urllib.parse.urlparse(url)

1745

if not parsed.hostname:

1746

return

1747

1748

for cookie in map(copy.copy, cookies or self.__header_cookies):

1749

cookie.domain = f'.{parsed.hostname}'

1750

self.cookiejar.set_cookie(cookie)

1751

1752

@_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """
    Extract `url` with the extractor `ie` and optionally process the result

    @param url          URL to extract
    @param ie           Extractor instance to use
    @param download     Passed on to process_ie_result
    @param extra_info   Extra values for the info dict; its 'original_url' is used as a default
    @param process      Whether to wait for the video (if configured) and process the result

    Returns the processed result if `process`, the raw extractor result otherwise,
    and nothing when the extractor returned None.
    """
    self._apply_header_cookies(url)

    try:
        ie_result = ie.extract(url)
    except UserNotLive as err:
        if process:
            if self.params.get('wait_for_video'):
                self.report_warning(err)
            self._wait_for_video()
        raise

    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
        return

    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}

    original_url = extra_info.get('original_url')
    if original_url:
        ie_result.setdefault('original_url', original_url)
    self.add_default_extra_info(ie_result, ie, url)

    if not process:
        return ie_result
    self._wait_for_video(ie_result)
    return self.process_ie_result(ie_result, download, extra_info)

def add_default_extra_info(self, ie_result, ie, url):
    """
    Add the URL and extractor derived fields to `ie_result` through add_extra_info

    @param ie_result    Info dict to complete
    @param ie           Extractor that produced the result, or None to skip the extractor fields
    @param url          URL to record as 'webpage_url'/'original_url', or None to skip them
    """
    if url is not None:
        self.add_extra_info(ie_result, dict(webpage_url=url, original_url=url))

    # Derived from whatever 'webpage_url' the result holds at this point
    page_url = ie_result.get('webpage_url')
    if page_url:
        self.add_extra_info(ie_result, dict(
            webpage_url_basename=url_basename(page_url),
            webpage_url_domain=get_domain(page_url),
        ))

    if ie is not None:
        self.add_extra_info(ie_result, dict(
            extractor=ie.IE_NAME,
            extractor_key=ie.ie_key(),
        ))

1799

1800

def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie (may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    @param ie_result    Info dict returned by an extractor; dispatched on its '_type' ('video' if missing)
    @param download     Whether to download the videos
    @param extra_info   Extra values to add to the resolved info dicts

    Nothing is returned for a playlist whose 'webpage_url' is already being processed.
    An unknown '_type' raises an Exception.
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(
            ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
        if ie_result.get('original_url') and not extra_info.get('original_url'):
            extra_info = {'original_url': ie_result['original_url'], **extra_info}

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: print/record a copy of the reference, but return it unresolved
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            if ie and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            info_copy, _ = self.pre_process(info_copy)
            self._fill_common_fields(info_copy, False)
            self.__forced_printings(info_copy)
            self._raise_pending_errors(info_copy)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        self._raise_pending_errors(ie_result)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] {}: {} additional URL(s) requested'.format(ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "{}"'.format('", "'.join(additional_urls)))
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        exempted_fields = {'_type', 'url', 'ie_key'}
        if not ie_result.get('section_end') and ie_result.get('section_start') is None:
            # For video clips, the id etc of the clip extractor should be used
            exempted_fields |= {'id', 'extractor', 'extractor_key'}

        new_result = info.copy()
        new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result.get('webpage_url')  # Playlists may not have webpage_url
        if webpage_url and webpage_url in self._playlist_urls:
            # The title/id fallback must be resolved before formatting:
            # the formatted message itself is always truthy
            self.to_screen(
                '[download] Skipping already downloaded playlist: {}'.format(
                    ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._fill_common_fields(ie_result, False)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Forget the visited URLs once the outermost playlist is done
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor {} returned a compat_list result. '
            'It needs to be updated.'.format(ie_result.get('extractor')))

        def _fixup(r):
            # Give each entry the extractor and URL fields of the list it came from
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception(f'Invalid result type: {result_type}')

1933

1934

def _ensure_dir_exists(self, path):
    """Delegate to make_dir() for `path`, with self.report_error as its error callback

    The result of make_dir() is returned unchanged.
    """
    error_callback = self.report_error
    return make_dir(path, error_callback)

1936

1937

@staticmethod

1938

def _playlist_infodict(ie_result, strict=False, **kwargs):

1939

info = {

1940

'playlist_count': ie_result.get('playlist_count'),

1941

'playlist': ie_result.get('title') or ie_result.get('id'),

1942

'playlist_id': ie_result.get('id'),

1943

'playlist_title': ie_result.get('title'),

1944

'playlist_uploader': ie_result.get('uploader'),

1945

'playlist_uploader_id': ie_result.get('uploader_id'),

1946

'playlist_channel': ie_result.get('channel'),

1947

'playlist_channel_id': ie_result.get('channel_id'),

**kwargs,

}

if strict:

return info

if ie_result.get('webpage_url'):

1953

info.update({

1954

'webpage_url': ie_result['webpage_url'],

1955

'webpage_url_basename': url_basename(ie_result['webpage_url']),

1956

'webpage_url_domain': get_domain(ie_result['webpage_url']),

})

return {

**info,

'playlist_index': 0,

'__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),

1962

'extractor': ie_result['extractor'],

1963

'extractor_key': ie_result['extractor_key'],

1964

}

1965

1966

def __process_playlist(self, ie_result, download):
    """Process each entry in the playlist

    Writes the playlist level files (unless disabled or simulating), processes
    the requested entries one by one and finally runs the 'playlist' postprocessors.

    @param ie_result    Info dict of the playlist; its 'entries', 'requested_entries'
                        and 'playlist_count' are updated in place
    @param download     Passed on to the processing of each entry

    Returns the playlist info dict, or nothing if the playlist was rejected by
    _match_entry or writing one of the playlist files returned None.
    """
    assert ie_result['_type'] in ('playlist', 'multi_video')

    common_info = self._playlist_infodict(ie_result, strict=True)
    title = common_info.get('playlist') or '<Untitled>'
    # Any non-None result from _match_entry means the whole playlist is skipped
    if self._match_entry(common_info, incomplete=True) is not None:
        return
    self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

    all_entries = PlaylistEntries(self, ie_result)
    entries = orderedSet(all_entries.get_requested_items(), lazy=True)

    lazy = self.params.get('lazy_playlist')
    if lazy:
        # Entries are consumed on the fly; their number is not known in advance
        resolved_entries, n_entries = [], 'N/A'
        ie_result['requested_entries'], ie_result['entries'] = None, None
    else:
        # Exhaust the entries up front; `entries` and `resolved_entries` are the same list
        entries = resolved_entries = list(entries)
        n_entries = len(resolved_entries)
        # Split the (index, entry) pairs into two parallel sequences
        ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
    if not ie_result.get('playlist_count'):
        # Better to do this after potentially exhausting entries
        ie_result['playlist_count'] = all_entries.get_full_count()

    extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
    # Lookups fall back to `extra` for keys missing from ie_result
    ie_copy = collections.ChainMap(ie_result, extra)

    _infojson_written = False
    write_playlist_files = self.params.get('allow_playlist_files', True)
    if write_playlist_files and self.params.get('list_thumbnails'):
        self.list_thumbnails(ie_result)
    if write_playlist_files and not self.params.get('simulate'):
        _infojson_written = self._write_info_json(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
        if _infojson_written is None:
            return
        if self._write_description('playlist', ie_result,
                                   self.prepare_filename(ie_copy, 'pl_description')) is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if lazy:
        if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
            self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
    elif self.params.get('playlistreverse'):
        entries.reverse()
    elif self.params.get('playlistrandom'):
        random.shuffle(entries)

    self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
                   f'{format_field(ie_result, "playlist_count", " of %s")}')

    keep_resolved_entries = self.params.get('extract_flat') != 'discard'
    if self.params.get('extract_flat') == 'discard_in_playlist':
        keep_resolved_entries = ie_result['_type'] != 'playlist'
    if keep_resolved_entries:
        self.write_debug('The information of all playlist entries will be held in memory')

    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, (playlist_index, entry) in enumerate(entries):
        if lazy:
            # Record the entry first so that resolved_entries[i] exists for the updates below
            resolved_entries.append((playlist_index, entry))
        if not entry:
            continue

        entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
        if not lazy and 'playlist-index' in self.params['compat_opts']:
            playlist_index = ie_result['requested_entries'][i]

        entry_copy = collections.ChainMap(entry, {
            **common_info,
            'n_entries': int_or_none(n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': i + 1,
        })

        if self._match_entry(entry_copy, incomplete=True) is not None:
            # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
            # NO_DEFAULT marks the entry for removal from the final lists
            resolved_entries[i] = (playlist_index, NO_DEFAULT)
            continue

        self.to_screen(
            f'[download] Downloading item {self._format_screen(i + 1, self.Styles.ID)} '
            f'of {self._format_screen(n_entries, self.Styles.EMPHASIS)}')

        entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
            'playlist_index': playlist_index,
            'playlist_autonumber': i + 1,
        }, extra))
        # A falsy result counts as a failed entry
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
            break
        if keep_resolved_entries:
            resolved_entries[i] = (playlist_index, entry_result)

    # Update with processed data
    ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
    ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
    if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
        # Do not set for full playlist
        ie_result.pop('requested_entries')

    # Write the updated info to json
    if _infojson_written is True and self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
        return

    ie_result = self.run_all_pps('playlist', ie_result)
    self.to_screen(f'[download] Finished downloading playlist: {title}')
    return ie_result

2083

2084

@_handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process one playlist entry with process_ie_result under the common extraction error handling"""
    processed_entry = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return processed_entry

2088

2089

def _build_format_filter(self, filter_spec):

2090

" Returns a function to filter the formats according to the filter_spec "

OPERATORS = {

'<': operator.lt,

'<=': operator.le,

'>': operator.gt,

'>=': operator.ge,

'=': operator.eq,

'!=': operator.ne,

}

operator_rex = re.compile(r'''(?x)\s*

2101

(?P<key>[\w.-]+)\s*

2102

(?P<op>{})(?P<none_inclusive>\s*\?)?\s*

2103

(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*

2104

'''.format('|'.join(map(re.escape, OPERATORS.keys()))))

2105

m = operator_rex.fullmatch(filter_spec)

2106

if m:

2107

try:

2108

comparison_value = int(m.group('value'))

2109

except ValueError:

2110

comparison_value = parse_filesize(m.group('value'))

2111

if comparison_value is None:

2112

comparison_value = parse_filesize(m.group('value') + 'B')

2113

if comparison_value is None:

2114

raise ValueError(

2115

'Invalid value {!r} in format specification {!r}'.format(

2116

m.group('value'), filter_spec))

2117

op = OPERATORS[m.group('op')]

if not m:

STR_OPERATORS = {

'=': operator.eq,

'^=': lambda attr, value: attr.startswith(value),

2123

'$=': lambda attr, value: attr.endswith(value),

2124

'*=': lambda attr, value: value in attr,

2125

'~=': lambda attr, value: value.search(attr) is not None,

2126

}

2127

str_operator_rex = re.compile(r'''(?x)\s*

2128

(?P<key>[a-zA-Z0-9._-]+)\s*

2129

(?P<negation>!\s*)?(?P<op>{})\s*(?P<none_inclusive>\?\s*)?

2130

(?P<quote>["'])?

2131

(?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))

2132

(?(quote)(?P=quote))\s*

2133

'''.format('|'.join(map(re.escape, STR_OPERATORS.keys()))))

2134

m = str_operator_rex.fullmatch(filter_spec)

2135

if m:

2136

if m.group('op') == '~=':

2137

comparison_value = re.compile(m.group('value'))

2138

else:

2139

comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))

2140

str_op = STR_OPERATORS[m.group('op')]

2141

if m.group('negation'):

2142

op = lambda attr, value: not str_op(attr, value)

else:

op = str_op

if not m:

raise SyntaxError(f'Invalid filter specification {filter_spec!r}')

2148

2149

def _filter(f):

2150

actual_value = f.get(m.group('key'))

2151

if actual_value is None:

2152

return m.group('none_inclusive')

2153

return op(actual_value, comparison_value)

2154

return _filter

2155

2156

def _check_formats(self, formats):

2157

for f in formats:

2158

working = f.get('__working')

2159

if working is not None:

if working:

yield f

continue

self.to_screen('[info] Testing format {}'.format(f['format_id']))

2164

path = self.get_output_path('temp')

2165

if not self._ensure_dir_exists(f'{path}/'):

2166

continue

2167

temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)

2168

temp_file.close()

2169

try:

2170

success, _ = self.dl(temp_file.name, f, test=True)

2171

except (DownloadError, OSError, ValueError, *network_exceptions):

2172

success = False

2173

finally:

2174

if os.path.exists(temp_file.name):

2175

try:

2176

os.remove(temp_file.name)

2177

except OSError:

2178

self.report_warning(f'Unable to delete temporary file "{temp_file.name}"')

2179

f['__working'] = success

if success:

yield f

else:

self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id']))

2184

2185

def _select_formats(self, formats, selector):

2186

return list(selector({

2187

'formats': formats,

2188

'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),

2189

'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video

2190

or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio

2191

}))

2192

2193

def _default_format_spec(self, info_dict, download=True):

2194

download = download and not self.params.get('simulate')

2195

prefer_best = download and (

2196

self.params['outtmpl']['default'] == '-'

2197

or info_dict.get('is_live') and not self.params.get('live_from_start'))

2198

2199

def can_merge():

2200

merger = FFmpegMergerPP(self)

2201

return merger.available and merger.can_merge()

2202

2203

if not prefer_best and download and not can_merge():

2204

prefer_best = True

2205

formats = self._get_formats(info_dict)

2206

evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec))

2207

if evaluate_formats('b/bv+ba') != evaluate_formats('bv*+ba/b'):

2208

self.report_warning('ffmpeg not found. The downloaded format may not be the best available. '

2209

'Installing ffmpeg is strongly recommended: https://github.com/yt-dlp/yt-dlp#dependencies')

2210

2211

compat = (self.params.get('allow_multiple_audio_streams')

2212

or 'format-spec' in self.params['compat_opts'])

2213

2214

return ('best/bestvideo+bestaudio' if prefer_best

2215

else 'bestvideo+bestaudio/best' if compat

2216

else 'bestvideo*+bestaudio/best')

2217

2218

def build_format_selector(self, format_spec):

2219

def syntax_error(note, start):

2220

message = (

2221

'Invalid format specification: '

2222

'{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))

2223

return SyntaxError(message)

2224

2225

PICKFIRST = 'PICKFIRST'

MERGE = 'MERGE'

SINGLE = 'SINGLE'

GROUP = 'GROUP'

FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

2230

2231

allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),

2232

'video': self.params.get('allow_multiple_video_streams', False)}

2233

2234

def _parse_filter(tokens):

2235

filter_parts = []

2236

for type_, string_, _start, _, _ in tokens:

2237

if type_ == tokenize.OP and string_ == ']':

2238

return ''.join(filter_parts)

2239

else:

2240

filter_parts.append(string_)

2241

2242

def _remove_unused_ops(tokens):

2243

# Remove operators that we don't use and join them with the surrounding strings.

2244

# E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'

2245

ALLOWED_OPS = ('/', '+', ',', '(', ')')

2246

last_string, last_start, last_end, last_line = None, None, None, None

2247

for type_, string_, start, end, line in tokens:

2248

if type_ == tokenize.OP and string_ == '[':

2249

if last_string:

2250

yield tokenize.NAME, last_string, last_start, last_end, last_line

2251

last_string = None

2252

yield type_, string_, start, end, line

2253

# everything inside brackets will be handled by _parse_filter

2254

for type_, string_, start, end, line in tokens:

2255

yield type_, string_, start, end, line

2256

if type_ == tokenize.OP and string_ == ']':

2257

break

2258

elif type_ == tokenize.OP and string_ in ALLOWED_OPS:

2259

if last_string:

2260

yield tokenize.NAME, last_string, last_start, last_end, last_line

2261

last_string = None

2262

yield type_, string_, start, end, line

2263

elif type_ in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:

2264

if not last_string:

2265

last_string = string_

last_start = start

last_end = end

else:

last_string += string_

2270

if last_string:

2271

yield tokenize.NAME, last_string, last_start, last_end, last_line

2272

2273

def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):

2274

selectors = []

2275

current_selector = None

2276

for type_, string_, start, _, _ in tokens:

2277

# ENCODING is only defined in Python 3.x

2278

if type_ == getattr(tokenize, 'ENCODING', None):

2279

continue

2280

elif type_ in [tokenize.NAME, tokenize.NUMBER]:

2281

current_selector = FormatSelector(SINGLE, string_, [])

2282

elif type_ == tokenize.OP:

2283

if string_ == ')':

2284

if not inside_group:

2285

# ')' will be handled by the parentheses group

2286

tokens.restore_last_token()

2287

break

2288

elif inside_merge and string_ in ['/', ',']:

2289

tokens.restore_last_token()

2290

break

2291

elif inside_choice and string_ == ',':

2292

tokens.restore_last_token()

2293

break

2294

elif string_ == ',':

2295

if not current_selector:

2296

raise syntax_error('"," must follow a format selector', start)

2297

selectors.append(current_selector)

2298

current_selector = None

2299

elif string_ == '/':

2300

if not current_selector:

2301

raise syntax_error('"/" must follow a format selector', start)

2302

first_choice = current_selector

2303

second_choice = _parse_format_selection(tokens, inside_choice=True)

2304

current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])

2305

elif string_ == '[':

2306

if not current_selector:

2307

current_selector = FormatSelector(SINGLE, 'best', [])

2308

format_filter = _parse_filter(tokens)

2309

current_selector.filters.append(format_filter)

2310

elif string_ == '(':

2311

if current_selector:

2312

raise syntax_error('Unexpected "("', start)

2313

group = _parse_format_selection(tokens, inside_group=True)

2314

current_selector = FormatSelector(GROUP, group, [])

2315

elif string_ == '+':

2316

if not current_selector:

2317

raise syntax_error('Unexpected "+"', start)

2318

selector_1 = current_selector

2319

selector_2 = _parse_format_selection(tokens, inside_merge=True)

2320

if not selector_2:

2321

raise syntax_error('Expected a selector', start)

2322

current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])

2323

else:

2324

raise syntax_error(f'Operator not recognized: "{string_}"', start)

2325

elif type_ == tokenize.ENDMARKER:

2326

break

2327

if current_selector:

2328

selectors.append(current_selector)

2329

return selectors

2330

2331

def _merge(formats_pair):

2332

format_1, format_2 = formats_pair

2333

2334

formats_info = []

2335

formats_info.extend(format_1.get('requested_formats', (format_1,)))

2336

formats_info.extend(format_2.get('requested_formats', (format_2,)))

2337

2338

if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:

2339

get_no_more = {'video': False, 'audio': False}

2340

for (i, fmt_info) in enumerate(formats_info):

2341

if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':

2342

formats_info.pop(i)

2343

continue

2344

for aud_vid in ['audio', 'video']:

2345

if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':

2346

if get_no_more[aud_vid]:

2347

formats_info.pop(i)

2348

break

2349

get_no_more[aud_vid] = True

2350

2351

if len(formats_info) == 1:

2352

return formats_info[0]

2353

2354

video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']

2355

audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

2356

2357

the_only_video = video_fmts[0] if len(video_fmts) == 1 else None

2358

the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

2359

2360

output_ext = get_compatible_ext(

2361

vcodecs=[f.get('vcodec') for f in video_fmts],

2362

acodecs=[f.get('acodec') for f in audio_fmts],

2363

vexts=[f['ext'] for f in video_fmts],

2364

aexts=[f['ext'] for f in audio_fmts],

2365

preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))

2366

or self.params.get('prefer_free_formats') and ('webm', 'mkv')))

2367

2368

filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

2369

2370

new_dict = {

2371

'requested_formats': formats_info,

2372

'format': '+'.join(filtered('format')),

2373

'format_id': '+'.join(filtered('format_id')),

2374

'ext': output_ext,

2375

'protocol': '+'.join(map(determine_protocol, formats_info)),

2376

'language': '+'.join(orderedSet(filtered('language'))) or None,

2377

'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,

2378

'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,

2379

'tbr': sum(filtered('tbr', 'vbr', 'abr')),

}

if the_only_video:

new_dict.update({

'width': the_only_video.get('width'),

2385

'height': the_only_video.get('height'),

2386

'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),

2387

'fps': the_only_video.get('fps'),

2388

'dynamic_range': the_only_video.get('dynamic_range'),

2389

'vcodec': the_only_video.get('vcodec'),

2390

'vbr': the_only_video.get('vbr'),

2391

'stretched_ratio': the_only_video.get('stretched_ratio'),

2392

'aspect_ratio': the_only_video.get('aspect_ratio'),

})

if the_only_audio:

new_dict.update({

'acodec': the_only_audio.get('acodec'),

2398

'abr': the_only_audio.get('abr'),

2399

'asr': the_only_audio.get('asr'),

2400

'audio_channels': the_only_audio.get('audio_channels'),

})

return new_dict

def _check_formats(formats):
    """Yield formats, testing them according to the 'check_formats' param.

    - 'selected': every format is passed through self._check_formats
    - any other non-None value, or 'allow_unplayable_formats' set:
      formats are yielded untouched
    - otherwise: only formats flagged 'has_drm' or '__needs_testing'
      are passed through self._check_formats
    """
    if self.params.get('check_formats') == 'selected':
        yield from self._check_formats(formats)
    elif (self.params.get('check_formats') is not None
            or self.params.get('allow_unplayable_formats')):
        yield from formats
    else:
        for fmt in formats:
            must_test = fmt.get('has_drm') or fmt.get('__needs_testing')
            yield from (self._check_formats([fmt]) if must_test else (fmt, ))

def _build_selector_function(selector):
    """Recursively turn a parsed selector into a callable

    @param selector  Either a list of selectors (",") or an object with
                     `type`, `selector` and `filters` attributes
    @returns         A function taking a ctx dict (reads 'formats',
                     'incomplete_formats', 'has_merged_format') and
                     returning an iterable of selected formats
    """
    if isinstance(selector, list):  # ,
        fs = [_build_selector_function(s) for s in selector]

        def selector_function(ctx):
            for f in fs:
                yield from f(ctx)
        return selector_function

    elif selector.type == GROUP:  # ()
        selector_function = _build_selector_function(selector.selector)

    elif selector.type == PICKFIRST:  # /
        fs = [_build_selector_function(s) for s in selector.selector]

        def selector_function(ctx):
            # The first alternative that yields anything wins
            for f in fs:
                picked_formats = list(f(ctx))
                if picked_formats:
                    return picked_formats
            return []

    elif selector.type == MERGE:  # +
        selector_1, selector_2 = map(_build_selector_function, selector.selector)

        def selector_function(ctx):
            for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                yield _merge(pair)

    elif selector.type == SINGLE:  # atom
        format_spec = selector.selector or 'best'

        # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
        if format_spec == 'all':
            def selector_function(ctx):
                yield from _check_formats(ctx['formats'][::-1])
        elif format_spec == 'mergeall':
            def selector_function(ctx):
                formats = list(_check_formats(
                    f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
                if not formats:
                    return
                merged_format = formats[-1]
                for f in formats[-2::-1]:
                    merged_format = _merge((merged_format, f))
                yield merged_format

        else:
            format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
            # The pattern must define the groups read below: bw, type, mod and n
            mobj = re.match(
                r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                format_spec)
            if mobj is not None:
                format_idx = int_or_none(mobj.group('n'), default=1)
                format_reverse = mobj.group('bw')[0] == 'b'
                format_type = (mobj.group('type') or [None])[0]
                not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                format_modified = mobj.group('mod') is not None

                format_fallback = not format_type and not format_modified  # for b, w
                _filter_f = (
                    (lambda f: f.get(f'{format_type}codec') != 'none')
                    if format_type and format_modified  # bv*, ba*, wv*, wa*
                    else (lambda f: f.get(f'{not_format_type}codec') == 'none')
                    if format_type  # bv, ba, wv, wa
                    else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                    if not format_modified  # b, w
                    else lambda f: True)  # b*, w*
                # Formats with neither audio nor video are never matched by b/w selectors
                filter_f = lambda f: _filter_f(f) and (
                    f.get('vcodec') != 'none' or f.get('acodec') != 'none')
            else:
                if format_spec in self._format_selection_exts['audio']:
                    filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                elif format_spec in self._format_selection_exts['video']:
                    filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                    seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
                elif format_spec in self._format_selection_exts['storyboards']:
                    filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                else:
                    filter_f = lambda f: f.get('format_id') == format_spec  # id

            def selector_function(ctx):
                formats = list(ctx['formats'])
                matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                if not matches:
                    if format_fallback and ctx['incomplete_formats']:
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        matches = list(filter(lambda f: f.get('vcodec') != 'none' or f.get('acodec') != 'none', formats))
                    elif seperate_fallback and not ctx['has_merged_format']:
                        # for compatibility with youtube-dl when there is no pre-merged format
                        matches = list(filter(seperate_fallback, formats))
                matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                try:
                    yield matches[format_idx - 1]
                except LazyList.IndexError:
                    return

    filters = [self._build_format_filter(f) for f in selector.filters]

    def final_selector(ctx):
        # Apply the selector's own filters on a shallow copy so that ctx is left untouched
        ctx_copy = dict(ctx)
        for _filter in filters:
            ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
        return selector_function(ctx_copy)
    return final_selector

2527

2528

# HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid

2529

# Prefix numbers with random letters to avoid it being classified as a number

2530

# See: https://github.com/yt-dlp/yt-dlp/pulls/8797

2531

# TODO: Implement parser not reliant on tokenize.tokenize

2532

prefix = ''.join(random.choices(string.ascii_letters, k=32))

2533

stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())

2534

try:

2535

tokens = list(_remove_unused_ops(

2536

token._replace(string=token.string.replace(prefix, ''))

2537

for token in tokenize.tokenize(stream.readline)))

2538

except tokenize.TokenError:

2539

raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

2540

2541

class TokenIterator:
    """Iterator over a token sequence that allows stepping back one token"""

    def __init__(self, tokens):
        self.tokens, self.counter = tokens, 0

    def __iter__(self):
        return self

    def __next__(self):
        if self.counter < len(self.tokens):
            token = self.tokens[self.counter]
            self.counter += 1
            return token
        raise StopIteration

    next = __next__

    def restore_last_token(self):
        """Move the cursor back so that the last returned token is returned again"""
        self.counter -= 1

2560

2561

parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))

2562

return _build_selector_function(parsed_selector)

2563

2564

def _calc_headers(self, info_dict, load_cookies=False):
    """Build the HTTP headers for info_dict

    Combines self.params['http_headers'] with info_dict's 'http_headers'.
    Cookies applicable to info_dict['url'] are serialized into
    info_dict['cookies'] instead of being sent as a `Cookie` header.

    @param load_cookies  Load the `Cookie` header and info_dict['cookies']
                         into the cookiejar first
    """
    headers = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
    clean_headers(headers)

    if load_cookies:  # For --load-info-json
        self._load_cookies(headers.get('Cookie'), autoscope=info_dict['url'])  # compat
        self._load_cookies(info_dict.get('cookies'), autoscope=False)
    # The `Cookie` header is removed to prevent leaks and unscoped cookies.
    # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
    headers.pop('Cookie', None)

    jar_cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
    if jar_cookies:
        encoder = LenientSimpleCookie()
        # (cookie attribute, output template); emitted only when the attribute is truthy
        optional_parts = (
            ('domain', 'Domain={}'),
            ('path', 'Path={}'),
            ('secure', 'Secure'),
            ('expires', 'Expires={}'),
            ('version', 'Version={}'),
        )
        parts = []
        for cookie in jar_cookies:
            _, encoded = encoder.value_encode(cookie.value)
            parts.append(f'{cookie.name}={encoded}')
            for attr, template in optional_parts:
                attr_value = getattr(cookie, attr)
                if attr_value:
                    parts.append(template.format(attr_value))
        info_dict['cookies'] = '; '.join(parts)

    if 'X-Forwarded-For' not in headers:
        forwarded_ip = info_dict.get('__x_forwarded_for_ip')
        if forwarded_ip:
            headers['X-Forwarded-For'] = forwarded_ip

    return headers

def _calc_cookies(self, url):

2601

self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')

2602

return self.cookiejar.get_cookie_header(url)

2603

2604

def _sort_thumbnails(self, thumbnails):

2605

thumbnails.sort(key=lambda t: (

2606

t.get('preference') if t.get('preference') is not None else -1,

2607

t.get('width') if t.get('width') is not None else -1,

2608

t.get('height') if t.get('height') is not None else -1,

2609

t.get('id') if t.get('id') is not None else '',

2610

t.get('url')))

2611

2612

def _sanitize_thumbnails(self, info_dict):
    """Normalize info_dict['thumbnails'] in place

    Falls back to a single-entry list built from info_dict['thumbnail'],
    sorts the list, assigns missing ids, fills 'resolution' and sanitizes
    the urls. When the 'check_formats' param is True, the list is replaced
    by a LazyList that tests each thumbnail URL.
    """
    thumbs = info_dict.get('thumbnails')
    if thumbs is None and (single := info_dict.get('thumbnail')):
        thumbs = info_dict['thumbnails'] = [{'url': single}]
    if not thumbs:
        return

    def check_thumbnails(candidates):
        for thumb in candidates:
            self.to_screen(f'[info] Testing thumbnail {thumb["id"]}')
            try:
                self.urlopen(HEADRequest(thumb['url']))
            except network_exceptions as err:
                self.to_screen(f'[info] Unable to connect to thumbnail {thumb["id"]} URL {thumb["url"]!r} - {err}. Skipping...')
            else:
                yield thumb

    self._sort_thumbnails(thumbs)
    for index, thumb in enumerate(thumbs):
        if thumb.get('id') is None:
            thumb['id'] = str(index)
        if thumb.get('width') and thumb.get('height'):
            thumb['resolution'] = '%dx%d' % (thumb['width'], thumb['height'])
        thumb['url'] = sanitize_url(thumb['url'])

    if self.params.get('check_formats') is True:
        thumbs = LazyList(check_thumbnails(thumbs[::-1]), reverse=True)
    info_dict['thumbnails'] = thumbs

2643

2644

def _fill_common_fields(self, info_dict, final=True):
    """Derive commonly used fields of info_dict from the ones present

    Fills in place: 'fulltitle'/'title' (only when final), 'duration_string',
    the *_date fields from their timestamps, 'release_year', the live status
    fields, chapter/season/episode titles (only when final) and both variants
    of the deprecated multi-value fields.
    """
    # TODO: move sanitization here
    if final:
        title = info_dict.get('title')
        info_dict['fulltitle'] = title
        if not title:
            if title == '':
                self.write_debug('Extractor gave empty title. Creating a generic title')
            else:
                self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
            info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'

    duration = info_dict.get('duration')
    if duration is not None:
        info_dict['duration_string'] = formatSeconds(duration)

    timestamp_to_date = (
        ('timestamp', 'upload_date'),
        ('release_timestamp', 'release_date'),
        ('modified_timestamp', 'modified_date'),
    )
    for ts_key, date_key in timestamp_to_date:
        if info_dict.get(date_key) is not None or info_dict.get(ts_key) is None:
            continue
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        with contextlib.suppress(ValueError, OverflowError, OSError):
            moment = dt.datetime.fromtimestamp(info_dict[ts_key], dt.timezone.utc)
            info_dict[date_key] = moment.strftime('%Y%m%d')

    if not info_dict.get('release_year'):
        info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])}))

    live_keys = ('is_live', 'was_live')
    live_status = info_dict.get('live_status')
    if live_status is None:
        # Only the first key that is not explicitly False is considered
        for key in live_keys:
            if info_dict.get(key) is False:
                continue
            if info_dict.get(key):
                live_status = key
            break
        if all(info_dict.get(key) is False for key in live_keys):
            live_status = 'not_live'
    if live_status:
        info_dict['live_status'] = live_status
        for key in live_keys:
            if info_dict.get(key) is None:
                info_dict[key] = (live_status == key)
    if live_status == 'post_live':
        info_dict['was_live'] = True

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    if final:
        for field in ('chapter', 'season', 'episode'):
            number = info_dict.get(f'{field}_number')
            if number is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), number)

    for old_key, new_key in self._deprecated_multivalue_fields.items():
        if new_key in info_dict and old_key in info_dict:
            if '_version' not in info_dict:  # HACK: Do not warn when using --load-info-json
                self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')
        elif old_value := info_dict.get(old_key):
            info_dict[new_key] = old_value.split(', ')
        elif new_value := info_dict.get(new_key):
            info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)

2706

2707

def _raise_pending_errors(self, info):

2708

err = info.pop('__pending_error', None)

2709

if err:

2710

self.report_error(err, tb=False)

2711

2712

def sort_formats(self, info_dict):
    """Sort the formats of info_dict in place

    The sort key is FormatSorter.calculate_preference, configured with
    info_dict['_format_sort_fields'] (or an empty list).
    """
    sort_fields = info_dict.get('_format_sort_fields') or []
    formats = self._get_formats(info_dict)
    sorter = FormatSorter(self, sort_fields)
    formats.sort(key=sorter.calculate_preference)

2716

2717

def process_video_result(self, info_dict, download=True):
    """Sanitize a video info_dict, select its formats and process them

    @param info_dict  Extractor result whose '_type' is 'video' (or unset)
    @param download   Passed to self._default_format_spec; when true, each
                      selected format/range is handed to self.process_info
    @returns          info_dict, updated with the last selected format
    """
    assert info_dict.get('_type', 'video') == 'video'
    self._num_videos += 1

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
    if not info_dict.get('id'):
        raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

    def report_force_conversion(field, field_not, conversion):
        self.report_warning(
            f'"{field}" field is not {field_not} - forcing {conversion} conversion, '
            'there is an error in extractor')

    def sanitize_string_field(info, string_field):
        value = info.get(string_field)
        if value is None or isinstance(value, str):
            return
        report_force_conversion(string_field, 'a string', 'string')
        info[string_field] = str(value)

    def sanitize_numeric_fields(info):
        for numeric_field in self._NUMERIC_FIELDS:
            value = info.get(numeric_field)
            if value is None or isinstance(value, (int, float)):
                continue
            report_force_conversion(numeric_field, 'numeric', 'int')
            info[numeric_field] = int_or_none(value)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)
    if info_dict.get('section_end') and info_dict.get('section_start') is not None:
        info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
    if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
        self.report_warning('"duration" field is negative, there is an error in extractor')

    chapters = info_dict.get('chapters') or []
    if chapters and chapters[0].get('start_time'):
        chapters.insert(0, {'start_time': 0})

    # Sentinel standing in for the neighbours of the first and the last chapter
    dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
    neighbours = zip((dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter))
    for idx, (prev, current, next_) in enumerate(neighbours, 1):
        if current.get('start_time') is None:
            current['start_time'] = prev.get('end_time')
        if not current.get('end_time'):
            current['end_time'] = next_.get('start_time')
        if not current.get('title'):
            current['title'] = f'<Untitled Chapter {idx}>'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    self._sanitize_thumbnails(info_dict)

    thumbnail = info_dict.get('thumbnail')
    thumbnails = info_dict.get('thumbnails')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if info_dict.get('display_id') is None and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    self._fill_common_fields(info_dict)

    for cc_kind in ('subtitles', 'automatic_captions'):
        cc = info_dict.get(cc_kind)
        if not cc:
            continue
        for sub_formats in cc.values():
            for sub_format in sub_formats:
                if sub_format.get('url'):
                    sub_format['url'] = sanitize_url(sub_format['url'])
                if sub_format.get('ext') is None:
                    sub_format['ext'] = determine_ext(sub_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    formats = self._get_formats(info_dict)

    # Backward compatibility with InfoExtractor._sort_formats
    field_preference = (formats or [{}])[0].pop('__sort_fields', None)
    if field_preference:
        info_dict['_format_sort_fields'] = field_preference

    info_dict['_has_drm'] = any(  # or None ensures --clean-infojson removes it
        f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
    if not self.params.get('allow_unplayable_formats'):
        formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']

    if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
        self.report_warning(
            f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
            'only images are available for download. Use --list-formats to see them'.capitalize())

    get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
    if not get_from_start:
        info_dict['title'] += ' ' + dt.datetime.now().strftime('%Y-%m-%d %H:%M')
    if info_dict.get('is_live') and formats:
        formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
        if get_from_start and not formats:
            self.raise_no_formats(info_dict, msg=(
                '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
                'If you want to download from the current time, use --no-live-from-start'))

    def is_wellformed(f):
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = [f for f in formats or [] if is_wellformed(f)]

    if not formats:
        self.raise_no_formats(info_dict)

    for fmt in formats:
        sanitize_string_field(fmt, 'format_id')
        sanitize_numeric_fields(fmt)
        fmt['url'] = sanitize_url(fmt['url'])
        if fmt.get('ext') is None:
            fmt['ext'] = determine_ext(fmt['url']).lower()
        if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis') and fmt.get('acodec') is None:
            fmt['acodec'] = fmt['ext']
        if fmt.get('protocol') is None:
            fmt['protocol'] = determine_protocol(fmt)
        if fmt.get('resolution') is None:
            fmt['resolution'] = self.format_resolution(fmt, default=None)
        if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none':
            fmt['dynamic_range'] = 'SDR'
        if fmt.get('aspect_ratio') is None:
            fmt['aspect_ratio'] = try_call(lambda: round(fmt['width'] / fmt['height'], 2))
        # For fragmented formats, "tbr" is often max bitrate and not average
        if (('manifest-filesize-approx' in self.params['compat_opts'] or not fmt.get('manifest_url'))
                and not fmt.get('filesize') and not fmt.get('filesize_approx')):
            fmt['filesize_approx'] = filesize_from_tbr(fmt.get('tbr'), info_dict.get('duration'))
        fmt['http_headers'] = self._calc_headers(collections.ChainMap(fmt, info_dict), load_cookies=True)

    # Safeguard against old/insecure infojson when using --load-info-json
    if info_dict.get('http_headers'):
        info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers'])
        info_dict['http_headers'].pop('Cookie', None)

    # This is copied to http_headers by the above _calc_headers and can now be removed
    info_dict.pop('__x_forwarded_for_ip', None)

    self.sort_formats({
        'formats': formats,
        '_format_sort_fields': info_dict.get('_format_sort_fields'),
    })

    # Sanitize and group by format_id
    formats_dict = {}
    for i, fmt in enumerate(formats):
        if fmt.get('format_id'):
            # Sanitize format_id from characters used in format selector expression
            fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id'])
        else:
            fmt['format_id'] = str(i)
        formats_dict.setdefault(fmt['format_id'], []).append(fmt)

    # Make sure all formats have unique format_id
    common_exts = set(itertools.chain(*self._format_selection_exts.values()))
    for format_id, same_id_formats in formats_dict.items():
        is_ambiguous = len(same_id_formats) > 1
        for i, fmt in enumerate(same_id_formats):
            if is_ambiguous:
                fmt['format_id'] = f'{format_id}-{i}'
            # Ensure there is no conflict between id and ext in format selection
            # See https://github.com/yt-dlp/yt-dlp/issues/1282
            if fmt['format_id'] != fmt['ext'] and fmt['format_id'] in common_exts:
                fmt['format_id'] = f'f{fmt["format_id"]}'

            if fmt.get('format') is None:
                fmt['format'] = '{id} - {res}{note}'.format(
                    id=fmt['format_id'],
                    res=self.format_resolution(fmt),
                    note=format_field(fmt, 'format_note', ' (%s)'),
                )

    if self.params.get('check_formats') is True:
        formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

    if not formats or formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats

    info_dict, _ = self.pre_process(info_dict)

    if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
        return info_dict

    self.post_extract(info_dict)
    info_dict, _ = self.pre_process(info_dict, 'after_filter')

    # The pre-processors may have modified the formats
    formats = self._get_formats(info_dict)

    list_only = self.params.get('simulate') == 'list_only'
    interactive_format_selection = not list_only and self.format_selector == '-'
    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
    if self.params.get('listsubtitles'):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
    if self.params.get('listformats') or interactive_format_selection:
        self.list_formats(info_dict)
    if list_only:
        # Without this printing, -F --print-json will not work
        self.__forced_printings(info_dict)
        return info_dict

    format_selector = self.format_selector
    while True:
        if interactive_format_selection:
            req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
                               + '(Press ENTER for default, or Ctrl+C to quit)'
                               + self._format_screen(': ', self.Styles.EMPHASIS))
            try:
                format_selector = self.build_format_selector(req_format) if req_format else None
            except SyntaxError as err:
                self.report_error(err, tb=False, is_error=False)
                continue

        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug(f'Default format spec: {req_format}')
            format_selector = self.build_format_selector(req_format)

        formats_to_download = self._select_formats(formats, format_selector)
        if interactive_format_selection and not formats_to_download:
            self.report_error('Requested format is not available', tb=False, is_error=False)
            continue
        break

    if not formats_to_download:
        if not self.params.get('ignore_no_formats_error'):
            raise ExtractorError(
                'Requested format is not available. Use --list-formats for a list of available formats',
                expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
        self.report_warning('Requested format is not available')
        # Process what we can, even without any available formats.
        formats_to_download = [{}]

    requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
    best_format, downloaded_formats = formats_to_download[-1], []
    if download:
        if best_format and requested_ranges:
            def to_screen(*msg):
                self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')

            to_screen(f'Downloading {len(formats_to_download)} format(s):',
                      (f['format_id'] for f in formats_to_download))
            if requested_ranges != ({}, ):
                to_screen(f'Downloading {len(requested_ranges)} time ranges:',
                          (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
        max_downloads_reached = False

        for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
            new_info = self._copy_infodict(info_dict)
            new_info.update(fmt)
            offset = info_dict.get('section_start') or 0
            duration = info_dict.get('duration') or float('inf')
            end_time = offset + min(chapter.get('end_time', duration), duration)
            # duration may not be accurate. So allow deviations <1sec
            if end_time == float('inf') or end_time > offset + duration + 1:
                end_time = None
            if chapter or offset:
                new_info.update({
                    'section_start': offset + chapter.get('start_time', 0),
                    'section_end': end_time,
                    'section_title': chapter.get('title'),
                    'section_number': chapter.get('index'),
                })
            downloaded_formats.append(new_info)
            try:
                self.process_info(new_info)
            except MaxDownloadsReached:
                max_downloads_reached = True
            self._raise_pending_errors(new_info)
            # Remove copied info
            for key, val in tuple(new_info.items()):
                if info_dict.get(key) == val:
                    new_info.pop(key)
            if max_downloads_reached:
                break

        write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
        assert write_archive.issubset({True, False, 'ignore'})
        if True in write_archive and False not in write_archive:
            self.record_download_archive(info_dict)

        info_dict['requested_downloads'] = downloaded_formats
        info_dict = self.run_all_pps('after_video', info_dict)
        if max_downloads_reached:
            raise MaxDownloadsReached

    # We update the info dict with the selected best quality format (backwards compatibility)
    info_dict.update(best_format)
    return info_dict

3036

3037

def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    @param video_id            Used only in the messages
    @param normal_subtitles    Mapping of language -> list of subtitle formats, or None
    @param automatic_captions  Same, for automatic captions; consulted only
                               for languages missing from normal_subtitles
    @returns                   Mapping of language -> chosen subtitle format, or
                               None if nothing is available/requested
    @raises ValueError         If the 'subtitleslangs' param has an invalid regex
    """
    available_subs, normal_sub_langs = {}, []
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
        normal_sub_langs = tuple(normal_subtitles.keys())
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            if lang not in available_subs:
                available_subs[lang] = cap_info

    if not available_subs or (
            not self.params.get('writesubtitles')
            and not self.params.get('writeautomaticsub')):
        return None

    all_sub_langs = tuple(available_subs.keys())
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        try:
            requested_langs = orderedSet_from_options(
                self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}') from e
    else:
        # Default: a single language, preferring "en", then "en*", then whatever is first
        requested_langs = LazyList(itertools.chain(
            ['en'] if 'en' in normal_sub_langs else [],
            filter(lambda f: f.startswith('en'), normal_sub_langs),
            ['en'] if 'en' in all_sub_langs else [],
            filter(lambda f: f.startswith('en'), all_sub_langs),
            normal_sub_langs, all_sub_langs,
        ))[:1]
    if requested_langs:
        self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty format list has nothing to pick from, so treat it like a missing language
        if not formats:
            self.report_warning(f'{lang} subtitles not available for {video_id}')
            continue
        for ext in formats_preference:
            if ext == 'best':
                f = formats[-1]
                break
            matches = [sub for sub in formats if sub['ext'] == ext]
            if matches:
                f = matches[-1]
                break
        else:
            # No preference matched: fall back to the last format
            f = formats[-1]
            self.report_warning(
                'No subtitle format found matching "{}" for language {}, '
                'using {}. Use --list-subs for a list of available subtitles'.format(formats_query, lang, f['ext']))
        subs[lang] = f
    return subs

def _forceprint(self, key, info_dict):

3098

if info_dict is None:

3099

return

3100

info_copy = info_dict.copy()

3101

info_copy.setdefault('filename', self.prepare_filename(info_dict))

3102

if info_dict.get('requested_formats') is not None:

3103

# For RTMP URLs, also include the playpath

3104

info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])

3105

elif info_dict.get('url'):

3106

info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')

3107

info_copy['formats_table'] = self.render_formats_table(info_dict)

3108

info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)

3109

info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))

3110

info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))

3111

3112

def format_tmpl(tmpl):

3113

mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)

if not mobj:

return tmpl

fmt = '%({})s'

if tmpl.startswith('{'):

3119

tmpl, fmt = f'.{tmpl}', '%({})j'

3120

if tmpl.endswith('='):

3121

tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'

3122

return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))

3123

3124

for tmpl in self.params['forceprint'].get(key, []):

3125

self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))

3126

3127

for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):

3128

filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)

3129

tmpl = format_tmpl(tmpl)

3130

self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')

3131

if self._ensure_dir_exists(filename):

3132

with open(filename, 'a', encoding='utf-8', newline='') as f:

3133

f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)

return info_copy

def __forced_printings(self, info_dict, filename=None, incomplete=True):
    """Write the values selected by the 'force*', 'forceprint' and 'print_to_file' params

    @param filename    If given, it is stored in info_dict['filename'] first
    @param incomplete  When true, a missing mandatory field is skipped
                       instead of being looked up (which would raise)
    """
    params = self.params
    wants_full_info = (
        params.get('forcejson')
        or params['forceprint'].get('video')
        or params['print_to_file'].get('video'))
    if wants_full_info:
        self.post_extract(info_dict)
    if filename:
        info_dict['filename'] = filename
    info_copy = self._forceprint('video', info_dict)

    def print_field(field, actual_field=None, optional=False):
        if not params.get(f'force{field}'):
            return
        # An absent value is only printed (and fails) for a mandatory field of complete info
        if info_copy.get(field) is None and (optional or incomplete):
            return
        self.to_stdout(info_copy[field if actual_field is None else actual_field])

    for field, source, optional in (
        ('title', None, False),
        ('id', None, False),
        ('url', 'urls', False),
        ('thumbnail', None, True),
        ('description', None, True),
        ('filename', None, False),
    ):
        print_field(field, source, optional)

    if params.get('forceduration') and info_copy.get('duration') is not None:
        self.to_stdout(formatSeconds(info_copy['duration']))
    print_field('format')

    if params.get('forcejson'):
        self.to_stdout(json.dumps(self.sanitize_info(info_dict)))

3165

3166

def dl(self, name, info, subtitle=False, test=False):
    """Run the suitable downloader for info and return the result of its download()

    @param name      Target filename; '-' selects a downloader writing to stdout
    @param subtitle  Passed through to the downloader
    @param test      Use a fixed set of test parameters instead of self.params
                     and skip progress hooks and the debug message
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    params = self.params
    if test:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_cls(self, params)
    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)
        # The payload of data: URLs is replaced by a placeholder in the debug output
        urls = '", "'.join(
            (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
            for f in info.get('requested_formats', []) or [info])
        self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')

    # Note: Ideally info should be deep-copied so that hooks cannot modify it.
    # But it may contain objects that are not deep-copyable
    info_for_fd = self._copy_infodict(info)
    if info_for_fd.get('http_headers') is None:
        info_for_fd['http_headers'] = self._calc_headers(info_for_fd)
    return fd.download(name, info_for_fd, subtitle)

3200

3201

def existing_file(self, filepaths, *, default_overwrite=True):
    """Return the first existing path if overwriting is off, else delete all existing ones

    @param default_overwrite  Value assumed for the 'overwrites' param when it is unset
    @returns                  The first existing path, or None after deleting
                              the existing files (or when none exist)
    """
    found = [path for path in orderedSet(filepaths) if os.path.exists(path)]
    if found and not self.params.get('overwrites', default_overwrite):
        return found[0]

    for path in found:
        self.report_file_delete(path)
        os.remove(path)
    return None

@_catch_unsafe_extension_error
def process_info(self, info_dict):
    """Process a single resolved IE result. (Modifies it in-place)

    Prints the forced fields, writes the description, subtitles, thumbnails,
    info json, annotations and internet shortcut files, then downloads the
    video (unless simulating or skipping the download) and runs the fixups
    and postprocessors. The result is recorded in
    info_dict['__write_download_archive']; most failures are reported and
    make the function return early.
    """

    assert info_dict.get('_type', 'video') == 'video'
    original_infodict = info_dict

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    if self._match_entry(info_dict) is not None:
        info_dict['__write_download_archive'] = 'ignore'
        return

    # Does nothing under normal operation - for backward compatibility of process_info
    self.post_extract(info_dict)

    def replace_info_dict(new_info):
        # Update the caller's dict object in-place instead of rebinding the name
        nonlocal info_dict
        if new_info == info_dict:
            return
        info_dict.clear()
        info_dict.update(new_info)

    new_info, _ = self.pre_process(info_dict, 'video')
    replace_info_dict(new_info)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    def check_max_downloads():
        if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
            raise MaxDownloadsReached

    if self.params.get('simulate'):
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
        check_max_downloads()
        return

    if full_filename is None:
        return
    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self._write_description('video', info_dict,
                               self.prepare_filename(info_dict, 'description')) is None:
        return

    sub_files = self._write_subtitles(info_dict, temp_filename)
    if sub_files is None:
        return
    files_to_move.update(dict(sub_files))

    thumb_files = self._write_thumbnails(
        'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
    if thumb_files is None:
        return
    files_to_move.update(dict(thumb_files))

    infofn = self.prepare_filename(info_dict, 'infojson')
    _infojson_written = self._write_info_json('video', info_dict, infofn)
    if _infojson_written:
        info_dict['infojson_filename'] = infofn
        # For backward compatibility, even though it was a private field
        info_dict['__infojson_filename'] = infofn
    elif _infojson_written is None:
        return

    # Note: Annotations are deprecated
    annofn = None
    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
    if annofn:
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except OSError:
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    # Write internet shortcut files
    def _write_link_file(link_type):
        # Returns False only when processing must be aborted
        url = try_get(info_dict['webpage_url'], iri_to_uri)
        if not url:
            self.report_warning(
                f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
            return True
        linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
        if not self._ensure_dir_exists(encodeFilename(linkfn)):
            return False
        # Keep an existing shortcut only when overwriting is disabled
        # (same rule as for the annotations file above)
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
            return True
        try:
            self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
            with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                      newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                template_vars = {'url': url}
                if link_type == 'desktop':
                    # The filename without the '.desktop' suffix
                    template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
        except OSError:
            self.report_error(f'Cannot write internet shortcut {linkfn}')
            return False
        return True

    write_links = {
        'url': self.params.get('writeurllink'),
        'webloc': self.params.get('writewebloclink'),
        'desktop': self.params.get('writedesktoplink'),
    }
    if self.params.get('writelink'):
        # 'writelink' selects the shortcut type from the current platform
        link_type = ('webloc' if sys.platform == 'darwin'
                     else 'desktop' if sys.platform.startswith('linux')
                     else 'url')
        write_links[link_type] = True

    if any(should_write and not _write_link_file(link_type)
           for link_type, should_write in write_links.items()):
        return

    new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    replace_info_dict(new_info)

    if self.params.get('skip_download'):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_video_file(*filepaths):
                ext = info_dict.get('ext')
                converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
                # Each converted name is checked right before its unconverted counterpart
                file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
                                          default_overwrite=False)
                if file:
                    info_dict['ext'] = os.path.splitext(file)[1][1:]
                return file

            fd, success = None, True
            if info_dict.get('protocol') or info_dict.get('url'):
                fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
                        info_dict.get('section_start') or info_dict.get('section_end')):
                    msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
                           else 'You have requested downloading the video partially, but ffmpeg is not installed')
                    self.report_error(f'{msg}. Aborting')
                    return

            if info_dict.get('requested_formats') is not None:
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if (info_dict['ext'] == 'webm'
                            and info_dict.get('thumbnails')
                            # check with type instead of pp_key, __name__, or isinstance
                            # since we don't want any custom PPs to trigger this
                            and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return f'{filename_wo_ext}.{ext}'

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_video_file(full_filename, temp_filename)

                info_dict['__real_download'] = False
                # NOTE: Copy so that original format dicts are not modified
                info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))

                merger = FFmpegMergerPP(self)
                downloaded = []
                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif fd:
                    # A single downloader handles all the formats at once
                    for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
                        f['filepath'] = fname = prepend_extension(
                            correct_ext(temp_filename, info_dict['ext']),
                            'f{}'.format(f['format_id']), info_dict['ext'])
                        downloaded.append(fname)
                    info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
                        if not self.params.get('ignoreerrors'):
                            self.report_error(f'{msg}. Aborting due to --abort-on-error')
                            return
                        self.report_warning(f'{msg}. The formats won\'t be merged')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                        fname = temp_filename
                    # Download every requested format separately
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f{}'.format(f['format_id']), new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            f['filepath'] = fname
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success

                if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
                    info_dict['__postprocessors'].append(merger)
                    info_dict['__files_to_merge'] = downloaded
                    # Even if there were no downloads, it is being merged only now
                    info_dict['__real_download'] = True
                else:
                    for file in downloaded:
                        files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_video_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error(f'unable to download video data: {err}')
            return
        except OSError as err:
            raise UnavailableVideoError(err)
        except ContentTooShortError as err:
            self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
            return

        self._raise_pending_errors(info_dict)
        if success and full_filename != '-':

            def fixup():
                # do_fixup is True (fix), 'warn' (only warn) or False (do nothing)
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = 'warn'
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    if not (do_fixup and cndn):
                        return
                    elif do_fixup == 'warn':
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(stretched_ratio not in (1, None),
                             f'Non-uniform pixel ratio {stretched_ratio}',
                             FFmpegFixupStretchedPP)

                downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                downloader = downloader.FD_NAME if downloader else None

                ext = info_dict.get('ext')
                postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
                    isinstance(pp, FFmpegVideoConvertorPP)
                    and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
                ) for pp in self._pps['post_process'])

                if not postprocessed_by_ffmpeg:
                    ffmpeg_fixup(fd != FFmpegFD and ext == 'm4a'
                                 and info_dict.get('container') == 'm4a_dash',
                                 'writing DASH m4a. Only some players support this container',
                                 FFmpegFixupM4aPP)
                    ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
                                 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
                                 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                 FFmpegFixupM3u8PP)
                    ffmpeg_fixup(downloader == 'dashsegments'
                                 and (info_dict.get('is_live') or info_dict.get('is_dash_periods')),
                                 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

                ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
            except PostProcessingError as err:
                self.report_error(f'Postprocessing: {err}')
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error(f'post hooks: {err}')
                return
            info_dict['__write_download_archive'] = True

    assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
    if self.params.get('force_write_download_archive'):
        info_dict['__write_download_archive'] = True
    check_max_downloads()

3573

3574

def __download_wrapper(self, func):

3575

@functools.wraps(func)

3576

def wrapper(*args, **kwargs):

3577

try:

3578

res = func(*args, **kwargs)

3579

except UnavailableVideoError as e:

3580

self.report_error(e)

3581

except DownloadCancelled as e:

3582

self.to_screen(f'[info] {e}')

3583

if not self.params.get('break_per_url'):

3584

raise

3585

self._num_downloads = 0

3586

else:

3587

if self.params.get('dump_single_json', False):

3588

self.post_extract(res)

3589

self.to_stdout(json.dumps(self.sanitize_info(res)))

3590

return wrapper

3591

3592

def download(self, url_list):
    """Download a given list of URLs."""
    url_list = variadic(url_list)  # Passing a single URL is a common mistake
    outtmpl = self.params['outtmpl']['default']
    if len(url_list) > 1 and self.params.get('max_downloads') != 1:
        # A template without any '%' field gives every download the same name
        if outtmpl != '-' and '%' not in outtmpl:
            raise SameFileError(outtmpl)

    extract = self.__download_wrapper(self.extract_info)
    for url in url_list:
        extract(url, force_generic_extractor=self.params.get('force_generic_extractor', False))

    return self._download_retcode

3607

3608

def download_with_info_file(self, info_filename):
    """Process the info dict(s) stored as JSON in info_filename

    If an entry fails to download and has a 'webpage_url', that URL is
    downloaded instead. Returns the download return code.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as f:
        # FileInput doesn't have a read method, we can't call json.load
        loaded = json.loads('\n'.join(f))
        infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
                 for info in variadic(loaded)]

    for info in infos:
        try:
            self.__download_wrapper(self.process_ie_result)(info, download=True)
        except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
            if not isinstance(e, EntryNotInPlaylist):
                self.to_stderr('\r')
            webpage_url = info.get('webpage_url')
            if webpage_url is None:
                raise
            self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
            self.download([webpage_url])
        except ExtractorError as e:
            self.report_error(e)
    return self._download_retcode

3629

3630

@staticmethod
def sanitize_info(info_dict, remove_private_keys=False):
    """ Return a copy of the infodict that can be converted to json

    'epoch', '_type' and '_version' are added to info_dict itself if missing.
    With remove_private_keys, None values, keys starting with '__' and a
    fixed set of keys are dropped at every nesting level.
    """
    if info_dict is None:
        return info_dict

    for key, fallback in (
        ('epoch', int(time.time())),
        ('_type', 'video'),
        ('_version', {
            'version': __version__,
            'current_git_head': current_git_head(),
            'release_git_head': RELEASE_GIT_HEAD,
            'repository': ORIGIN,
        }),
    ):
        info_dict.setdefault(key, fallback)

    private_keys = {
        'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
        'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
        'playlist_autonumber',
    }

    def reject(k, v):
        if not remove_private_keys:
            return False
        return v is None or k.startswith('__') or k in private_keys

    def scrub(obj):
        if isinstance(obj, dict):
            return {k: scrub(v) for k, v in obj.items() if not reject(k, v)}
        if isinstance(obj, (list, tuple, set, LazyList)):
            return [scrub(item) for item in obj]
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        # Anything else is represented by its repr
        return repr(obj)

    return scrub(info_dict)

3664

3665

@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    """ Backward-compatible alias of sanitize_info (private keys are removed by default) """
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)

3669

3670

def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):

3671

for filename in set(filter(None, files_to_delete)):

3672

if msg:

3673

self.to_screen(msg % filename)

try:

os.remove(filename)

except OSError:

self.report_warning(f'Unable to delete file {filename}')

3678

if filename in info.get('__files_to_move', []): # NB: Delete even if None

3679

del info['__files_to_move'][filename]

3680

3681

@staticmethod

3682

def post_extract(info_dict):

3683

def actual_post_extract(info_dict):

3684

if info_dict.get('_type') in ('playlist', 'multi_video'):

3685

for video_dict in info_dict.get('entries', {}):

3686

actual_post_extract(video_dict or {})

3687

return

3688

3689

post_extractor = info_dict.pop('__post_extractor', None) or dict

3690

info_dict.update(post_extractor())

3691

3692

actual_post_extract(info_dict or {})

3693

3694

def run_pp(self, pp, infodict):
    """Run a single postprocessor and return the resulting info dict

    Files the postprocessor wants deleted are either deleted or, with the
    'keepvideo' param, added to '__files_to_move' without a destination.
    A PostProcessingError is only reported when 'ignoreerrors' is True.
    """
    infodict.setdefault('__files_to_move', {})
    try:
        files_to_delete, infodict = pp.run(infodict)
    except PostProcessingError as e:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is True:
            self.report_error(e)
            return infodict
        raise

    if files_to_delete:
        if self.params.get('keepvideo', False):
            for path in files_to_delete:
                infodict['__files_to_move'].setdefault(path, '')
        else:
            self._delete_downloaded_files(
                *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
    return infodict

3716

3717

def run_all_pps(self, key, info, *, additional_pps=None):
    """Run additional_pps followed by the postprocessors registered under key

    For every key except 'video' the forced printings of that key are done first.
    Returns the info dict produced by the last postprocessor.
    """
    if key != 'video':
        self._forceprint(key, info)
    pipeline = (additional_pps or []) + self._pps[key]
    for processor in pipeline:
        info = self.run_pp(processor, info)
    return info

3723

3724

def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors of the given stage on a shallow copy of ie_info

    @returns  (info, files_to_move) where files_to_move is the
              '__files_to_move' entry removed from the returned info

    A PostProcessingError is reported without being raised and stored in
    '__pending_error' unless one is already set.
    """
    info = dict(ie_info)
    info['__files_to_move'] = files_to_move or {}
    try:
        info = self.run_all_pps(key, info)
    except PostProcessingError as err:
        msg = f'Preprocessing: {err}'
        info.setdefault('__pending_error', msg)
        self.report_error(msg, is_error=False)
    remaining = info.pop('__files_to_move', None)
    return info, remaining

3734

3735

def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file."""
    info.update({
        'filepath': filename,
        '__files_to_move': files_to_move or {},
    })
    extra_pps = info.get('__postprocessors')
    info = self.run_all_pps('post_process', info, additional_pps=extra_pps)
    # The files are moved between the 'post_process' and 'after_move' stages
    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    del info['__files_to_move']
    return self.run_all_pps('after_move', info)

3743

3744

def _make_archive_id(self, info_dict):

3745

video_id = info_dict.get('id')

3746

if not video_id:

3747

return

3748

# Future-proof against any change in case

3749

# and backwards compatibility with prior versions

3750

extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist

3751

if extractor is None:

3752

url = str_or_none(info_dict.get('url'))

3753

if not url:

3754

return

3755

# Try to find matching extractor for the URL and take its ie_key

3756

for ie_key, ie in self._ies.items():

if ie.suitable(url):

extractor = ie_key

break

else:

return

return make_archive_id(extractor, video_id)

3763

3764

def in_download_archive(self, info_dict):
    """Check whether the archive id or any '_old_archive_ids' entry is in the archive"""
    if not self.archive:
        return False

    candidates = [
        self._make_archive_id(info_dict),
        *(info_dict.get('_old_archive_ids') or []),
    ]
    return any(candidate in self.archive for candidate in candidates)

3771

3772

def record_download_archive(self, info_dict):
    """Add the archive id of info_dict to the archive

    Does nothing when the 'download_archive' param is unset. The id is
    appended to the archive file only if that param is path-like; it is
    always added to self.archive.
    """
    archive_fn = self.params.get('download_archive')
    if archive_fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    assert vid_id

    self.write_debug(f'Adding to archive: {vid_id}')
    if is_path_like(archive_fn):
        with locked_file(archive_fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + '\n')
    self.archive.add(vid_id)

3784

3785

@staticmethod

3786

def format_resolution(format, default='unknown'):

3787

if format.get('vcodec') == 'none' and format.get('acodec') != 'none':

3788

return 'audio only'

3789

if format.get('resolution') is not None:

3790

return format['resolution']

3791

if format.get('width') and format.get('height'):

3792

return '%dx%d' % (format['width'], format['height'])

3793

elif format.get('height'):

3794

return '{}p'.format(format['height'])

3795

elif format.get('width'):

3796

return '%dx?' % format['width']

3797

return default

3798

3799

def _list_format_headers(self, *headers):

3800

if self.params.get('listformats_table', True) is not False:

3801

return [self._format_out(header, self.Styles.HEADERS) for header in headers]

3802

return headers

3803

3804

def _format_note(self, fdict):

3805

res = ''

3806

if fdict.get('ext') in ['f4f', 'f4m']:

3807

res += '(unsupported)'

3808

if fdict.get('language'):

3809

if res:

3810

res += ' '

3811

res += '[{}]'.format(fdict['language'])

3812

if fdict.get('format_note') is not None:

3813

if res:

3814

res += ' '

3815

res += fdict['format_note']

3816

if fdict.get('tbr') is not None:

3817

if res:

3818

res += ', '

3819

res += '%4dk' % fdict['tbr']

3820

if fdict.get('container') is not None:

3821

if res:

3822

res += ', '

3823

res += '{} container'.format(fdict['container'])

3824

if (fdict.get('vcodec') is not None

3825

and fdict.get('vcodec') != 'none'):

3826

if res:

3827

res += ', '

3828

res += fdict['vcodec']

3829

if fdict.get('vbr') is not None:

3830

res += '@'

3831

elif fdict.get('vbr') is not None and fdict.get('abr') is not None:

3832

res += 'video@'

3833

if fdict.get('vbr') is not None:

3834

res += '%4dk' % fdict['vbr']

3835

if fdict.get('fps') is not None:

3836

if res:

3837

res += ', '

3838

res += '{}fps'.format(fdict['fps'])

3839

if fdict.get('acodec') is not None:

3840

if res:

3841

res += ', '

3842

if fdict['acodec'] == 'none':

3843

res += 'video only'

3844

else:

3845

res += '%-5s' % fdict['acodec']

3846

elif fdict.get('abr') is not None:

if res:

res += ', '

res += 'audio'

if fdict.get('abr') is not None:

3851

res += '@%3dk' % fdict['abr']

3852

if fdict.get('asr') is not None:

3853

res += ' (%5dHz)' % fdict['asr']

3854

if fdict.get('filesize') is not None:

3855

if res:

3856

res += ', '

3857

res += format_bytes(fdict['filesize'])

3858

elif fdict.get('filesize_approx') is not None:

3859

if res:

3860

res += ', '

3861

res += '~' + format_bytes(fdict['filesize_approx'])

3862

return res

3863

3864

def _get_formats(self, info_dict):

3865

if info_dict.get('formats') is None:

3866

if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':

3867

return [info_dict]

3868

return []

3869

return info_dict['formats']

3870

3871

def render_formats_table(self, info_dict):
    """Render the table of available formats; returns None if there are no formats

    A simple four column table is produced when the 'listformats_table'
    param is False, otherwise the detailed table. Formats with a
    preference below -1000 are left out.
    """
    formats = self._get_formats(info_dict)
    if not formats:
        return

    if self.params.get('listformats_table', True) is False:
        simple_rows = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f),
            ] for f in formats if (f.get('preference') or 0) >= -1000]
        return render_table(['format code', 'extension', 'resolution', 'note'], simple_rows, extra_gap=1)

    def simplified_codec(f, field):
        assert field in ('acodec', 'vcodec')
        codec = f.get(field)
        if not codec:
            return 'unknown'
        if codec != 'none':
            # Keep at most the first four dot-separated parts
            return '.'.join(codec.split('.')[:4])

        # This stream is absent; the label depends on whether the other one is absent too
        other_field = 'acodec' if field == 'vcodec' else 'vcodec'
        if f.get(other_field) == 'none':
            return 'images' if field == 'vcodec' else ''
        return self._format_out('audio only' if field == 'vcodec' else 'video only',
                                self.Styles.SUPPRESS)

    column_sep = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)

    def make_row(f):
        return [
            self._format_out(format_field(f, 'format_id'), self.Styles.ID),
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d', func=round),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            format_field(f, 'audio_channels', '\t%s'),
            column_sep, (
                # Exact size, else the approximate one, else an estimate from tbr and duration
                format_field(f, 'filesize', ' \t%s', func=format_bytes)
                or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
                or format_field(filesize_from_tbr(f.get('tbr'), info_dict.get('duration')), None,
                                self._format_out('~\t%s', self.Styles.SUPPRESS), func=format_bytes)),
            format_field(f, 'tbr', '\t%dk', func=round),
            shorten_protocol_name(f.get('protocol', '')),
            column_sep,
            simplified_codec(f, 'vcodec'),
            format_field(f, 'vbr', '\t%dk', func=round),
            simplified_codec(f, 'acodec'),
            format_field(f, 'abr', '\t%dk', func=round),
            format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
            join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
                self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
                (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
                 else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
                format_field(f, 'format_note'),
                format_field(f, 'container', ignore=(None, f.get('ext'))),
                delim=', '), delim=' '),
        ]

    rows = [make_row(f) for f in formats
            if f.get('preference') is None or f['preference'] >= -1000]
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', column_sep, '\tFILESIZE', '\tTBR', 'PROTO',
        column_sep, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    return render_table(
        header_line, rows, hide_empty=True,
        delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))

3937

3938

def render_thumbnails_table(self, info_dict):
    """Build the table listing the thumbnails found in ``info_dict``.

    Returns the rendered table, or None when ``info_dict`` has no thumbnails.
    Missing width/height values are shown as 'unknown'.
    """
    thumbs = list(info_dict.get('thumbnails') or [])
    if len(thumbs) == 0:
        return None

    headers = self._list_format_headers('ID', 'Width', 'Height', 'URL')
    rows = []
    for thumb in thumbs:
        rows.append([
            thumb.get('id'),
            thumb.get('width') or 'unknown',
            thumb.get('height') or 'unknown',
            thumb['url'],
        ])
    return render_table(headers, rows)

3945

3946

def render_subtitles_table(self, video_id, subtitles):
    """Build the table listing the subtitle formats available per language.

    ``subtitles`` maps a language code to a list of format dicts; each dict
    must have an 'ext' key and may have a 'name'. ``video_id`` is not used
    by this method.

    Returns the rendered table, or None when ``subtitles`` is empty.
    """
    def _row(lang, formats):
        # Formats are listed in the reverse of their order in the input list.
        # Two separate lists are built (instead of unpacking a zip) so that a
        # language with an empty format list yields an empty row rather than
        # raising ValueError on unpacking
        ordered = list(reversed(formats))
        exts = [f['ext'] for f in ordered]
        names = [f.get('name') or 'unknown' for f in ordered]
        if len(set(names)) == 1:
            # All formats share one name: show it once, or not at all if unknown
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    if not subtitles:
        return None
    return render_table(
        self._list_format_headers('Language', 'Name', 'Formats'),
        [_row(lang, formats) for lang, formats in subtitles.items()],
        hide_empty=True)

3959

3960

def __list_table(self, video_id, name, func, *args):
    """Render a table with ``func(*args)`` and print it.

    When ``func`` returns a falsy value, a "has no <name>" message is
    printed instead of the table.
    """
    table = func(*args)
    if table:
        self.to_screen(f'[info] Available {name} for {video_id}:')
        self.to_stdout(table)
    else:
        self.to_screen(f'{video_id} has no {name}')

3967

3968

def list_formats(self, info_dict):
    """Print the formats table for the video described by ``info_dict``."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'formats', self.render_formats_table, info_dict)

3970

3971

def list_thumbnails(self, info_dict):
    """Print the thumbnails table for the video described by ``info_dict``."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'thumbnails', self.render_thumbnails_table, info_dict)

3973

3974

def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the subtitles table for ``video_id``; ``name`` is the label used in the messages."""
    renderer = self.render_subtitles_table
    self.__list_table(video_id, name, renderer, video_id, subtitles)

3976

3977

def print_debug_header(self):
    """Write the verbose-mode debug header.

    Does nothing unless the 'verbose' param is set. Otherwise reports, in
    order: encodings, version/variant, params (API use only), lazy-loading
    state, compat options, git HEAD, system identifier, executable versions,
    optional libraries, proxy map, request handlers and loaded plugins.
    Messages go to the 'logger' param's ``debug()`` when a logger is
    configured, and through ``write_string``/``self._write_string`` otherwise.
    """
    if not self.params.get('verbose'):
        return

    from . import _IN_CLI  # Must be delayed import

    # These imports can be slow. So import them only as needed
    from .extractor.extractors import _LAZY_LOADER
    from .extractor.extractors import (
        _PLUGIN_CLASSES as plugin_ies,
        _PLUGIN_OVERRIDES as plugin_ie_overrides,
    )

    def get_encoding(stream):
        # Describe the stream's encoding plus any terminal limitations
        description = str(getattr(stream, 'encoding', f'missing ({type(stream).__name__})'))
        notes = []
        if os.environ.get('TERM', '').lower() == 'dumb':
            notes.append('dumb')
        if not supports_terminal_sequences(stream):
            from .utils import WINDOWS_VT_MODE  # Must be imported locally
            notes.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
        if not notes:
            return description
        return f'{description} ({",".join(notes)})'

    locale_encoding = locale.getpreferredencoding()
    fs_encoding = sys.getfilesystemencoding()
    pref_encoding = self.get_encoding()
    stream_encodings = ', '.join(
        f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
        if stream is not None and key != 'console')
    encoding_str = f'Encodings: locale {locale_encoding}, fs {fs_encoding}, pref {pref_encoding}, {stream_encodings}'

    logger = self.params.get('logger')
    if logger:
        def write_debug(msg):
            logger.debug(f'[debug] {msg}')

        write_debug(encoding_str)
    else:
        # The encodings line is written with encoding=None, unlike the rest
        write_string(f'[debug] {encoding_str}\n', encoding=None)

        def write_debug(msg):
            self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    if VARIANT not in (None, 'pip'):
        source += '*'

    klass = type(self)
    if _IN_CLI:
        api_label = ''
    elif klass == YoutubeDL:
        api_label = 'API'
    else:
        api_label = f'API:{self.__module__}.{klass.__qualname__}'

    write_debug(join_nonempty(
        f'{REPOSITORY.rpartition("/")[2]} version',
        _make_label(ORIGIN, CHANNEL.partition('@')[2] or __version__, __version__),
        f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        api_label,
        delim=' '))

    if not _IN_CLI:
        write_debug(f'params: {self.params}')

    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')

    compat_opts = self.params['compat_opts']
    if compat_opts:
        write_debug('Compatibility options: {}'.format(', '.join(compat_opts)))

    if current_git_head():
        write_debug(f'Git HEAD: {current_git_head()}')
    write_debug(system_identifier())

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    enabled_features = {key for key, val in ffmpeg_features.items() if val}
    if enabled_features:
        exe_versions['ffmpeg'] += ' ({})'.format(','.join(sorted(enabled_features)))

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    found_exes = [f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v]
    exe_str = ', '.join(found_exes) or 'none'
    write_debug(f'exe versions: {exe_str}')

    from .compat.compat_utils import get_package_info
    from .dependencies import available_dependencies

    library_labels = {
        join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
    }
    write_debug('Optional libraries: %s' % (', '.join(sorted(library_labels)) or 'none'))

    write_debug(f'Proxy map: {self.proxies}')
    write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')

    plugin_groups = (('Extractor', plugin_ies), ('Post-Processor', plugin_pps))
    for plugin_type, plugins in plugin_groups:
        display_list = []
        for name, plugin_cls in plugins.items():
            alias = '' if plugin_cls.__name__ == name else f' as {name}'
            display_list.append('{}{}'.format(plugin_cls.__name__, alias))
        if plugin_type == 'Extractor':
            # Overrides are shown by the last override's name and the class it replaces
            for parent, overrides in plugin_ie_overrides.items():
                display_list.append(f'{overrides[-1].IE_NAME.partition("+")[2]} ({parent.__name__})')
        if display_list:
            write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')

    plugin_dirs = plugin_directories()
    if plugin_dirs:
        write_debug(f'Plugin directories: {plugin_dirs}')

    # Not implemented
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
        write_debug(f'Public IP address: {ipaddr}')
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode()
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                f'You are using an outdated version (newest version: {latest_version})! '
                'See https://yt-dl.org/update if you need help updating.')

4092

4093

@functools.cached_property
def proxies(self):
    """Global proxy configuration"""
    opts_proxy = self.params.get('proxy')
    if opts_proxy is None:
        # No explicit option: fall back to what urllib detects
        proxy_map = urllib.request.getproxies()
    else:
        # An empty proxy option is mapped to the '__noproxy__' marker
        proxy_map = {'all': '__noproxy__' if opts_proxy == '' else opts_proxy}

    # compat. Set HTTPS_PROXY to __noproxy__ to revert
    if 'http' in proxy_map:
        proxy_map.setdefault('https', proxy_map['http'])

    return proxy_map

@functools.cached_property
def cookiejar(self):
    """Global cookiejar instance"""
    cookie_file = self.params.get('cookiefile')
    browser_spec = self.params.get('cookiesfrombrowser')
    return load_cookies(cookie_file, browser_spec, self)

@property
def _opener(self):
    """
    Get a urllib OpenerDirector from the Urllib handler (deprecated).
    """
    self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
    urllib_handler = self._request_director.handlers['Urllib']
    return urllib_handler._get_instance(
        cookiejar=self.cookiejar,
        proxies=self.proxies,
    )

4123

4124

def _get_available_impersonate_targets(self):
    """Return a list of (target, handler name) pairs for every impersonation-capable handler."""
    # TODO(future): make available as public API
    available = []
    for handler in self._request_director.handlers.values():
        if not isinstance(handler, ImpersonateRequestHandler):
            continue
        for target in handler.supported_targets:
            available.append((target, handler.RH_NAME))
    return available

4132

4133

def _impersonate_target_available(self, target):
    """Return True if any impersonation-capable handler supports ``target``."""
    # TODO(future): make available as public API
    for handler in self._request_director.handlers.values():
        if isinstance(handler, ImpersonateRequestHandler) and handler.is_supported_target(target):
            return True
    return False

4139

4140

def urlopen(self, req):
    """ Start an HTTP download

    ``req`` may be a URL string, a ``Request``, or (deprecated) a
    ``urllib.request.Request``. Some NoSupportingHandlers and SSLError
    failures are re-raised as RequestError with a more helpful message.
    """
    if isinstance(req, str):
        req = Request(req)
    elif isinstance(req, urllib.request.Request):
        self.deprecation_warning(
            'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
            'Use yt_dlp.networking.common.Request instead.')
        req = urllib_req_to_req(req)
    assert isinstance(req, Request)

    # compat: Assume user:pass url params are basic auth
    url, basic_auth_header = extract_basic_auth(req.url)
    if basic_auth_header:
        req.headers['Authorization'] = basic_auth_header
    req.url = sanitize_url(url)

    clean_proxies(proxies=req.proxies, headers=req.headers)
    clean_headers(req.headers)

    try:
        return self._request_director.send(req)
    except NoSupportingHandlers as e:
        for ue in e.unsupported_errors:
            # FIXME: This depends on the order of errors.
            if not ue.handler or not ue.msg:
                continue

            reason = ue.msg.lower()
            handlers = self._request_director.handlers

            if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in reason:
                raise RequestError(
                    'file:// URLs are disabled by default in yt-dlp for security reasons. '
                    'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue

            if (
                'unsupported proxy type: "https"' in reason
                and 'requests' not in handlers
                and 'curl_cffi' not in handlers
            ):
                raise RequestError(
                    'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests, curl_cffi')

            if re.match(r'unsupported url scheme: "wss?"', reason) and 'websockets' not in handlers:
                raise RequestError(
                    'This request requires WebSocket support. '
                    'Ensure one of the following dependencies are installed: websockets',
                    cause=ue) from ue

            if re.match(r'unsupported (?:extensions: impersonate|impersonate target)', reason):
                raise RequestError(
                    f'Impersonate target "{req.extensions["impersonate"]}" is not available.'
                    f' See --list-impersonate-targets for available targets.'
                    f' This request requires browser impersonation, however you may be missing dependencies'
                    f' required to support this target.')
        # No specific hint applies: propagate the original error
        raise
    except SSLError as e:
        details = str(e)
        if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in details:
            raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
        if 'SSLV3_ALERT_HANDSHAKE_FAILURE' in details:
            raise RequestError(
                'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
                'Try using --legacy-server-connect', cause=e) from e
        raise

4203

4204

def build_request_director(self, handlers, preferences=None):
    """Create a RequestDirector holding one instance of each class in ``handlers``.

    Every handler is constructed with the cleaned global headers and proxies,
    the shared cookiejar and the networking-related params. ``preferences``
    (if given) are added to the director's preferences.
    """
    logger = _YDLLogger(self)
    headers = self.params['http_headers'].copy()
    proxies = self.proxies.copy()
    clean_headers(headers)
    clean_proxies(proxies, headers)

    # Handler keyword argument -> key of self.params it is read from
    param_map = {
        'verbose': 'debug_printtraffic',
        'source_address': 'source_address',
        'timeout': 'socket_timeout',
        'legacy_ssl_support': 'legacyserverconnect',
        'enable_file_urls': 'enable_file_urls',
        'impersonate': 'impersonate',
        'client_cert': {
            'client_certificate': 'client_certificate',
            'client_certificate_key': 'client_certificate_key',
            'client_certificate_password': 'client_certificate_password',
        },
    }

    director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
    for handler_cls in handlers:
        instance = handler_cls(
            logger=logger,
            headers=headers,
            cookiejar=self.cookiejar,
            proxies=proxies,
            prefer_system_certs='no-certifi' in self.params['compat_opts'],
            verify=not self.params.get('nocheckcertificate'),
            **traverse_obj(self.params, param_map),
        )
        director.add_handler(instance)

    director.preferences.update(preferences or [])
    if 'prefer-legacy-http-handler' in self.params['compat_opts']:
        def _prefer_urllib(rh, _):
            return 500 if rh.RH_KEY == 'Urllib' else 0

        director.preferences.add(_prefer_urllib)
    return director

4238

4239

@functools.cached_property
def _request_director(self):
    """Request director built from all registered request handlers (cached after first access)."""
    handler_classes = _REQUEST_HANDLERS.values()
    return self.build_request_director(handler_classes, _RH_PREFERENCES)

4242

4243

def encode(self, s):
    """Encode ``s`` using ``self.get_encoding()``; bytes are returned unchanged.

    On UnicodeEncodeError a hint about the encoding configuration is appended
    to the error's reason before it is re-raised.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise

    return s  # Already encoded

4252

4253

def get_encoding(self):
    """Return the 'encoding' param, or ``preferredencoding()`` when it is not set."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured

4258

4259

def _write_info_json(self, label, ie_result, infofn, overwrite=None):
    """ Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error """
    if overwrite is None:
        overwrite = self.params.get('overwrites', True)

    if not self.params.get('writeinfojson'):
        return False
    if not infofn:
        self.write_debug(f'Skipping writing {label} infojson')
        return False
    if not self._ensure_dir_exists(infofn):
        return None
    if not overwrite and os.path.exists(infofn):
        self.to_screen(f'[info] {label.title()} metadata is already present')
        return 'exists'

    self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
    try:
        sanitized = self.sanitize_info(ie_result, self.params.get('clean_infojson', True))
        write_json_file(sanitized, infofn)
    except OSError:
        self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
        return None
    return True

4281

4282

def _write_description(self, label, ie_result, descfn):
    """ Write description and returns True = written, False = skip, None = error """
    if not self.params.get('writedescription'):
        return False
    if not descfn:
        self.write_debug(f'Skipping writing {label} description')
        return False
    if not self._ensure_dir_exists(descfn):
        return None

    if not self.params.get('overwrites', True) and os.path.exists(descfn):
        # An already-present description is reported as success
        self.to_screen(f'[info] {label.title()} description is already present')
        return True

    if ie_result.get('description') is None:
        self.to_screen(f'[info] There\'s no {label} description to write')
        return False

    try:
        self.to_screen(f'[info] Writing {label} description to: {descfn}')
        with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
            descfile.write(ie_result['description'])
    except OSError:
        self.report_error(f'Cannot write {label} description file {descfn}')
        return None
    return True

def _write_subtitles(self, info_dict, filename):
    """ Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error"""
    written = []
    subtitles = info_dict.get('requested_subtitles')
    wants_subs = self.params.get('writesubtitles') or self.params.get('writeautomaticsub')
    if not wants_subs:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        return written
    if not subtitles:
        self.to_screen('[info] There are no subtitles for the requested languages')
        return written

    sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
    if not sub_filename_base:
        self.to_screen('[info] Skipping writing video subtitles')
        return written

    for sub_lang, sub_info in subtitles.items():
        sub_format = sub_info['ext']
        sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
        sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))

        existing_sub = self.existing_file((sub_filename_final, sub_filename))
        if existing_sub:
            self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
            sub_info['filepath'] = existing_sub
            written.append((existing_sub, sub_filename_final))
            continue

        self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')

        # Subtitle content is already at hand: write it out directly
        if sub_info.get('data') is not None:
            try:
                # Use newline='' to prevent conversion of newline characters
                # See https://github.com/ytdl-org/youtube-dl/issues/10268
                with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                    subfile.write(sub_info['data'])
                sub_info['filepath'] = sub_filename
                written.append((sub_filename, sub_filename_final))
            except OSError:
                self.report_error(f'Cannot write video subtitles file {sub_filename}')
                return None
            continue

        # Otherwise the subtitle has to be downloaded
        try:
            sub_copy = sub_info.copy()
            sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
            self.dl(sub_filename, sub_copy, subtitle=True)
            sub_info['filepath'] = sub_filename
            written.append((sub_filename, sub_filename_final))
        except (DownloadError, ExtractorError, OSError, ValueError, *network_exceptions) as err:
            msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
            ignore_errors = self.params.get('ignoreerrors')
            if ignore_errors is not True:  # False or 'only_download'
                if not ignore_errors:
                    self.report_error(msg)
                raise DownloadError(msg)
            self.report_warning(msg)
    return written

4361

4362

def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
    """ Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error

    Thumbnails are only written when the 'writethumbnail' or
    'write_all_thumbnails' param is set. Unless all thumbnails were
    requested, processing stops after the first one that is written or
    found on disk. Thumbnails that fail to download are removed from the
    thumbnail list.
    """
    write_all = self.params.get('write_all_thumbnails', False)
    thumbnails, ret = [], []
    if write_all or self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails') or []
        if not thumbnails:
            self.to_screen(f'[info] There are no {label} thumbnails to download')
            return ret
    # The thumbnail id only goes into the file name when several are written
    multiple = write_all and len(thumbnails) > 1

    if thumb_filename_base is None:
        thumb_filename_base = filename
    if thumbnails and not thumb_filename_base:
        self.write_debug(f'Skipping writing {label} thumbnail')
        return ret

    if thumbnails and not self._ensure_dir_exists(filename):
        return None

    # Walk the list from the last entry to the first; iterating over a
    # snapshot in reverse keeps the remaining indices valid when a failed
    # entry is popped below
    for idx, t in list(enumerate(thumbnails))[::-1]:
        thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
        thumb_display_id = f'{label} thumbnail {t["id"]}'
        thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
        thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

        existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
        if existing_thumb:
            self.to_screen('[info] {} is already present'.format((
                thumb_display_id if multiple else f'{label} thumbnail').capitalize()))
            t['filepath'] = existing_thumb
            ret.append((existing_thumb, thumb_filename_final))
        else:
            self.to_screen(f'[info] Downloading {thumb_display_id} ...')
            try:
                # Close the response once it has been copied so the
                # underlying connection is not left open
                with contextlib.closing(
                        self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))) as uf:
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                ret.append((thumb_filename, thumb_filename_final))
                t['filepath'] = thumb_filename
            except network_exceptions as err:
                if isinstance(err, HTTPError) and err.status == 404:
                    self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
                else:
                    self.report_warning(f'Unable to download {thumb_display_id}: {err}')
                thumbnails.pop(idx)
        if ret and not write_all:
            break
    return ret