jfr.im git - yt-dlp.git/blame_incremental

Commit	Line	Data
	1	import collections
	2	import contextlib
	3	import datetime
	4	import errno
	5	import fileinput
	6	import functools
	7	import io
	8	import itertools
	9	import json
	10	import locale
	11	import operator
	12	import os
	13	import random
	14	import re
	15	import shutil
	16	import subprocess
	17	import sys
	18	import tempfile
	19	import time
	20	import tokenize
	21	import traceback
	22	import unicodedata
	23	import urllib.request
	24	from string import ascii_letters
	25
	26	from .cache import Cache
	27	from .compat import compat_os_name, compat_shlex_quote
	28	from .cookies import load_cookies
	29	from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
	30	from .downloader.rtmp import rtmpdump_version
	31	from .extractor import gen_extractor_classes, get_info_extractor
	32	from .extractor.common import UnsupportedURLIE
	33	from .extractor.openload import PhantomJSwrapper
	34	from .minicurses import format_text
	35	from .plugins import directories as plugin_directories
	36	from .postprocessor import _PLUGIN_CLASSES as plugin_pps
	37	from .postprocessor import (
	38	EmbedThumbnailPP,
	39	FFmpegFixupDuplicateMoovPP,
	40	FFmpegFixupDurationPP,
	41	FFmpegFixupM3u8PP,
	42	FFmpegFixupM4aPP,
	43	FFmpegFixupStretchedPP,
	44	FFmpegFixupTimestampPP,
	45	FFmpegMergerPP,
	46	FFmpegPostProcessor,
	47	FFmpegVideoConvertorPP,
	48	MoveFilesAfterDownloadPP,
	49	get_postprocessor,
	50	)
	51	from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
	52	from .update import REPOSITORY, current_git_head, detect_variant
	53	from .utils import (
	54	DEFAULT_OUTTMPL,
	55	IDENTITY,
	56	LINK_TEMPLATES,
	57	MEDIA_EXTENSIONS,
	58	NO_DEFAULT,
	59	NUMBER_RE,
	60	OUTTMPL_TYPES,
	61	POSTPROCESS_WHEN,
	62	STR_FORMAT_RE_TMPL,
	63	STR_FORMAT_TYPES,
	64	ContentTooShortError,
	65	DateRange,
	66	DownloadCancelled,
	67	DownloadError,
	68	EntryNotInPlaylist,
	69	ExistingVideoReached,
	70	ExtractorError,
	71	FormatSorter,
	72	GeoRestrictedError,
	73	HEADRequest,
	74	ISO3166Utils,
	75	LazyList,
	76	MaxDownloadsReached,
	77	Namespace,
	78	PagedList,
	79	PerRequestProxyHandler,
	80	PlaylistEntries,
	81	Popen,
	82	PostProcessingError,
	83	ReExtractInfo,
	84	RejectedVideoReached,
	85	SameFileError,
	86	UnavailableVideoError,
	87	UserNotLive,
	88	YoutubeDLCookieProcessor,
	89	YoutubeDLHandler,
	90	YoutubeDLRedirectHandler,
	91	age_restricted,
	92	args_to_str,
	93	bug_reports_message,
	94	date_from_str,
	95	deprecation_warning,
	96	determine_ext,
	97	determine_protocol,
	98	encode_compat_str,
	99	encodeFilename,
	100	error_to_compat_str,
	101	escapeHTML,
	102	expand_path,
	103	filter_dict,
	104	float_or_none,
	105	format_bytes,
	106	format_decimal_suffix,
	107	format_field,
	108	formatSeconds,
	109	get_compatible_ext,
	110	get_domain,
	111	int_or_none,
	112	iri_to_uri,
	113	is_path_like,
	114	join_nonempty,
	115	locked_file,
	116	make_archive_id,
	117	make_dir,
	118	make_HTTPS_handler,
	119	merge_headers,
	120	network_exceptions,
	121	number_of_digits,
	122	orderedSet,
	123	orderedSet_from_options,
	124	parse_filesize,
	125	preferredencoding,
	126	prepend_extension,
	127	register_socks_protocols,
	128	remove_terminal_sequences,
	129	render_table,
	130	replace_extension,
	131	sanitize_filename,
	132	sanitize_path,
	133	sanitize_url,
	134	sanitized_Request,
	135	std_headers,
	136	str_or_none,
	137	strftime_or_none,
	138	subtitles_filename,
	139	supports_terminal_sequences,
	140	system_identifier,
	141	timetuple_from_msec,
	142	to_high_limit_path,
	143	traverse_obj,
	144	try_call,
	145	try_get,
	146	url_basename,
	147	variadic,
	148	version_tuple,
	149	windows_enable_vt_mode,
	150	write_json_file,
	151	write_string,
	152	)
	153	from .version import RELEASE_GIT_HEAD, VARIANT, __version__
	154
	155	if compat_os_name == 'nt':
	156	import ctypes
	157
	158
	159	class YoutubeDL:
	160	"""YoutubeDL class.
	161
	162	YoutubeDL objects are the ones responsible for downloading the
	163	actual video file and writing it to disk if the user has requested
	164	it, among some other tasks. In most cases there should be one per
	165	program. As, given a video URL, the downloader doesn't know how to
	166	extract all the needed information, task that InfoExtractors do, it
	167	has to pass the URL to one of them.
	168
	169	For this, YoutubeDL objects have a method that allows
	170	InfoExtractors to be registered in a given order. When it is passed
	171	a URL, the YoutubeDL object hands it to the first InfoExtractor it
	172	finds that reports being able to handle it. The InfoExtractor extracts
	173	all the information about the video or videos the URL refers to, and
	174	YoutubeDL processes the extracted information, possibly using a File
	175	Downloader to download the video.
	176
	177	YoutubeDL objects accept a lot of parameters. In order not to saturate
	178	the object constructor with arguments, it receives a dictionary of
	179	options instead. These options are available through the params
	180	attribute for the InfoExtractors to use. The YoutubeDL also
	181	registers itself as the downloader in charge for the InfoExtractors
	182	that are added to it, so this is a "mutual registration".
	183
	184	Available options:
	185
	186	username: Username for authentication purposes.
	187	password: Password for authentication purposes.
	188	videopassword: Password for accessing a video.
	189	ap_mso: Adobe Pass multiple-system operator identifier.
	190	ap_username: Multiple-system operator account username.
	191	ap_password: Multiple-system operator account password.
	192	usenetrc: Use netrc for authentication instead.
	193	verbose: Print additional info to stdout.
	194	quiet: Do not print messages to stdout.
	195	no_warnings: Do not print out anything for warnings.
	196	forceprint: A dict with keys WHEN mapped to a list of templates to
	197	print to stdout. The allowed keys are video or any of the
	198	items in utils.POSTPROCESS_WHEN.
	199	For compatibility, a single list is also accepted
	200	print_to_file: A dict with keys WHEN (same as forceprint) mapped to
	201	a list of tuples with (template, filename)
	202	forcejson: Force printing info_dict as JSON.
	203	dump_single_json: Force printing the info_dict of the whole playlist
	204	(or video) as a single JSON line.
	205	force_write_download_archive: Force writing download archive regardless
	206	of 'skip_download' or 'simulate'.
	207	simulate: Do not download the video files. If unset (or None),
	208	simulate only if listsubtitles, listformats or list_thumbnails is used
	209	format: Video format code. see "FORMAT SELECTION" for more details.
	210	You can also pass a function. The function takes 'ctx' as
	211	argument and returns the formats to download.
	212	See "build_format_selector" for an implementation
	213	allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
	214	ignore_no_formats_error: Ignore "No video formats" error. Useful for
	215	extracting metadata even if the video is not actually
	216	available for download (experimental)
	217	format_sort: A list of fields by which to sort the video formats.
	218	See "Sorting Formats" for more details.
	219	format_sort_force: Force the given format_sort. see "Sorting Formats"
	220	for more details.
	221	prefer_free_formats: Whether to prefer video formats with free containers
	222	over non-free ones of same quality.
	223	allow_multiple_video_streams: Allow multiple video streams to be merged
	224	into a single file
	225	allow_multiple_audio_streams: Allow multiple audio streams to be merged
	226	into a single file
	227	check_formats: Whether to test if the formats are downloadable.
	228	Can be True (check all), False (check none),
	229	'selected' (check selected formats),
	230	or None (check only if requested by extractor)
	231	paths: Dictionary of output paths. The allowed keys are 'home'
	232	'temp' and the keys of OUTTMPL_TYPES (in utils.py)
	233	outtmpl: Dictionary of templates for output names. Allowed keys
	234	are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
	235	For compatibility with youtube-dl, a single string can also be used
	236	outtmpl_na_placeholder: Placeholder for unavailable meta fields.
	237	restrictfilenames: Do not allow "&" and spaces in file names
	238	trim_file_name: Limit length of filename (extension excluded)
	239	windowsfilenames: Force the filenames to be windows compatible
	240	ignoreerrors: Do not stop on download/postprocessing errors.
	241	Can be 'only_download' to ignore only download errors.
	242	Default is 'only_download' for CLI, but False for API
	243	skip_playlist_after_errors: Number of allowed failures until the rest of
	244	the playlist is skipped
	245	allowed_extractors: List of regexes to match against extractor names that are allowed
	246	overwrites: Overwrite all video and metadata files if True,
	247	overwrite only non-video files if None
	248	and don't overwrite any file if False
	249	For compatibility with youtube-dl,
	250	"nooverwrites" may also be used instead
	251	playlist_items: Specific indices of playlist to download.
	252	playlistrandom: Download playlist items in random order.
	253	lazy_playlist: Process playlist entries as they are received.
	254	matchtitle: Download only matching titles.
	255	rejecttitle: Reject downloads for matching titles.
	256	logger: Log messages to a logging.Logger instance.
	257	logtostderr: Print everything to stderr instead of stdout.
	258	consoletitle: Display progress in console window's titlebar.
	259	writedescription: Write the video description to a .description file
	260	writeinfojson: Write the video metadata to a .info.json file
	261	clean_infojson: Remove private fields from the infojson
	262	getcomments: Extract video comments. This will not be written to disk
	263	unless writeinfojson is also given
	264	writeannotations: Write the video annotations to a .annotations.xml file
	265	writethumbnail: Write the thumbnail image to a file
	266	allow_playlist_files: Whether to write playlists' description, infojson etc
	267	also to disk when using the 'write*' options
	268	write_all_thumbnails: Write all thumbnail formats to files
	269	writelink: Write an internet shortcut file, depending on the
	270	current platform (.url/.webloc/.desktop)
	271	writeurllink: Write a Windows internet shortcut file (.url)
	272	writewebloclink: Write a macOS internet shortcut file (.webloc)
	273	writedesktoplink: Write a Linux internet shortcut file (.desktop)
	274	writesubtitles: Write the video subtitles to a file
	275	writeautomaticsub: Write the automatically generated subtitles to a file
	276	listsubtitles: Lists all available subtitles for the video
	277	subtitlesformat: The format code for subtitles
	278	subtitleslangs: List of languages of the subtitles to download (can be regex).
	279	The list may contain "all" to refer to all the available
	280	subtitles. The language can be prefixed with a "-" to
	281	exclude it from the requested languages, e.g. ['all', '-live_chat']
	282	keepvideo: Keep the video file after post-processing
	283	daterange: A DateRange object, download only if the upload_date is in the range.
	284	skip_download: Skip the actual download of the video file
	285	cachedir: Location of the cache files in the filesystem.
	286	False to disable filesystem cache.
	287	noplaylist: Download single video instead of a playlist if in doubt.
	288	age_limit: An integer representing the user's age in years.
	289	Unsuitable videos for the given age are skipped.
	290	min_views: An integer representing the minimum view count the video
	291	must have in order to not be skipped.
	292	Videos without view count information are always
	293	downloaded. None for no limit.
	294	max_views: An integer representing the maximum view count.
	295	Videos that are more popular than that are not
	296	downloaded.
	297	Videos without view count information are always
	298	downloaded. None for no limit.
	299	download_archive: A set, or the name of a file where all downloads are recorded.
	300	Videos already present in the file are not downloaded again.
	301	break_on_existing: Stop the download process after attempting to download a
	302	file that is in the archive.
	303	break_on_reject: Stop the download process when encountering a video that
	304	has been filtered out.
	305	break_per_url: Whether break_on_reject and break_on_existing
	306	should act on each input URL as opposed to for the entire queue
	307	cookiefile: File name or text stream from where cookies should be read and dumped to
	308	cookiesfrombrowser: A tuple containing the name of the browser, the profile
	309	name/path from where cookies are loaded, the name of the keyring,
	310	and the container name, e.g. ('chrome', ) or
	311	('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
	312	legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
	313	support RFC 5746 secure renegotiation
	314	nocheckcertificate: Do not verify SSL certificates
	315	client_certificate: Path to client certificate file in PEM format. May include the private key
	316	client_certificate_key: Path to private key file for client certificate
	317	client_certificate_password: Password for client certificate private key, if encrypted.
	318	If not provided and the key is encrypted, yt-dlp will ask interactively
	319	prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
	320	(Only supported by some extractors)
	321	enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons.
	322	http_headers: A dictionary of custom headers to be used for all requests
	323	proxy: URL of the proxy server to use
	324	geo_verification_proxy: URL of the proxy to use for IP address verification
	325	on geo-restricted sites.
	326	socket_timeout: Time to wait for unresponsive hosts, in seconds
	327	bidi_workaround: Work around buggy terminals without bidirectional text
	328	support, using fribidi
	329	debug_printtraffic:Print out sent and received HTTP traffic
	330	default_search: Prepend this string if an input url is not valid.
	331	'auto' for elaborate guessing
	332	encoding: Use this encoding instead of the system-specified.
	333	extract_flat: Whether to resolve and process url_results further
	334	* False: Always process (default)
	335	* True: Never process
	336	* 'in_playlist': Do not process inside playlist/multi_video
	337	* 'discard': Always process, but don't return the result
	338	from inside playlist/multi_video
	339	* 'discard_in_playlist': Same as "discard", but only for
	340	playlists (not multi_video)
	341	wait_for_video: If given, wait for scheduled streams to become available.
	342	The value should be a tuple containing the range
	343	(min_secs, max_secs) to wait between retries
	344	postprocessors: A list of dictionaries, each with an entry
	345	* key: The name of the postprocessor. See
	346	yt_dlp/postprocessor/__init__.py for a list.
	347	* when: When to run the postprocessor. Allowed values are
	348	the entries of utils.POSTPROCESS_WHEN
	349	Assumed to be 'post_process' if not given
	350	progress_hooks: A list of functions that get called on download
	351	progress, with a dictionary with the entries
	352	* status: One of "downloading", "error", or "finished".
	353	Check this first and ignore unknown values.
	354	* info_dict: The extracted info_dict
	355
	356	If status is one of "downloading", or "finished", the
	357	following properties may also be present:
	358	* filename: The final filename (always present)
	359	* tmpfilename: The filename we're currently writing to
	360	* downloaded_bytes: Bytes on disk
	361	* total_bytes: Size of the whole file, None if unknown
	362	* total_bytes_estimate: Guess of the eventual file size,
	363	None if unavailable.
	364	* elapsed: The number of seconds since download started.
	365	* eta: The estimated time in seconds, None if unknown
	366	* speed: The download speed in bytes/second, None if
	367	unknown
	368	* fragment_index: The counter of the currently
	369	downloaded video fragment.
	370	* fragment_count: The number of fragments (= individual
	371	files that will be merged)
	372
	373	Progress hooks are guaranteed to be called at least once
	374	(with status "finished") if the download is successful.
	375	postprocessor_hooks: A list of functions that get called on postprocessing
	376	progress, with a dictionary with the entries
	377	* status: One of "started", "processing", or "finished".
	378	Check this first and ignore unknown values.
	379	* postprocessor: Name of the postprocessor
	380	* info_dict: The extracted info_dict
	381
	382	Progress hooks are guaranteed to be called at least twice
	383	(with status "started" and "finished") if the processing is successful.
	384	merge_output_format: "/" separated list of extensions to use when merging formats.
	385	final_ext: Expected final extension; used to detect when the file was
	386	already downloaded and converted
	387	fixup: Automatically correct known faults of the file.
	388	One of:
	389	- "never": do nothing
	390	- "warn": only emit a warning
	391	- "detect_or_warn": check whether we can do anything
	392	about it, warn otherwise (default)
	393	source_address: Client-side IP address to bind to.
	394	sleep_interval_requests: Number of seconds to sleep between requests
	395	during extraction
	396	sleep_interval: Number of seconds to sleep before each download when
	397	used alone or a lower bound of a range for randomized
	398	sleep before each download (minimum possible number
	399	of seconds to sleep) when used along with
	400	max_sleep_interval.
	401	max_sleep_interval:Upper bound of a range for randomized sleep before each
	402	download (maximum possible number of seconds to sleep).
	403	Must only be used along with sleep_interval.
	404	Actual sleep time will be a random float from range
	405	[sleep_interval; max_sleep_interval].
	406	sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
	407	listformats: Print an overview of available video formats and exit.
	408	list_thumbnails: Print a table of all thumbnails and exit.
	409	match_filter: A function that gets called for every video with the signature
	410	(info_dict, *, incomplete: bool) -> Optional[str]
	411	For backward compatibility with youtube-dl, the signature
	412	(info_dict) -> Optional[str] is also allowed.
	413	- If it returns a message, the video is ignored.
	414	- If it returns None, the video is downloaded.
	415	- If it returns utils.NO_DEFAULT, the user is interactively
	416	asked whether to download the video.
	417	match_filter_func in utils.py is one example for this.
	418	no_color: Do not emit color codes in output.
	419	geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
	420	HTTP header
	421	geo_bypass_country:
	422	Two-letter ISO 3166-1 alpha-2 country code that will be used for
	423	explicit geographic restriction bypassing via faking
	424	X-Forwarded-For HTTP header
	425	geo_bypass_ip_block:
	426	IP range in CIDR notation that will be used similarly to
	427	geo_bypass_country
	428	external_downloader: A dictionary of protocol keys and the executable of the
	429	external downloader to use for it. The allowed protocols
	430	are default\|http\|ftp\|m3u8\|dash\|rtsp\|rtmp\|mms.
	431	Set the value to 'native' to use the native downloader
	432	compat_opts: Compatibility options. See "Differences in default behavior".
	433	The following options do not work when used through the API:
	434	filename, abort-on-error, multistreams, no-live-chat, format-sort
	435	no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
	436	Refer to __init__.py for their implementation
	437	progress_template: Dictionary of templates for progress outputs.
	438	Allowed keys are 'download', 'postprocess',
	439	'download-title' (console title) and 'postprocess-title'.
	440	The template is mapped on a dictionary with keys 'progress' and 'info'
	441	retry_sleep_functions: Dictionary of functions that takes the number of attempts
	442	as argument and returns the time to sleep in seconds.
	443	Allowed keys are 'http', 'fragment', 'file_access'
	444	download_ranges: A callback function that gets called for every video with
	445	the signature (info_dict, ydl) -> Iterable[Section].
	446	Only the returned sections will be downloaded.
	447	Each Section is a dict with the following keys:
	448	* start_time: Start time of the section in seconds
	449	* end_time: End time of the section in seconds
	450	* title: Section title (Optional)
	451	* index: Section number (Optional)
	452	force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
	453	noprogress: Do not print the progress bar
	454	live_from_start: Whether to download livestreams videos from the start
	455
	456	The following parameters are not used by YoutubeDL itself, they are used by
	457	the downloader (see yt_dlp/downloader/common.py):
	458	nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
	459	max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
	460	continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
	461	external_downloader_args, concurrent_fragment_downloads.
	462
	463	The following options are used by the post processors:
	464	ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
	465	to the binary or its containing directory.
	466	postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
	467	and a list of additional command-line arguments for the
	468	postprocessor/executable. The dict can also have "PP+EXE" keys
	469	which are used when the given exe is used by the given PP.
	470	Use 'default' as the name for arguments to be passed to all PPs
	471	For compatibility with youtube-dl, a single list of args
	472	can also be used
	473
	474	The following options are used by the extractors:
	475	extractor_retries: Number of times to retry for known errors
	476	dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
	477	hls_split_discontinuity: Split HLS playlists to different formats at
	478	discontinuities such as ad breaks (default: False)
	479	extractor_args: A dictionary of arguments to be passed to the extractors.
	480	See "EXTRACTOR ARGUMENTS" for details.
	481	E.g. {'youtube': {'skip': ['dash', 'hls']}}
	482	mark_watched: Mark videos watched (even with --simulate). Only for YouTube
	483
	484	The following options are deprecated and may be removed in the future:
	485
	486	force_generic_extractor: Force downloader to use the generic extractor
	487	- Use allowed_extractors = ['generic', 'default']
	488	playliststart: - Use playlist_items
	489	Playlist item to start at.
	490	playlistend: - Use playlist_items
	491	Playlist item to end at.
	492	playlistreverse: - Use playlist_items
	493	Download playlist items in reverse order.
	494	forceurl: - Use forceprint
	495	Force printing final URL.
	496	forcetitle: - Use forceprint
	497	Force printing title.
	498	forceid: - Use forceprint
	499	Force printing ID.
	500	forcethumbnail: - Use forceprint

1

import collections

import contextlib

import datetime

import errno

import fileinput

import functools

import io

import itertools

import json

import locale

import operator

import os

import random

import re

import shutil

import subprocess

import sys

import tempfile

import time

import tokenize

import traceback

import unicodedata

import urllib.request

24

from string import ascii_letters

25

26

from .cache import Cache

27

from .compat import compat_os_name, compat_shlex_quote

28

from .cookies import load_cookies

29

from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name

30

from .downloader.rtmp import rtmpdump_version

31

from .extractor import gen_extractor_classes, get_info_extractor

32

from .extractor.common import UnsupportedURLIE

33

from .extractor.openload import PhantomJSwrapper

34

from .minicurses import format_text

35

from .plugins import directories as plugin_directories

36

from .postprocessor import _PLUGIN_CLASSES as plugin_pps

37

from .postprocessor import (

38

EmbedThumbnailPP,

39

FFmpegFixupDuplicateMoovPP,

40

FFmpegFixupDurationPP,

41

FFmpegFixupM3u8PP,

42

FFmpegFixupM4aPP,

43

FFmpegFixupStretchedPP,

44

FFmpegFixupTimestampPP,

45

FFmpegMergerPP,

46

FFmpegPostProcessor,

47

FFmpegVideoConvertorPP,

48

MoveFilesAfterDownloadPP,

49

get_postprocessor,

50

)

51

from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping

52

from .update import REPOSITORY, current_git_head, detect_variant

from .utils import (

DEFAULT_OUTTMPL,

IDENTITY,

LINK_TEMPLATES,

MEDIA_EXTENSIONS,

NO_DEFAULT,

NUMBER_RE,

OUTTMPL_TYPES,

POSTPROCESS_WHEN,

STR_FORMAT_RE_TMPL,

STR_FORMAT_TYPES,

ContentTooShortError,

DateRange,

DownloadCancelled,

DownloadError,

EntryNotInPlaylist,

ExistingVideoReached,

ExtractorError,

FormatSorter,

GeoRestrictedError,

HEADRequest,

ISO3166Utils,

LazyList,

MaxDownloadsReached,

Namespace,

PagedList,

PerRequestProxyHandler,

PlaylistEntries,

Popen,

PostProcessingError,

ReExtractInfo,

RejectedVideoReached,

85

SameFileError,

86

UnavailableVideoError,

87

UserNotLive,

88

YoutubeDLCookieProcessor,

89

YoutubeDLHandler,

90

YoutubeDLRedirectHandler,

age_restricted,

args_to_str,

bug_reports_message,

date_from_str,

deprecation_warning,

determine_ext,

determine_protocol,

encode_compat_str,

encodeFilename,

error_to_compat_str,

escapeHTML,

expand_path,

filter_dict,

float_or_none,

format_bytes,

format_decimal_suffix,

format_field,

formatSeconds,

get_compatible_ext,

get_domain,

int_or_none,

iri_to_uri,

is_path_like,

join_nonempty,

locked_file,

make_archive_id,

make_dir,

make_HTTPS_handler,

merge_headers,

network_exceptions,

number_of_digits,

orderedSet,

orderedSet_from_options,

parse_filesize,

preferredencoding,

prepend_extension,

register_socks_protocols,

128

remove_terminal_sequences,

render_table,

replace_extension,

sanitize_filename,

sanitize_path,

sanitize_url,

sanitized_Request,

std_headers,

str_or_none,

strftime_or_none,

subtitles_filename,

supports_terminal_sequences,

system_identifier,

timetuple_from_msec,

to_high_limit_path,

traverse_obj,

try_call,

try_get,

url_basename,

variadic,

version_tuple,

windows_enable_vt_mode,

write_json_file,

write_string,

)

from .version import RELEASE_GIT_HEAD, VARIANT, __version__

154

155

if compat_os_name == 'nt':

import ctypes

class YoutubeDL:

"""YoutubeDL class.

YoutubeDL objects are the ones responsible for downloading the

163

actual video file and writing it to disk if the user has requested

164

it, among some other tasks. In most cases there should be one per

165

program. As, given a video URL, the downloader doesn't know how to

166

extract all the needed information, task that InfoExtractors do, it

167

has to pass the URL to one of them.

168

169

For this, YoutubeDL objects have a method that allows

170

InfoExtractors to be registered in a given order. When it is passed

171

a URL, the YoutubeDL object hands it to the first InfoExtractor it

172

finds that reports being able to handle it. The InfoExtractor extracts

173

all the information about the video or videos the URL refers to, and

174

YoutubeDL processes the extracted information, possibly using a File

175

Downloader to download the video.

176

177

YoutubeDL objects accept a lot of parameters. In order not to saturate

178

the object constructor with arguments, it receives a dictionary of

179

options instead. These options are available through the params

180

attribute for the InfoExtractors to use. The YoutubeDL also

181

registers itself as the downloader in charge for the InfoExtractors

182

that are added to it, so this is a "mutual registration".

Available options:

username: Username for authentication purposes.

187

password: Password for authentication purposes.

188

videopassword: Password for accessing a video.

189

ap_mso: Adobe Pass multiple-system operator identifier.

190

ap_username: Multiple-system operator account username.

191

ap_password: Multiple-system operator account password.

192

usenetrc: Use netrc for authentication instead.

193

verbose: Print additional info to stdout.

194

quiet: Do not print messages to stdout.

195

no_warnings: Do not print out anything for warnings.

196

forceprint: A dict with keys WHEN mapped to a list of templates to

197

print to stdout. The allowed keys are video or any of the

198

items in utils.POSTPROCESS_WHEN.

199

For compatibility, a single list is also accepted

200

print_to_file: A dict with keys WHEN (same as forceprint) mapped to

201

a list of tuples with (template, filename)

202

forcejson: Force printing info_dict as JSON.

203

dump_single_json: Force printing the info_dict of the whole playlist

204

(or video) as a single JSON line.

205

force_write_download_archive: Force writing download archive regardless

206

of 'skip_download' or 'simulate'.

207

simulate: Do not download the video files. If unset (or None),

208

simulate only if listsubtitles, listformats or list_thumbnails is used

209

format: Video format code. see "FORMAT SELECTION" for more details.

210

You can also pass a function. The function takes 'ctx' as

211

argument and returns the formats to download.

212

See "build_format_selector" for an implementation

213

allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.

214

ignore_no_formats_error: Ignore "No video formats" error. Useful for

215

extracting metadata even if the video is not actually

216

available for download (experimental)

217

format_sort: A list of fields by which to sort the video formats.

218

See "Sorting Formats" for more details.

219

format_sort_force: Force the given format_sort. see "Sorting Formats"

220

for more details.

221

prefer_free_formats: Whether to prefer video formats with free containers

222

over non-free ones of same quality.

223

allow_multiple_video_streams: Allow multiple video streams to be merged

224

into a single file

225

allow_multiple_audio_streams: Allow multiple audio streams to be merged

226

into a single file

227

check_formats: Whether to test if the formats are downloadable.

228

Can be True (check all), False (check none),

229

'selected' (check selected formats),

230

or None (check only if requested by extractor)

231

paths: Dictionary of output paths. The allowed keys are 'home'

232

'temp' and the keys of OUTTMPL_TYPES (in utils.py)

233

outtmpl: Dictionary of templates for output names. Allowed keys

234

are 'default' and the keys of OUTTMPL_TYPES (in utils.py).

235

For compatibility with youtube-dl, a single string can also be used

236

outtmpl_na_placeholder: Placeholder for unavailable meta fields.

237

restrictfilenames: Do not allow "&" and spaces in file names

238

trim_file_name: Limit length of filename (extension excluded)

239

windowsfilenames: Force the filenames to be windows compatible

240

ignoreerrors: Do not stop on download/postprocessing errors.

241

Can be 'only_download' to ignore only download errors.

242

Default is 'only_download' for CLI, but False for API

243

skip_playlist_after_errors: Number of allowed failures until the rest of

244

the playlist is skipped

245

allowed_extractors: List of regexes to match against extractor names that are allowed

246

overwrites: Overwrite all video and metadata files if True,

247

overwrite only non-video files if None

248

and don't overwrite any file if False

249

For compatibility with youtube-dl,

250

"nooverwrites" may also be used instead

251

playlist_items: Specific indices of playlist to download.

252

playlistrandom: Download playlist items in random order.

253

lazy_playlist: Process playlist entries as they are received.

254

matchtitle: Download only matching titles.

255

rejecttitle: Reject downloads for matching titles.

256

logger: Log messages to a logging.Logger instance.

257

logtostderr: Print everything to stderr instead of stdout.

258

consoletitle: Display progress in console window's titlebar.

259

writedescription: Write the video description to a .description file

260

writeinfojson: Write the video metadata to a .info.json file

261

clean_infojson: Remove private fields from the infojson

262

getcomments: Extract video comments. This will not be written to disk

263

unless writeinfojson is also given

264

writeannotations: Write the video annotations to a .annotations.xml file

265

writethumbnail: Write the thumbnail image to a file

266

allow_playlist_files: Whether to write playlists' description, infojson etc

267

also to disk when using the 'write*' options

268

write_all_thumbnails: Write all thumbnail formats to files

269

writelink: Write an internet shortcut file, depending on the

270

current platform (.url/.webloc/.desktop)

271

writeurllink: Write a Windows internet shortcut file (.url)

272

writewebloclink: Write a macOS internet shortcut file (.webloc)

273

writedesktoplink: Write a Linux internet shortcut file (.desktop)

274

writesubtitles: Write the video subtitles to a file

275

writeautomaticsub: Write the automatically generated subtitles to a file

276

listsubtitles: Lists all available subtitles for the video

277

subtitlesformat: The format code for subtitles

278

subtitleslangs: List of languages of the subtitles to download (can be regex).

279

The list may contain "all" to refer to all the available

280

subtitles. The language can be prefixed with a "-" to

281

exclude it from the requested languages, e.g. ['all', '-live_chat']

282

keepvideo: Keep the video file after post-processing

283

daterange: A DateRange object, download only if the upload_date is in the range.

284

skip_download: Skip the actual download of the video file

285

cachedir: Location of the cache files in the filesystem.

286

False to disable filesystem cache.

287

noplaylist: Download single video instead of a playlist if in doubt.

288

age_limit: An integer representing the user's age in years.

289

Unsuitable videos for the given age are skipped.

290

min_views: An integer representing the minimum view count the video

291

must have in order to not be skipped.

292

Videos without view count information are always

293

downloaded. None for no limit.

294

max_views: An integer representing the maximum view count.

295

Videos that are more popular than that are not

296

downloaded.

297

Videos without view count information are always

298

downloaded. None for no limit.

299

download_archive: A set, or the name of a file where all downloads are recorded.

300

Videos already present in the file are not downloaded again.

301

break_on_existing: Stop the download process after attempting to download a

302

file that is in the archive.

303

break_on_reject: Stop the download process when encountering a video that

304

has been filtered out.

305

break_per_url: Whether break_on_reject and break_on_existing

306

should act on each input URL as opposed to for the entire queue

307

cookiefile: File name or text stream from where cookies should be read and dumped to

308

cookiesfrombrowser: A tuple containing the name of the browser, the profile

309

name/path from where cookies are loaded, the name of the keyring,

310

and the container name, e.g. ('chrome', ) or

311

('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')

312

legacyserverconnect: Explicitly allow HTTPS connection to servers that do not

313

support RFC 5746 secure renegotiation

314

nocheckcertificate: Do not verify SSL certificates

315

client_certificate: Path to client certificate file in PEM format. May include the private key

316

client_certificate_key: Path to private key file for client certificate

317

client_certificate_password: Password for client certificate private key, if encrypted.

318

If not provided and the key is encrypted, yt-dlp will ask interactively

319

prefer_insecure: Use HTTP instead of HTTPS to retrieve information.

320

(Only supported by some extractors)

321

enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons.

322

http_headers: A dictionary of custom headers to be used for all requests

323

proxy: URL of the proxy server to use

324

geo_verification_proxy: URL of the proxy to use for IP address verification

325

on geo-restricted sites.

326

socket_timeout: Time to wait for unresponsive hosts, in seconds

327

bidi_workaround: Work around buggy terminals without bidirectional text

328

support, using fribidi

329

debug_printtraffic:Print out sent and received HTTP traffic

330

default_search: Prepend this string if an input url is not valid.

331

'auto' for elaborate guessing

332

encoding: Use this encoding instead of the system-specified.

333

extract_flat: Whether to resolve and process url_results further

334

* False: Always process (default)

335

* True: Never process

336

* 'in_playlist': Do not process inside playlist/multi_video

337

* 'discard': Always process, but don't return the result

338

from inside playlist/multi_video

339

* 'discard_in_playlist': Same as "discard", but only for

340

playlists (not multi_video)

341

wait_for_video: If given, wait for scheduled streams to become available.

342

The value should be a tuple containing the range

343

(min_secs, max_secs) to wait between retries

344

postprocessors: A list of dictionaries, each with an entry

345

* key: The name of the postprocessor. See

346

yt_dlp/postprocessor/__init__.py for a list.

347

* when: When to run the postprocessor. Allowed values are

348

the entries of utils.POSTPROCESS_WHEN

349

Assumed to be 'post_process' if not given

350

progress_hooks: A list of functions that get called on download

351

progress, with a dictionary with the entries

352

* status: One of "downloading", "error", or "finished".

353

Check this first and ignore unknown values.

354

* info_dict: The extracted info_dict

355

356

If status is one of "downloading", or "finished", the

357

following properties may also be present:

358

* filename: The final filename (always present)

359

* tmpfilename: The filename we're currently writing to

360

* downloaded_bytes: Bytes on disk

361

* total_bytes: Size of the whole file, None if unknown

362

* total_bytes_estimate: Guess of the eventual file size,

363

None if unavailable.

364

* elapsed: The number of seconds since download started.

365

* eta: The estimated time in seconds, None if unknown

366

* speed: The download speed in bytes/second, None if

367

unknown

368

* fragment_index: The counter of the currently

369

downloaded video fragment.

370

* fragment_count: The number of fragments (= individual

371

files that will be merged)

372

373

Progress hooks are guaranteed to be called at least once

374

(with status "finished") if the download is successful.

375

postprocessor_hooks: A list of functions that get called on postprocessing

376

progress, with a dictionary with the entries

377

* status: One of "started", "processing", or "finished".

378

Check this first and ignore unknown values.

379

* postprocessor: Name of the postprocessor

380

* info_dict: The extracted info_dict

381

382

Progress hooks are guaranteed to be called at least twice

383

(with status "started" and "finished") if the processing is successful.

384

merge_output_format: "/" separated list of extensions to use when merging formats.

385

final_ext: Expected final extension; used to detect when the file was

386

already downloaded and converted

387

fixup: Automatically correct known faults of the file.

388

One of:

389

- "never": do nothing

390

- "warn": only emit a warning

391

- "detect_or_warn": check whether we can do anything

392

about it, warn otherwise (default)

393

source_address: Client-side IP address to bind to.

394

sleep_interval_requests: Number of seconds to sleep between requests

395

during extraction

396

sleep_interval: Number of seconds to sleep before each download when

397

used alone or a lower bound of a range for randomized

398

sleep before each download (minimum possible number

399

of seconds to sleep) when used along with

400

max_sleep_interval.

401

max_sleep_interval:Upper bound of a range for randomized sleep before each

402

download (maximum possible number of seconds to sleep).

403

Must only be used along with sleep_interval.

404

Actual sleep time will be a random float from range

405

[sleep_interval; max_sleep_interval].

406

sleep_interval_subtitles: Number of seconds to sleep before each subtitle download

407

listformats: Print an overview of available video formats and exit.

408

list_thumbnails: Print a table of all thumbnails and exit.

409

match_filter: A function that gets called for every video with the signature

410

(info_dict, *, incomplete: bool) -> Optional[str]

411

For backward compatibility with youtube-dl, the signature

412

(info_dict) -> Optional[str] is also allowed.

413

- If it returns a message, the video is ignored.

414

- If it returns None, the video is downloaded.

415

- If it returns utils.NO_DEFAULT, the user is interactively

416

asked whether to download the video.

417

match_filter_func in utils.py is one example for this.

418

no_color: Do not emit color codes in output.

419

geo_bypass: Bypass geographic restriction via faking X-Forwarded-For

420

HTTP header

421

geo_bypass_country:

422

Two-letter ISO 3166-1 alpha-2 country code that will be used for

423

explicit geographic restriction bypassing via faking

424

X-Forwarded-For HTTP header

425

geo_bypass_ip_block:

426

IP range in CIDR notation that will be used similarly to

427

geo_bypass_country

428

external_downloader: A dictionary of protocol keys and the executable of the

429

external downloader to use for it. The allowed protocols
are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.

430

431

Set the value to 'native' to use the native downloader

432

compat_opts: Compatibility options. See "Differences in default behavior".

433

The following options do not work when used through the API:

434

filename, abort-on-error, multistreams, no-live-chat, format-sort

435

no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.

436

Refer to __init__.py for their implementation

437

progress_template: Dictionary of templates for progress outputs.

438

Allowed keys are 'download', 'postprocess',

439

'download-title' (console title) and 'postprocess-title'.

440

The template is mapped on a dictionary with keys 'progress' and 'info'

441

retry_sleep_functions: Dictionary of functions that takes the number of attempts

442

as argument and returns the time to sleep in seconds.

443

Allowed keys are 'http', 'fragment', 'file_access'

444

download_ranges: A callback function that gets called for every video with

445

the signature (info_dict, ydl) -> Iterable[Section].

446

Only the returned sections will be downloaded.

447

Each Section is a dict with the following keys:

448

* start_time: Start time of the section in seconds

449

* end_time: End time of the section in seconds

450

* title: Section title (Optional)

451

* index: Section number (Optional)

452

force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts

453

noprogress: Do not print the progress bar

454

live_from_start: Whether to download livestream videos from the start

455

456

The following parameters are not used by YoutubeDL itself, they are used by

457

the downloader (see yt_dlp/downloader/common.py):

458

nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,

459

max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,

460

continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,

461

external_downloader_args, concurrent_fragment_downloads.

462

463

The following options are used by the post processors:

464

ffmpeg_location: Location of the ffmpeg/avconv binary; either the path

465

to the binary or its containing directory.

466

postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)

467

and a list of additional command-line arguments for the

468

postprocessor/executable. The dict can also have "PP+EXE" keys

469

which are used when the given exe is used by the given PP.

470

Use 'default' as the name for arguments to be passed to all PP

471

For compatibility with youtube-dl, a single list of args

472

can also be used

473

474

The following options are used by the extractors:

475

extractor_retries: Number of times to retry for known errors

476

dynamic_mpd: Whether to process dynamic DASH manifests (default: True)

477

hls_split_discontinuity: Split HLS playlists to different formats at

478

discontinuities such as ad breaks (default: False)

479

extractor_args: A dictionary of arguments to be passed to the extractors.

480

See "EXTRACTOR ARGUMENTS" for details.

481

E.g. {'youtube': {'skip': ['dash', 'hls']}}

482

mark_watched: Mark videos watched (even with --simulate). Only for YouTube

483

484

The following options are deprecated and may be removed in the future:

485

486

force_generic_extractor: Force downloader to use the generic extractor

487

- Use allowed_extractors = ['generic', 'default']

488

playliststart: - Use playlist_items

489

Playlist item to start at.

490

playlistend: - Use playlist_items

491

Playlist item to end at.

492

playlistreverse: - Use playlist_items

493

Download playlist items in reverse order.

494

forceurl: - Use forceprint

495

Force printing final URL.

496

forcetitle: - Use forceprint

497

Force printing title.

498

forceid: - Use forceprint

499

Force printing ID.

500

forcethumbnail: - Use forceprint

501

Force printing thumbnail URL.

502

forcedescription: - Use forceprint

503

Force printing description.

504

forcefilename: - Use forceprint

505

Force printing final filename.

506

forceduration: - Use forceprint

507

Force printing duration.

508

allsubtitles: - Use subtitleslangs = ['all']

509

Downloads all the subtitles of the video

510

(requires writesubtitles or writeautomaticsub)

511

include_ads: - Doesn't work

512

Download ads as well

513

call_home: - Not implemented

514

Boolean, true iff we are allowed to contact the

515

yt-dlp servers for debugging.

516

post_hooks: - Register a custom postprocessor

517

A list of functions that get called as the final step

518

for each video file, after all postprocessors have been

519

called. The filename will be passed as the only argument.

520

hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.

521

Use the native HLS downloader instead of ffmpeg/avconv

522

if True, otherwise use ffmpeg/avconv if False, otherwise

523

use downloader suggested by extractor if None.

524

prefer_ffmpeg: - avconv support is deprecated

525

If False, use avconv instead of ffmpeg if both are available,

526

otherwise prefer ffmpeg.

527

youtube_include_dash_manifest: - Use extractor_args

528

If True (default), DASH manifests and related

529

data will be downloaded and processed by extractor.

530

You can reduce network I/O by disabling it if you don't

531

care about DASH. (only for youtube)

532

youtube_include_hls_manifest: - Use extractor_args

533

If True (default), HLS manifests and related

534

data will be downloaded and processed by extractor.

535

You can reduce network I/O by disabling it if you don't

536

care about HLS. (only for youtube)

"""

# Names of the fields collected under "numeric" (grouped by theme below;
# the grouping is cosmetic since this is an unordered set).
_NUMERIC_FIELDS = {
    # Picture and audio properties
    'width', 'height', 'fps',
    'asr', 'audio_channels',
    # Bitrates and sizes
    'tbr', 'abr', 'vbr',
    'filesize', 'filesize_approx',
    # Times
    'timestamp', 'release_timestamp',
    'duration', 'start_time', 'end_time',
    # Counters and ratings
    'view_count', 'like_count', 'dislike_count', 'repost_count',
    'comment_count', 'average_rating', 'age_limit',
    # Positions within a series / album
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number', 'release_year',
}

# Names of the keys that belong to a single format entry.
# NB: Keep in sync with the docstring of extractor/common.py
_format_fields = {
    # Identification and location
    'url', 'manifest_url', 'manifest_stream_number',
    'ext', 'format', 'format_id', 'format_note',
    # Video / audio characteristics
    'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range',
    'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
    'vbr', 'fps', 'vcodec', 'container',
    'filesize', 'filesize_approx', 'rows', 'columns',
    # Delivery
    'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
    # Ranking
    'preference', 'language', 'language_preference', 'quality', 'source_preference',
    # Download behaviour
    'http_headers', 'stretched_ratio', 'no_resume', 'has_drm',
    'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
    # RTMP specific
    'page_url', 'app', 'play_path', 'tc_url', 'flash_version',
    'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
}

560

# File extensions grouped by media kind, built from MEDIA_EXTENSIONS;
# '3gp' is added to the common video extensions.
_format_selection_exts = {
    'audio': {*MEDIA_EXTENSIONS.common_audio},
    'video': {*MEDIA_EXTENSIONS.common_video, '3gp'},
    'storyboards': {*MEDIA_EXTENSIONS.storyboards},
}

565

566

def __init__(self, params=None, auto_init=True):
    """Create the downloader object with the given options.

    @param params       Dictionary of options. When None, an empty dict is used.
                        The dict is stored by reference as self.params and is
                        normalised in place below (e.g. 'compat_opts', 'overwrites',
                        'forceprint', 'http_headers' are rewritten).
    @param auto_init    Whether to load the default extractors and print header (if verbose).
                        Set to 'no_verbose_header' to not print the header
    """
    if params is None:
        params = {}
    self.params = params
    # Registered extractors: everything by key, and the subset that are instances
    self._ies = {}
    self._ies_instances = {}
    # One postprocessor list per POSTPROCESS_WHEN stage
    self._pps = {k: [] for k in POSTPROCESS_WHEN}
    # Messages already emitted with only_once=True
    self._printed_messages = set()
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._postprocessor_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._num_videos = 0
    self._playlist_level = 0
    self._playlist_urls = set()
    self.cache = Cache(self)

    # Select the output streams: 'logtostderr' redirects normal output to stderr,
    # and 'quiet' sends screen messages to stderr as well
    stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
    self._out_files = Namespace(
        out=stdout,
        error=sys.stderr,
        screen=sys.stderr if self.params.get('quiet') else stdout,
        # First of stderr/stdout that supports terminal sequences; never set on 'nt'
        console=None if compat_os_name == 'nt' else next(
            filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
    )

    # Best effort: a failure here is only reported as a debug message
    try:
        windows_enable_vt_mode()
    except Exception as e:
        self.write_debug(f'Failed to enable VT mode: {e}')

    # Per-stream colour permission (all streams except 'console')
    self._allow_colors = Namespace(**{
        type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
        for type_, stream in self._out_files.items_ if type_ != 'console'
    })

    # The code is left like this to be reused for future deprecations
    MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
    current_version = sys.version_info[:2]
    if current_version < MIN_RECOMMENDED:
        msg = ('Support for Python version %d.%d has been deprecated. '
               'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
               '\n You will no longer receive updates on this version')
        if current_version < MIN_SUPPORTED:
            msg = 'Python version %d.%d is no longer supported'
        # The f-string only splices `msg` in; the %d placeholders are filled afterwards
        self.deprecation_warning(
            f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))

    if self.params.get('allow_unplayable_formats'):
        self.report_warning(
            f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
            'This is a developer option intended for debugging. \n'
            ' If you experience any issues while using this option, '
            f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

    if self.params.get('bidi_workaround', False):
        # Pipe output through an external bidi program attached to a pty;
        # _bidi_workaround() later writes to its stdin and reads from the pty master
        try:
            import pty
            master, slave = pty.openpty()
            width = shutil.get_terminal_size().columns
            # NOTE(review): shutil.get_terminal_size() is documented to return integer
            # columns, so the `width is None` branch looks unreachable - confirm
            width_args = [] if width is None else ['-w', str(width)]
            sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
            # Prefer bidiv; fall back to fribidi if bidiv cannot be started
            try:
                self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
            except OSError:
                self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # A missing executable only disables the workaround; anything else propagates
            if ose.errno == errno.ENOENT:
                self.report_warning(
                    'Could not find fribidi executable, ignoring --bidi-workaround. '
                    'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    # Normalise compat_opts to a set so membership tests below work on any iterable
    self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
    if auto_init and auto_init != 'no_verbose_header':
        self.print_debug_header()

    def check_deprecated(param, option, suggestion):
        """Warn if `param` is set (not None); return whether it was set."""
        if self.params.get(param) is not None:
            self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
            return True
        return False

    # The old option only fills in the new one when the new one is not given
    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    # Replay warnings that were collected before this object existed
    for msg in self.params.get('_warnings', []):
        self.report_warning(msg)
    for msg in self.params.get('_deprecation_warnings', []):
        self.deprecated_feature(msg)

    if 'list-formats' in self.params['compat_opts']:
        self.params['listformats_table'] = False

    if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
        # nooverwrites was unnecessarily changed to overwrites
        # in 0c3d0f51778b153f65c21906031c2e091fcfb641
        # This ensures compatibility with both keys
        self.params['overwrites'] = not self.params['nooverwrites']
    elif self.params.get('overwrites') is None:
        # Drop an explicit None so that the key is simply absent
        self.params.pop('overwrites', None)
    else:
        self.params['nooverwrites'] = not self.params['overwrites']

    # Any of the listing options implies simulation unless 'simulate' was set explicitly
    if self.params.get('simulate') is None and any((
        self.params.get('list_thumbnails'),
        self.params.get('listformats'),
        self.params.get('listsubtitles'),
    )):
        self.params['simulate'] = 'list_only'

    self.params.setdefault('forceprint', {})
    self.params.setdefault('print_to_file', {})

    # Compatibility with older syntax
    if not isinstance(params['forceprint'], dict):
        self.params['forceprint'] = {'video': params['forceprint']}

    if auto_init:
        self.add_default_info_extractors()

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self._parse_outtmpl()

    # Creating format selector here allows us to catch syntax errors before the extraction
    # (None and '-' are kept as-is, callables are used directly, anything else is parsed)
    self.format_selector = (
        self.params.get('format') if self.params.get('format') in (None, '-')
        else self.params['format'] if callable(self.params['format'])
        else self.build_format_selector(self.params['format']))

    # Set http_headers defaults according to std_headers
    self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

    # Register the hooks passed through params with the matching add_* method
    hooks = {
        'post_hooks': self.add_post_hook,
        'progress_hooks': self.add_progress_hook,
        'postprocessor_hooks': self.add_postprocessor_hook,
    }
    for opt, fn in hooks.items():
        for ph in self.params.get(opt, []):
            fn(ph)

    for pp_def_raw in self.params.get('postprocessors', []):
        # Work on a copy so that popping 'when'/'key' does not alter the caller's dict
        pp_def = dict(pp_def_raw)
        when = pp_def.pop('when', 'post_process')
        self.add_post_processor(
            get_postprocessor(pp_def.pop('key'))(self, **pp_def),
            when=when)

    self._setup_opener()
    register_socks_protocols()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified

        Returns an empty set for None, returns `fn` itself when it is not
        path-like, and otherwise a set of the stripped lines of the file
        (empty if the file does not exist).
        """
        archive = set()
        if fn is None:
            return archive
        elif not is_path_like(fn):
            return fn

        self.write_debug(f'Loading archive file {fn!r}')
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    archive.add(line.strip())
        except OSError as ioe:
            # A missing archive file is not an error; other OS errors propagate
            if ioe.errno != errno.ENOENT:
                raise
        return archive

    self.archive = preload_download_archive(self.params.get('download_archive'))

759

760

def warn_if_short_id(self, argv):
    """Warn when an argument looks like a short video ID starting with a dash.

    Such arguments ("-" followed by exactly ten ID characters) are
    indistinguishable from options, so a corrected command line that
    moves them behind "--" is suggested through report_warning().

    @param argv  Sequence of command-line arguments
    """
    # short YouTube ID starting with dash?
    suspects = [pos for pos, arg in enumerate(argv) if re.match(r'^-[0-9A-Za-z_-]{10}$', arg)]
    if not suspects:
        return

    remaining = [arg for pos, arg in enumerate(argv) if pos not in suspects]
    moved = [argv[pos] for pos in suspects]
    correct_argv = ['yt-dlp'] + remaining + ['--'] + moved
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s' %
        args_to_str(correct_argv))

775

776

def add_info_extractor(self, ie):
    """Register an InfoExtractor (class or instance) under its ie_key().

    Classes are only recorded in self._ies. Instances are additionally
    recorded in self._ies_instances and bound to this downloader.
    """
    key = ie.ie_key()
    self._ies[key] = ie
    if isinstance(ie, type):
        return
    self._ies_instances[key] = ie
    ie.set_downloader(self)

783

784

def get_info_extractor(self, ie_key):
    """
    Return an IE instance for ie_key.

    An instance already stored in self._ies_instances is reused; otherwise
    a new one is created, registered via add_info_extractor() and returned.
    """
    cached = self._ies_instances.get(ie_key)
    if cached is not None:
        return cached
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created

795

796

def add_default_info_extractors(self):
    """
    Register the extractors selected by the 'allowed_extractors' param
    (['default'] when not given) from those yielded by gen_extractor_classes()

    Raises ValueError if one of the given regexes is invalid.
    """
    all_ies = {}
    for ie_class in gen_extractor_classes():
        all_ies[ie_class.IE_NAME.lower()] = ie_class
    all_ies['end'] = UnsupportedURLIE()

    try:
        requested = self.params.get('allowed_extractors', ['default'])
        aliases = {
            'all': list(all_ies),
            'default': [name for name, ie in all_ies.items() if ie._ENABLED],
        }
        ie_names = orderedSet_from_options(requested, aliases, use_regex=True)
    except re.error as e:
        raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')

    for name in ie_names:
        self.add_info_extractor(all_ies[name])
    self.write_debug(f'Loaded {len(ie_names)} extractors')

813

814

def add_post_processor(self, pp, when='post_process'):
    """Append a PostProcessor to the chain of the given stage and bind it to this downloader.

    @param when  One of POSTPROCESS_WHEN (checked with an assertion)
    """
    assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
    stage_chain = self._pps[when]
    stage_chain.append(pp)
    pp.set_downloader(self)

819

820

def add_post_hook(self, ph):
    """Register `ph` at the end of the list of post hooks"""
    registered = self._post_hooks
    registered.append(ph)

823

824

def add_progress_hook(self, ph):
    """Register `ph` at the end of the list of download progress hooks"""
    registered = self._progress_hooks
    registered.append(ph)

827

828

def add_postprocessor_hook(self, ph):
    """Register the postprocessing progress hook `ph`.

    The hook is stored and also handed to every postprocessor that is
    already registered, in stage order.
    """
    self._postprocessor_hooks.append(ph)
    for registered_pp in itertools.chain.from_iterable(self._pps.values()):
        registered_pp.add_progress_hook(ph)

834

835

def _bidi_workaround(self, message):
    """Pass `message` through the external bidi process, if one was set up.

    Without self._output_channel the message is returned unchanged.
    Otherwise it is written (newline-terminated, encoded) to the process'
    stdin and as many lines as the message has are read back from the
    output channel; the final newline is stripped from the result.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, str)
    expected_lines = message.count('\n') + 1
    self._output_process.stdin.write((message + '\n').encode())
    self._output_process.stdin.flush()
    received = []
    for _ in range(expected_lines):
        received.append(self._output_channel.readline().decode())
    return ''.join(received)[:-len('\n')]

847

848

def _write_string(self, message, out=None, only_once=False):
    """Write `message` to `out` using the 'encoding' param.

    With only_once=True, a message that was already written this way is
    skipped; otherwise it is remembered in self._printed_messages.
    """
    if only_once:
        already_printed = self._printed_messages
        if message in already_printed:
            return
        already_printed.add(message)
    write_string(message, out=out, encoding=self.params.get('encoding'))

854

855

def to_stdout(self, message, skip_eol=False, quiet=None):
    """Print message, followed by a newline, to the 'out' stream

    `skip_eol` and `quiet` are no longer honoured; passing either one only
    triggers a deprecation warning.
    """
    deprecated_args = (
        (quiet is not None,
         '"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
         'Use "YoutubeDL.to_screen" instead'),
        (skip_eol is not False,
         '"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
         'Use "YoutubeDL.to_screen" instead'),
    )
    for was_passed, warning in deprecated_args:
        if was_passed:
            self.deprecation_warning(warning)

    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.out)

864

865

def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
    """Print message to the 'screen' stream unless in quiet mode

    If a 'logger' param is set, the message goes to its debug() instead.
    `quiet` overrides the 'quiet' param when not None; 'verbose' always
    forces output. A newline is appended unless skip_eol is set.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return

    suppressed = self.params.get('quiet') if quiet is None else quiet
    if suppressed and not self.params.get('verbose'):
        return

    line_end = '' if skip_eol else '\n'
    self._write_string(
        str(self._bidi_workaround(message)) + line_end,
        self._out_files.screen, only_once=only_once)

875

876

def to_stderr(self, message, only_once=False):
    """Print message, followed by a newline, to the 'error' stream

    If a 'logger' param is set, the message goes to its error() instead.
    """
    assert isinstance(message, str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.error, only_once=only_once)

883

884

def _send_console_code(self, code):
    """Write a terminal control sequence to the console stream, if there is one (never on 'nt')"""
    if compat_os_name != 'nt' and self._out_files.console:
        self._write_string(code, self._out_files.console)

888

889

def to_console_title(self, message):
    """Show `message` in the console window's title bar.

    Does nothing unless the 'consoletitle' param is enabled. Terminal
    sequences are stripped from the message first. On 'nt' the title is
    set through the kernel32 console API (only if a console window
    exists); elsewhere a terminal escape sequence is sent.
    """
    if not self.params.get('consoletitle', False):
        return
    message = remove_terminal_sequences(message)
    if compat_os_name == 'nt':
        # ctypes is not among the module-level imports visible at the top of
        # the file, so import it here to avoid a NameError on this branch
        import ctypes

        if ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    else:
        self._send_console_code(f'\033]0;{message}\007')

900

901

def save_console_title(self):
    """Ask the terminal to save its title, when 'consoletitle' is on and not simulating"""
    if self.params.get('consoletitle') and not self.params.get('simulate'):
        self._send_console_code('\033[22;0t')  # Save the title on stack

905

906

def restore_console_title(self):
    """Ask the terminal to restore its saved title, when 'consoletitle' is on and not simulating"""
    if self.params.get('consoletitle') and not self.params.get('simulate'):
        self._send_console_code('\033[23;0t')  # Restore the title from stack

910

911

def __enter__(self):
    """Enter the context: save the console title and return this object"""
    self.save_console_title()
    return self

914

915

def __exit__(self, *args):
    """Leave the context: restore the console title and, if a 'cookiefile' is set, save the cookie jar"""
    self.restore_console_title()

    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save(ignore_discard=True, ignore_expires=True)

920

921

def trouble(self, message=None, tb=None, is_error=True):
    """Determine action to take when a download problem appears.

    The message (if any) is printed first. Then, depending on whether the
    downloader has been configured to ignore errors, this method either
    raises DownloadError or records a non-zero return code.

    @param tb          If given, is additional traceback information
                       (only printed in verbose mode; generated when None)
    @param is_error    Whether to raise error according to ignoreerrors
    """
    if message is not None:
        self.to_stderr(message)

    exc_type, exc_value = sys.exc_info()[:2]

    def carries_exc_info():
        # True if .trouble was called from an except block whose exception
        # itself wraps the exc_info of another exception
        return bool(exc_type and hasattr(exc_value, 'exc_info') and exc_value.exc_info[0])

    if self.params.get('verbose'):
        if tb is None:
            if exc_type:  # if .trouble has been called from an except block
                pieces = []
                if carries_exc_info():
                    pieces.append(''.join(traceback.format_exception(*exc_value.exc_info)))
                pieces.append(encode_compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if not is_error:
        return
    if self.params.get('ignoreerrors'):
        self._download_retcode = 1
        return

    exc_info = exc_value.exc_info if carries_exc_info() else sys.exc_info()
    raise DownloadError(message, exc_info)

# Named text styles; each value is the style/colour string handed to the
# text formatter by the reporting helpers (e.g. Styles.WARNING, Styles.ERROR)
Styles = Namespace(
    HEADERS='yellow',
    EMPHASIS='light blue',
    FILENAME='green',
    ID='green',
    DELIM='blue',
    ERROR='red',
    WARNING='yellow',
    SUPPRESS='light black',
)

965

966

def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):

text = str(text)

if test_encoding:

original_text = text

# handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711

971

encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'

972

text = text.encode(encoding, 'ignore').decode(encoding)

973

if fallback is not None and text != original_text:

974

text = fallback

975

return format_text(text, f) if allow_colors else text if fallback is None else fallback

976

977

def _format_out(self, *args, **kwargs):

978

return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

979

980

def _format_screen(self, *args, **kwargs):

981

return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

982

983

def _format_err(self, *args, **kwargs):

984

return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

985

986

def report_warning(self, message, only_once=False):
    """Report a warning.

    With a 'logger' param the message goes to logger.warning() unchanged.
    Otherwise, unless 'no_warnings' is set, it is printed to stderr
    prefixed with 'WARNING:' (styled with Styles.WARNING).
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    prefix = self._format_err('WARNING:', self.Styles.WARNING)
    self.to_stderr(f'{prefix} {message}', only_once)

997

998

def deprecation_warning(self, message, *, stacklevel=0):
    """Emit a deprecation warning through the module-level helper.

    The message is routed to `self.report_error` with is_error=False, and
    the stack level is increased by one (presumably to skip this wrapper
    frame; confirm against the helper's implementation).
    """
    caller_level = stacklevel + 1
    deprecation_warning(
        message, stacklevel=caller_level, printer=self.report_error, is_error=False)

1001

1002

def deprecated_feature(self, message):
    """Report use of a deprecated feature.

    If a 'logger' param is set, the notice is sent to logger.warning().
    In every case it is also printed once via to_stderr, prefixed with
    'Deprecated Feature:' (styled with Styles.ERROR).
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(f'Deprecated Feature: {message}')
    prefix = self._format_err('Deprecated Feature:', self.Styles.ERROR)
    self.to_stderr(f'{prefix} {message}', True)

1006

1007

def report_error(self, message, *args, **kwargs):
    """Do the same as trouble, but prefix the message with 'ERROR:'.

    The prefix is styled with Styles.ERROR; extra arguments are passed
    through to `trouble` unchanged.
    """
    prefix = self._format_err('ERROR:', self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', *args, **kwargs)

1013

1014

def write_debug(self, message, only_once=False):
    """Log a debug message, or print it to stderr when no logger is set.

    Nothing happens unless the 'verbose' param is truthy. The message is
    prefixed with '[debug] '.
    """
    if self.params.get('verbose', False):
        line = f'[debug] {message}'
        logger = self.params.get('logger')
        if logger:
            logger.debug(line)
        else:
            self.to_stderr(line, only_once)

1023

1024

def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded.

    Falls back to a message without the file name if printing it raises
    UnicodeEncodeError.
    """
    try:
        detailed = '[download] %s has already been downloaded' % file_name
        self.to_screen(detailed)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')

1030

1031

def report_file_delete(self, file_name):
    """Report that existing file will be deleted.

    Falls back to a message without the file name if printing it raises
    UnicodeEncodeError.
    """
    try:
        detailed = 'Deleting existing file %s' % file_name
        self.to_screen(detailed)
    except UnicodeEncodeError:
        self.to_screen('Deleting existing file')

1037

1038

def raise_no_formats(self, info, forced=False, *, msg=None):
    """Raise or warn about a video without usable formats.

    @param info      Info dict; '_has_drm', 'id' and 'extractor' are read
    @param forced    Raise even if 'ignore_no_formats_error' is set
    @param msg       Custom message; when given the error counts as expected

    Raises ExtractorError unless the error is ignored (and not forced),
    in which case only a warning is reported.
    """
    has_drm = info.get('_has_drm')
    ignored = self.params.get('ignore_no_formats_error')
    expected = bool(msg)
    if not msg:
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'

    if ignored and not forced:
        self.report_warning(msg)
        return
    raise ExtractorError(
        msg, video_id=info['id'], ie=info['extractor'],
        expected=has_drm or ignored or expected)

1047

1048

def parse_outtmpl(self):
    """Deprecated public wrapper around `_parse_outtmpl`.

    Emits a deprecation warning, normalizes the 'outtmpl' param and
    returns it.
    """
    self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
    self._parse_outtmpl()
    normalized = self.params['outtmpl']
    return normalized

1052

1053

def _parse_outtmpl(self):
    """Normalize the 'outtmpl' param into a dict and fill in defaults.

    A non-dict value becomes {'default': value}. Every key of
    DEFAULT_OUTTMPL that is missing (or None) is filled from the defaults;
    with 'restrictfilenames' the default templates have their spaces removed.
    """
    sanitize = IDENTITY
    if self.params.get('restrictfilenames'):  # Remove spaces in the default template
        def sanitize(template):
            return template.replace(' - ', ' ').replace(' ', '-')

    outtmpl = self.params.setdefault('outtmpl', {})
    if not isinstance(outtmpl, dict):
        outtmpl = {'default': outtmpl}
        self.params['outtmpl'] = outtmpl

    missing = {
        name: sanitize(template)
        for name, template in DEFAULT_OUTTMPL.items()
        if outtmpl.get(name) is None
    }
    outtmpl.update(missing)

1062

1063

def get_output_path(self, dir_type='', filename=None):
    """Build the output path for `filename` from the 'paths' param.

    Joins the expanded 'home' path, the expanded path registered for
    `dir_type` (if any) and the filename, then sanitizes the result
    (forced when 'windowsfilenames' is set).
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict), '"paths" parameter must be a dictionary'

    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    joined = os.path.join(home_dir, type_dir, filename or '')
    return sanitize_path(joined, force=self.params.get('windowsfilenames'))

1071

1072

@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Expand user/env references in `outtmpl` while keeping '%%' and '$$'.

    expand_path translates '%%' into '%' and '$$' into '$', which is not
    wanted here because '%%' must survive until the template dict
    substitution step. As a workaround, a random 32-letter marker is
    inserted between the doubled characters and removed again afterwards.
    """
    marker = ''.join(random.choices(ascii_letters, k=32))
    protected = outtmpl.replace('%%', f'%{marker}%').replace('$$', f'${marker}$')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    expanded = expand_path(protected)
    return expanded.replace(marker, '')

1086

1087

@staticmethod
def escape_outtmpl(outtmpl):
    """Escape any remaining strings like %s, %abc% etc.

    Matches without a key get an extra leading '%'; keyed matches are
    left untouched.
    """
    def escape_match(mobj):
        matched = mobj.group(0)
        if mobj.group('has_key'):
            return matched
        return '%' + matched

    pattern = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')
    return re.sub(pattern, escape_match, outtmpl)

@classmethod
def validate_outtmpl(cls, outtmpl):
    """Check that `outtmpl` is a usable output template.

    The custom conversion types (l, j, h, q, B, U, D, S) are replaced by
    's', then the escaped template is formatted against a defaultdict.

    @return None or Exception object
    """
    def as_plain_string(mobj):
        return f'{mobj.group(0)[:-1]}s'

    outtmpl = re.sub(
        STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
        as_plain_string,
        cls._outtmpl_expandpath(outtmpl))
    try:
        cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None

@staticmethod

def _copy_infodict(info_dict):

1110

info_dict = dict(info_dict)

1111

info_dict.pop('__postprocessors', None)

1112

info_dict.pop('__pending_error', None)

1113

return info_dict

1114

1115

def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

    Returns a tuple (rewritten template, dict of values keyed by the
    rewritten field keys).

    @param sanitize    Whether to sanitize the output as a filename.
                       For backward compatibility, a function can also be passed
    """

    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    info_dict = self._copy_infodict(info_dict)
    if info_dict.get('duration', None) is not None:
        # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        info_dict['duration_string'] = formatSeconds(info_dict['duration'], '-' if sanitize else ':')
    else:
        info_dict['duration_string'] = None
    info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
    info_dict['video_autonumber'] = self._num_videos
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
        'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int, slice or "{field, ...}"
    FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
    FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
        'inner': FIELD_INNER_RE,
        'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
    }
    MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
        (?P<negate>-)?
        (?P<fields>{FIELD_RE})
        (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
        (?:>(?P<strf_format>.+?))?
        (?P<remaining>
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
        )$''')

    def _traverse_infodict(fields):
        # Split on dots, keeping each "{...}" group together as one element
        fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                  for f in ([x] if x.startswith('{') else x.split('.'))]
        # Drop an empty leading/trailing element left over by the split
        for i in (0, -1):
            if fields and not fields[i]:
                fields.pop(i)

        for i, f in enumerate(fields):
            if not f.startswith('{'):
                continue
            assert f.endswith('}'), f'No closing brace for {f} in {fields}'
            fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}

        return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            # Alternates between "expect an operator" (None) and "expect an operand"
            math_op = None
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if math_op else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if math_op is None:
                    math_op = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                offset = float_or_none(item)
                if offset is None:
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = math_op(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                math_op = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
        if sanitize and value == '':
            value = None
        return value

    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
        return sanitize_filename(str(value), restricted=restricted, is_id=(
            bool(re.search(r'(^|[_.])id(\.|$)', key))
            if 'filename-sanitization' in self.params['compat_opts']
            else NO_DEFAULT))

    # `sanitize` may be a callable (backward compatibility); from here on it
    # is only used as a flag and the callable lives in `sanitizer`
    sanitizer = sanitize if callable(sanitize) else filename_sanitizer
    sanitize = bool(sanitize)

    def _dumpjson_default(obj):
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        return repr(obj)

    def create_key(outer_mobj):
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        initial_field = mobj.group('fields') if mobj else ''
        value, replacement, default = None, None, na
        # Walk the comma-separated alternates until one yields a value
        while mobj:
            mobj = mobj.groupdict()
            if mobj['default'] is not None:
                default = mobj['default']
            value = get_value(mobj)
            replacement = mobj['replacement']
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
            else:
                break

        fmt = outer_mobj.group('format')
        if fmt == 's' and value is not None and key in field_size_compat_map:
            fmt = f'0{field_size_compat_map[key]:d}d'

        if value is None:
            value = default
        elif replacement is not None:
            value = replacement

        flags = outer_mobj.group('conversion') or ''
        str_fmt = f'{fmt[:-1]}s'
        conv = fmt[-1]
        if conv == 'l':  # list
            delim = '\n' if '#' in flags else ', '
            value = delim.join(map(str, variadic(value, allowed_types=(str, bytes))))
            fmt = str_fmt
        elif conv == 'j':  # json
            value = json.dumps(
                value, default=_dumpjson_default,
                indent=4 if '#' in flags else None, ensure_ascii='+' not in flags)
            fmt = str_fmt
        elif conv == 'h':  # html
            value = escapeHTML(str(value))
            fmt = str_fmt
        elif conv == 'q':  # quoted
            value = map(str, variadic(value) if '#' in flags else [value])
            value = ' '.join(map(compat_shlex_quote, value))
            fmt = str_fmt
        elif conv == 'B':  # bytes
            value = f'%{str_fmt}'.encode() % str(value).encode()
            value = value.decode('utf-8', 'ignore')
            fmt = 's'
        elif conv == 'U':  # unicode normalized
            value = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                value)
            fmt = str_fmt
        elif conv == 'D':  # decimal suffix
            num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
            value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                          factor=1024 if '#' in flags else 1000)
        elif conv == 'S':  # filename sanitization
            value = filename_sanitizer(initial_field, value, restricted='#' in flags)
            fmt = str_fmt
        elif conv == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif conv not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), str_fmt
            if fmt[-1] in 'csr':
                value = sanitizer(initial_field, value)

        key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

1311

1312

def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Evaluate `outtmpl` against `info_dict` and return the resulting string.

    Extra arguments are forwarded to `prepare_outtmpl`.
    """
    prepared_tmpl, values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    return self.escape_outtmpl(prepared_tmpl) % values

1315

1316

def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):

1317

assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'

1318

if outtmpl is None:

1319

outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])

1320

try:

1321

outtmpl = self._outtmpl_expandpath(outtmpl)

1322

filename = self.evaluate_outtmpl(outtmpl, info_dict, True)

if not filename:

return None

if tmpl_type in ('', 'temp'):

1327

final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')

1328

if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):

1329

filename = replace_extension(filename, ext, final_ext)

1330

elif tmpl_type:

1331

force_ext = OUTTMPL_TYPES[tmpl_type]

1332

if force_ext:

1333

filename = replace_extension(filename, force_ext, info_dict.get('ext'))

1334

1335

# https://github.com/blackjack4494/youtube-dlc/issues/85

1336

trim_file_name = self.params.get('trim_file_name', False)

1337

if trim_file_name:

1338

no_ext, *ext = filename.rsplit('.', 2)

1339

filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

1340

1341

return filename

1342

except ValueError as err:

1343

self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')

1344

return None

1345

1346

def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
    """Generate the output filename

    @param dir_type    Template/path type to use; mutually exclusive with outtmpl
    @param outtmpl     Explicit template to evaluate instead of a configured one
    @param warn        Warn when the 'paths' param will be ignored (output to
                       stdout, or an absolute path in the template)

    Returns '' when no filename could be produced for a dir_type other
    than ''/'temp'; returns the raw result ('-', '' or None) when there is
    nothing to join with a base path; otherwise the full output path.
    """
    if outtmpl:
        assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
        dir_type = None
    filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
    if not filename and dir_type not in ('', 'temp'):
        return ''

    # `filename` may be None here (e.g. template error with errors ignored);
    # os.path.isabs(None) would raise TypeError, so only inspect real names
    if warn and filename and self.params.get('paths'):
        if filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        elif os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
    if filename == '-' or not filename:
        return filename

    return self.get_output_path(dir_type, filename)

1366

1367

def _match_entry(self, info_dict, incomplete=False, silent=False):
    """Returns None if the file should be downloaded

    Otherwise returns the reason (a string) for skipping it. The entry is
    skipped when it is in the download archive or rejected by one of the
    filters (title patterns, date range, view counts, age limit,
    'match_filter'). The reason is printed unless `silent`; if the matching
    'break_on_existing' / 'break_on_reject' param is set, the corresponding
    exception is raised instead of returning.
    """
    _type = info_dict.get('_type', 'video')
    assert incomplete or _type == 'video', 'Only video result can be considered complete'

    video_title = info_dict.get('title', info_dict.get('id', 'entry'))

    def check_filter():
        if _type in ('playlist', 'multi_video'):
            return
        elif _type in ('url', 'url_transparent') and not try_call(
                lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
            return

        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

        date = info_dict.get('upload_date')
        if date is not None:
            date_range = self.params.get('daterange', DateRange())
            if date not in date_range:
                return f'{date_from_str(date).isoformat()} upload date is not in range {date_range}'

        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)

        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        match_filter = self.params.get('match_filter')
        if match_filter is None:
            return None
        try:
            ret = match_filter(info_dict, incomplete=incomplete)
        except TypeError:
            # For backward compatibility
            ret = None if incomplete else match_filter(info_dict)

        if ret is NO_DEFAULT:
            # The filter asked for an interactive decision: prompt until answered
            while True:
                filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
                reply = input(self._format_screen(
                    f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
                if reply in {'y', ''}:
                    return None
                elif reply == 'n':
                    return f'Skipping {video_title}'
        elif ret is not None:
            return ret
        return None

    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return reason
    if not silent:
        self.to_screen('[download] ' + reason)
    if self.params.get(break_opt, False):
        raise break_err()
    return reason

@staticmethod

def add_extra_info(info_dict, extra_info):

1444

'''Set the keys from extra_info in info dict if they are missing'''

1445

for key, value in extra_info.items():

1446

info_dict.setdefault(key, value)

1447

1448

def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract and return the information dictionary of the URL

    Arguments:
    @param url          URL to extract

    Keyword arguments:
    @param download     Whether to download videos
    @param process      Whether to resolve all unresolved references (URLs, playlist items).
                        Must be True for download to work
    @param ie_key       Use only the extractor with this key

    @param extra_info   Dictionary containing the extra values to add to the info (For internal use only)
    @param force_generic_extractor  Force using the generic extractor (Deprecated; use ie_key='Generic')

    Returns None if the URL is already recorded in the download archive,
    or after reporting that no suitable extractor was found.
    """

    if extra_info is None:
        extra_info = {}

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    if not ie_key:
        ies = self._ies
    elif ie_key in self._ies:
        ies = {ie_key: self._ies[ie_key]}
    else:
        ies = {}

    # Only the first suitable extractor is ever used
    for key, ie in ies.items():
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = ie.get_temp_id(url)
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
            self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
            if self.params.get('break_on_existing', False):
                raise ExistingVideoReached()
            return None
        return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)

    extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
    self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
                      tb=False if extractors_restricted else None)

1496

1497

def _handle_extraction_exceptions(func):

1498

@functools.wraps(func)

1499

def wrapper(self, *args, **kwargs):

1500

while True:

1501

try:

1502

return func(self, *args, **kwargs)

1503

except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):

1504

raise

1505

except ReExtractInfo as e:

1506

if e.expected:

1507

self.to_screen(f'{e}; Re-extracting data')

1508

else:

1509

self.to_stderr('\r')

1510

self.report_warning(f'{e}; Re-extracting data')

1511

continue

1512

except GeoRestrictedError as e:

1513

msg = e.msg

1514

if e.countries:

1515

msg += '\nThis video is available in %s.' % ', '.join(

1516

map(ISO3166Utils.short2full, e.countries))

1517

msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'

1518

self.report_error(msg)

1519

except ExtractorError as e: # An error we somewhat expected

1520

self.report_error(str(e), e.format_traceback())

1521

except Exception as e:

1522

if self.params.get('ignoreerrors'):

1523

self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))

else:

raise

break

return wrapper

def _wait_for_video(self, ie_result={}):

1530

if (not self.params.get('wait_for_video')

1531

or ie_result.get('_type', 'video') != 'video'

1532

or ie_result.get('formats') or ie_result.get('url')):

1533

return

1534

1535

format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]

last_msg = ''

def progress(msg):

nonlocal last_msg

full_msg = f'{msg}\n'

1541

if not self.params.get('noprogress'):

1542

full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'

1543

elif last_msg:

1544

return

1545

self.to_screen(full_msg, skip_eol=True)

1546

last_msg = msg

1547

1548

min_wait, max_wait = self.params.get('wait_for_video')

1549

diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())

1550

if diff is None and ie_result.get('live_status') == 'is_upcoming':

1551

diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)

1552

self.report_warning('Release time of video is not known')

1553

elif ie_result and (diff or 0) <= 0:

1554

self.report_warning('Video should already be available according to extracted info')

1555

diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))

1556

self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

1557

1558

wait_till = time.time() + diff

1559

try:

1560

while True:

1561

diff = wait_till - time.time()

1562

if diff <= 0:

1563

progress('')

1564

raise ReExtractInfo('[wait] Wait period ended', expected=True)

1565

progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')

1566

time.sleep(1)

1567

except KeyboardInterrupt:

1568

progress('')

1569

raise ReExtractInfo('[wait] Interrupted by user', expected=True)

1570

except BaseException as e:

1571

if not isinstance(e, ReExtractInfo):

self.to_screen('')

raise

@_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run extractor `ie` on `url` and optionally process the result.

    Returns None if the extractor returned nothing, the raw extractor
    result when `process` is false, and the processed result otherwise.
    UserNotLive is re-raised after optionally waiting for the video.
    """
    try:
        ie_result = ie.extract(url)
    except UserNotLive as e:
        if process:
            if self.params.get('wait_for_video'):
                self.report_warning(e)
            self._wait_for_video()
        raise

    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
        return
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}

    original_url = extra_info.get('original_url')
    if original_url:
        ie_result.setdefault('original_url', extra_info['original_url'])
    self.add_default_extra_info(ie_result, ie, url)

    if not process:
        return ie_result
    self._wait_for_video(ie_result)
    return self.process_ie_result(ie_result, download, extra_info)

def add_default_extra_info(self, ie_result, ie, url):
    """Fill in default URL and extractor fields missing from `ie_result`.

    - 'webpage_url' and 'original_url' from `url` (when not None)
    - 'webpage_url_basename' and 'webpage_url_domain' from the webpage URL
    - 'extractor' and 'extractor_key' from `ie` (when not None)

    Values are merged through `add_extra_info`.
    """
    if url is not None:
        url_fields = {'webpage_url': url, 'original_url': url}
        self.add_extra_info(ie_result, url_fields)

    webpage_url = ie_result.get('webpage_url')
    if webpage_url:
        derived_fields = {
            'webpage_url_basename': url_basename(webpage_url),
            'webpage_url_domain': get_domain(webpage_url),
        }
        self.add_extra_info(ie_result, derived_fields)

    if ie is not None:
        extractor_fields = {
            'extractor': ie.IE_NAME,
            'extractor_key': ie.ie_key(),
        }
        self.add_extra_info(ie_result, extractor_fields)

1620

1621

def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    Dispatches on the result's '_type' ('video' when absent): 'video',
    'url', 'url_transparent', 'playlist'/'multi_video' and 'compat_list'.
    Returns None for a playlist that is already being processed, and
    raises Exception for an unknown type.

    @param extra_info   Extra values to add to the resulting info dicts
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(
            ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
        if ie_result.get('original_url') and not extra_info.get('original_url'):
            extra_info = {'original_url': ie_result['original_url'], **extra_info}

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: do not resolve the URL, only print/record
            # what is known from a copy and return the result untouched
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            if ie and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            info_copy, _ = self.pre_process(info_copy)
            self._fill_common_fields(info_copy, False)
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            self._raise_pending_errors(info_copy)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        self._raise_pending_errors(ie_result)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        exempted_fields = {'_type', 'url', 'ie_key'}
        if not ie_result.get('section_end') and ie_result.get('section_start') is None:
            # For video clips, the id etc of the clip extractor should be used
            exempted_fields |= {'id', 'extractor', 'extractor_key'}

        new_result = info.copy()
        new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result.get('webpage_url')  # Playlists maynot have webpage_url
        if webpage_url and webpage_url in self._playlist_urls:
            # Parenthesized: `%` binds tighter than `or`, so without the
            # parentheses the id fallback could never be used
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._fill_common_fields(ie_result, False)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Forget the visited URLs once the outermost playlist is done
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)

1754

1755

def _ensure_dir_exists(self, path):
    """Delegate to `make_dir` for `path` and return its result.

    `self.report_error` is handed over as the second argument
    (presumably the callback used to report a failure - confirm in make_dir).
    """
    reporter = self.report_error
    return make_dir(path, reporter)

1757

1758

@staticmethod
def _playlist_infodict(ie_result, strict=False, **kwargs):
    """Collect the playlist-level fields derived from a playlist result.

    @param ie_result  Mapping describing the playlist
    @param strict     If true, return only the `playlist*` fields (plus kwargs)
    @param kwargs     Extra fields merged on top of the `playlist*` fields
    @returns          A new dict. When not strict it also carries the webpage
                      URL fields (if the playlist has a `webpage_url`),
                      `playlist_index`, `__last_playlist_index`, `extractor`
                      and `extractor_key`
    """
    title = ie_result.get('title')
    playlist_id = ie_result.get('id')

    fields = {
        'playlist_count': ie_result.get('playlist_count'),
        'playlist': title or playlist_id,
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_uploader': ie_result.get('uploader'),
        'playlist_uploader_id': ie_result.get('uploader_id'),
    }
    fields.update(kwargs)
    if strict:
        return fields

    webpage_url = ie_result.get('webpage_url')
    if webpage_url:
        fields['webpage_url'] = webpage_url
        fields['webpage_url_basename'] = url_basename(webpage_url)
        fields['webpage_url_domain'] = get_domain(webpage_url)

    fields['playlist_index'] = 0
    # Highest requested index; 0 when no explicit selection was recorded
    fields['__last_playlist_index'] = max(ie_result.get('requested_entries') or (0, 0))
    fields['extractor'] = ie_result['extractor']
    fields['extractor_key'] = ie_result['extractor_key']
    return fields

1784

1785

def __process_playlist(self, ie_result, download):
    """Process each entry in the playlist

    Returns the (post-processed) playlist result, or None when the playlist
    is skipped by _match_entry or when writing one of the playlist files
    reports failure by returning None.
    """
    assert ie_result['_type'] in ('playlist', 'multi_video')

    common_info = self._playlist_infodict(ie_result, strict=True)
    title = common_info.get('playlist') or '<Untitled>'
    if self._match_entry(common_info, incomplete=True) is not None:
        return
    self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

    all_entries = PlaylistEntries(self, ie_result)
    entries = orderedSet(all_entries.get_requested_items(), lazy=True)

    is_lazy = self.params.get('lazy_playlist')
    if is_lazy:
        # Entries are resolved one by one inside the loop below
        resolved_entries, n_entries = [], 'N/A'
        ie_result['requested_entries'], ie_result['entries'] = None, None
    else:
        entries = resolved_entries = list(entries)
        n_entries = len(resolved_entries)
        ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
    if not ie_result.get('playlist_count'):
        # Better to do this after potentially exhausting entries
        ie_result['playlist_count'] = all_entries.get_full_count()

    playlist_extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
    ie_copy = collections.ChainMap(ie_result, playlist_extra)

    _infojson_written = False
    write_playlist_files = self.params.get('allow_playlist_files', True)
    if write_playlist_files and self.params.get('list_thumbnails'):
        self.list_thumbnails(ie_result)
    if write_playlist_files and not self.params.get('simulate'):
        infojson_name = self.prepare_filename(ie_copy, 'pl_infojson')
        _infojson_written = self._write_info_json('playlist', ie_result, infojson_name)
        if _infojson_written is None:
            return
        description_name = self.prepare_filename(ie_copy, 'pl_description')
        if self._write_description('playlist', ie_result, description_name) is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if is_lazy:
        if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
            self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
    elif self.params.get('playlistreverse'):
        entries.reverse()
    elif self.params.get('playlistrandom'):
        random.shuffle(entries)

    self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
                   f'{format_field(ie_result, "playlist_count", " of %s")}')

    if self.params.get('extract_flat') == 'discard_in_playlist':
        keep_resolved_entries = ie_result['_type'] != 'playlist'
    else:
        keep_resolved_entries = self.params.get('extract_flat') != 'discard'
    if keep_resolved_entries:
        self.write_debug('The information of all playlist entries will be held in memory')

    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for pos, (playlist_index, entry) in enumerate(entries):
        if is_lazy:
            resolved_entries.append((playlist_index, entry))
        if not entry:
            continue

        entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
        if not is_lazy and 'playlist-index' in self.params.get('compat_opts', []):
            playlist_index = ie_result['requested_entries'][pos]

        autonumber = pos + 1
        entry_copy = collections.ChainMap(entry, {
            **common_info,
            'n_entries': int_or_none(n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': autonumber,
        })

        if self._match_entry(entry_copy, incomplete=True) is not None:
            # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
            resolved_entries[pos] = (playlist_index, NO_DEFAULT)
            continue

        self.to_screen('[download] Downloading item %s of %s' % (
            self._format_screen(autonumber, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))

        per_entry_extra = collections.ChainMap({
            'playlist_index': playlist_index,
            'playlist_autonumber': autonumber,
        }, playlist_extra)
        entry_result = self.__process_iterable_entry(entry, download, per_entry_extra)
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
            break
        if keep_resolved_entries:
            resolved_entries[pos] = (playlist_index, entry_result)

    # Update with processed data; entries marked NO_DEFAULT above are left out
    surviving = [(index, item) for index, item in resolved_entries if item is not NO_DEFAULT]
    ie_result['entries'] = [item for _, item in surviving]
    ie_result['requested_entries'] = [index for index, _ in surviving]
    if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
        # Do not set for full playlist
        ie_result.pop('requested_entries')

    # Write the updated info to json
    if _infojson_written is True:
        rewritten = self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True)
        if rewritten is None:
            return

    ie_result = self.run_all_pps('playlist', ie_result)
    self.to_screen(f'[download] Finished downloading playlist: {title}')
    return ie_result

1901

1902

@_handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Run process_ie_result on one playlist entry, wrapped by _handle_extraction_exceptions."""
    processed = self.process_ie_result(entry, download=download, extra_info=extra_info)
    return processed

1906

1907

def _build_format_filter(self, filter_spec):
    """Return a predicate over format dicts implementing `filter_spec`.

    Two syntaxes are tried in order: a numeric comparison on a fixed set of
    keys (e.g. ``height<=720``), then a string comparison on any key
    (e.g. ``ext=mp4``, ``format_id!^=hls``, ``format_note~='^DASH'``).

    The predicate returns the comparison result, or - when the format has no
    value for the key - the text matched by the optional ``?`` marker
    (None when the marker is absent).

    Raises ValueError for an unparsable numeric value and SyntaxError when
    the spec matches neither syntax.
    """
    numeric_ops = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    numeric_rex = re.compile(r'''(?x)\s*
        (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
        (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
        ''' % '|'.join(re.escape(symbol) for symbol in numeric_ops))

    match = numeric_rex.fullmatch(filter_spec)
    if match:
        raw_value = match.group('value')
        try:
            comparison_value = int(raw_value)
        except ValueError:
            # Not a plain integer: try it as a file size, with and without a trailing "B"
            comparison_value = parse_filesize(raw_value)
            if comparison_value is None:
                comparison_value = parse_filesize(raw_value + 'B')
            if comparison_value is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        raw_value, filter_spec))
        compare = numeric_ops[match.group('op')]
    else:
        string_ops = {
            '=': operator.eq,
            '^=': lambda actual, wanted: actual.startswith(wanted),
            '$=': lambda actual, wanted: actual.endswith(wanted),
            '*=': lambda actual, wanted: wanted in actual,
            '~=': lambda actual, wanted: wanted.search(actual) is not None
        }
        string_rex = re.compile(r'''(?x)\s*
            (?P<key>[a-zA-Z0-9._-]+)\s*
            (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
            (?P<quote>["'])?
            (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
            (?(quote)(?P=quote))\s*
            ''' % '|'.join(re.escape(symbol) for symbol in string_ops))
        match = string_rex.fullmatch(filter_spec)
        if not match:
            raise SyntaxError('Invalid filter specification %r' % filter_spec)

        symbol = match.group('op')
        if symbol == '~=':
            # The value is a regular expression; it is compiled as written
            comparison_value = re.compile(match.group('value'))
        else:
            # Drop the backslash in front of escaped quotes and backslashes
            comparison_value = re.sub(r'''\\([\\"'])''', r'\1', match.group('value'))

        plain_compare = string_ops[symbol]
        if match.group('negation'):
            def compare(actual, wanted):
                return not plain_compare(actual, wanted)
        else:
            compare = plain_compare

    key = match.group('key')
    when_missing = match.group('none_inclusive')

    def _filter(f):
        actual_value = f.get(key)
        if actual_value is None:
            return when_missing
        return compare(actual_value, comparison_value)

    return _filter

1973

1974

def _check_formats(self, formats):
    """Yield only the formats for which a test download succeeds.

    Each format is downloaded with `self.dl(..., test=True)` into a scratch
    file inside the 'temp' output path; the scratch file is removed
    afterwards. Formats whose directory cannot be ensured are skipped
    silently; formats that fail the download are skipped with a message.
    """
    for fmt in formats:
        self.to_screen('[info] Testing format %s' % fmt['format_id'])
        temp_dir = self.get_output_path('temp')
        if not self._ensure_dir_exists(f'{temp_dir}/'):
            continue

        # Only the name is needed: create the file, close it, let the downloader reopen it
        with tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=temp_dir or None) as handle:
            temp_name = handle.name

        try:
            downloaded, _ = self.dl(temp_name, fmt, test=True)
        except (DownloadError, OSError, ValueError) + network_exceptions:
            downloaded = False
        finally:
            if os.path.exists(temp_name):
                try:
                    os.remove(temp_name)
                except OSError:
                    self.report_warning('Unable to delete temporary file "%s"' % temp_name)

        if not downloaded:
            self.to_screen('[info] Unable to download format %s. Skipping...' % fmt['format_id'])
            continue
        yield fmt

1996

1997

def _default_format_spec(self, info_dict, download=True):
    """Pick the format selector string used when none is given.

    Returns 'best/bestvideo+bestaudio' when a real download is requested
    and merging is unavailable, or the video is live (without
    `live_from_start`), or the default output template is '-'.
    Otherwise returns 'bestvideo+bestaudio/best' if multiple audio streams
    are allowed or the 'format-spec' compat option is set, else
    'bestvideo*+bestaudio/best'.
    """

    def can_merge():
        merger = FFmpegMergerPP(self)
        return merger.available and merger.can_merge()

    def merged_output_unusable():
        if not can_merge():
            return True
        if info_dict.get('is_live') and not self.params.get('live_from_start'):
            return True
        return self.params['outtmpl']['default'] == '-'

    prefer_best = not self.params.get('simulate') and download and merged_output_unusable()
    if prefer_best:
        return 'best/bestvideo+bestaudio'

    compat = (
        self.params.get('allow_multiple_audio_streams', False)
        or 'format-spec' in self.params['compat_opts'])
    if compat:
        return 'bestvideo+bestaudio/best'
    return 'bestvideo*+bestaudio/best'

2019

2020

def build_format_selector(self, format_spec):

2021

def syntax_error(note, start):
    """Build (without raising) a SyntaxError for `format_spec`.

    The message shows `note`, the whole spec, and a caret placed under
    column `start[1]`.
    """
    padding = ' ' * start[1]
    return SyntaxError(f'Invalid format specification: {note}\n\t{format_spec}\n\t{padding}^')

2026

2027

PICKFIRST = 'PICKFIRST'

MERGE = 'MERGE'

SINGLE = 'SINGLE'

GROUP = 'GROUP'

FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

2032

2033

allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),

2034

'video': self.params.get('allow_multiple_video_streams', False)}

2035

2036

check_formats = self.params.get('check_formats') == 'selected'

2037

2038

def _parse_filter(tokens):
    """Join the token strings read from `tokens` up to the closing ']'.

    The ']' itself is consumed but not included. Returns None if the
    tokens run out before a ']' operator is seen.
    """
    pieces = []
    for tok_type, tok_string, _start, _end, _line in tokens:
        if tok_type == tokenize.OP and tok_string == ']':
            return ''.join(pieces)
        pieces.append(tok_string)
    return None

2045

2046

def _remove_unused_ops(tokens):
    """Re-tokenize a format spec so only the selector operators stay separate.

    Operators other than '/', '+', ',', '(' and ')' are glued to the
    neighbouring NAME/NUMBER/OP tokens and emitted as a single NAME token,
    e.g. 'mp4' '-' 'baseline' becomes 'mp4-baseline'.
    Everything from '[' up to and including the matching ']' is passed
    through untouched (it is handled by _parse_filter).
    Tokens of any other type (encoding, newline, end marker, ...) are dropped.

    Yields 5-tuples (type, string, start, end, line). A joined token spans
    from the start of its first fragment to the end of its last fragment
    and carries the source line of its first fragment.
    """
    ALLOWED_OPS = ('/', '+', ',', '(', ')')
    # The bracket branch below continues consuming the same stream,
    # so make sure we hold a one-shot iterator even if given a list
    tokens = iter(tokens)

    last_string, last_start, last_end, last_line = None, None, None, None
    for tok_type, tok_string, start, end, line in tokens:
        if tok_type == tokenize.OP and tok_string == '[':
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line
                last_string = None
            yield tok_type, tok_string, start, end, line
            # everything inside brackets will be handled by _parse_filter
            for tok_type, tok_string, start, end, line in tokens:
                yield tok_type, tok_string, start, end, line
                if tok_type == tokenize.OP and tok_string == ']':
                    break
        elif tok_type == tokenize.OP and tok_string in ALLOWED_OPS:
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line
                last_string = None
            yield tok_type, tok_string, start, end, line
        elif tok_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
            if not last_string:
                last_string = tok_string
                last_start = start
                last_line = line
            else:
                last_string += tok_string
            # Always track the end of the most recent fragment
            last_end = end
    if last_string:
        yield tokenize.NAME, last_string, last_start, last_end, last_line

2076

2077

def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
    """Parse tokens into a list of FormatSelector tuples (one per ','-separated selector).

    `tokens` must offer `restore_last_token()` so that a delimiter belonging
    to an enclosing construct can be pushed back before returning.
    The `inside_*` flags tell which construct ('+', '/', or '(...)') the
    current recursion level was entered from, and therefore which
    delimiters end it.
    """
    selectors = []
    current_selector = None
    for tok_type, tok_string, start, _end, _line in tokens:
        # ENCODING is only defined in python 3.x
        if tok_type == getattr(tokenize, 'ENCODING', None):
            continue
        if tok_type in [tokenize.NAME, tokenize.NUMBER]:
            current_selector = FormatSelector(SINGLE, tok_string, [])
            continue
        if tok_type == tokenize.ENDMARKER:
            break
        if tok_type != tokenize.OP:
            continue

        if tok_string == ')':
            if not inside_group:
                # ')' will be handled by the parentheses group
                tokens.restore_last_token()
            break
        elif (inside_merge and tok_string in ['/', ',']) or (inside_choice and tok_string == ','):
            # The delimiter belongs to the caller; hand it back
            tokens.restore_last_token()
            break
        elif tok_string == ',':
            if not current_selector:
                raise syntax_error('"," must follow a format selector', start)
            selectors.append(current_selector)
            current_selector = None
        elif tok_string == '/':
            if not current_selector:
                raise syntax_error('"/" must follow a format selector', start)
            preferred = current_selector
            alternatives = _parse_format_selection(tokens, inside_choice=True)
            current_selector = FormatSelector(PICKFIRST, (preferred, alternatives), [])
        elif tok_string == '[':
            if not current_selector:
                # A bare filter applies to 'best'
                current_selector = FormatSelector(SINGLE, 'best', [])
            current_selector.filters.append(_parse_filter(tokens))
        elif tok_string == '(':
            if current_selector:
                raise syntax_error('Unexpected "("', start)
            grouped = _parse_format_selection(tokens, inside_group=True)
            current_selector = FormatSelector(GROUP, grouped, [])
        elif tok_string == '+':
            if not current_selector:
                raise syntax_error('Unexpected "+"', start)
            left = current_selector
            right = _parse_format_selection(tokens, inside_merge=True)
            if not right:
                raise syntax_error('Expected a selector', start)
            current_selector = FormatSelector(MERGE, (left, right), [])
        else:
            raise syntax_error(f'Operator not recognized: "{tok_string}"', start)

    if current_selector:
        selectors.append(current_selector)
    return selectors

2134

2135

def _merge(formats_pair):
    """Combine two (possibly already merged) formats into one merged format dict.

    @param formats_pair  2-tuple of format dicts; a dict that has
                         'requested_formats' contributes those instead of itself
    @returns             The single remaining format if only one survives the
                         stream filtering, otherwise a new dict describing the
                         merge ('requested_formats', joined 'format'/'format_id',
                         output 'ext', summed size/bitrate, and the video/audio
                         specific fields when there is exactly one such stream)
    """
    format_1, format_2 = formats_pair

    formats_info = []
    formats_info.extend(format_1.get('requested_formats', (format_1,)))
    formats_info.extend(format_2.get('requested_formats', (format_2,)))

    if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
        get_no_more = {'video': False, 'audio': False}
        # Build a new list instead of popping while iterating: removing an
        # element during enumerate() makes the loop skip the one after it
        kept_formats = []
        for fmt_info in formats_info:
            if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                # Neither audio nor video - nothing to merge
                continue
            surplus = False
            for aud_vid in ['audio', 'video']:
                if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                    if get_no_more[aud_vid]:
                        # A stream of this kind was already taken
                        surplus = True
                        break
                    get_no_more[aud_vid] = True
            if not surplus:
                kept_formats.append(fmt_info)
        formats_info = kept_formats

    if len(formats_info) == 1:
        return formats_info[0]

    video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
    audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

    the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
    the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

    output_ext = get_compatible_ext(
        vcodecs=[f.get('vcodec') for f in video_fmts],
        acodecs=[f.get('acodec') for f in audio_fmts],
        vexts=[f['ext'] for f in video_fmts],
        aexts=[f['ext'] for f in audio_fmts],
        preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
                     or self.params.get('prefer_free_formats') and ('webm', 'mkv')))

    # Per-format value of the first usable key, with empty values dropped
    filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

    new_dict = {
        'requested_formats': formats_info,
        'format': '+'.join(filtered('format')),
        'format_id': '+'.join(filtered('format_id')),
        'ext': output_ext,
        'protocol': '+'.join(map(determine_protocol, formats_info)),
        'language': '+'.join(orderedSet(filtered('language'))) or None,
        'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
        'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
        'tbr': sum(filtered('tbr', 'vbr', 'abr')),
    }

    if the_only_video:
        new_dict.update({
            'width': the_only_video.get('width'),
            'height': the_only_video.get('height'),
            'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
            'fps': the_only_video.get('fps'),
            'dynamic_range': the_only_video.get('dynamic_range'),
            'vcodec': the_only_video.get('vcodec'),
            'vbr': the_only_video.get('vbr'),
            'stretched_ratio': the_only_video.get('stretched_ratio'),
            'aspect_ratio': the_only_video.get('aspect_ratio'),
        })

    if the_only_audio:
        new_dict.update({
            'acodec': the_only_audio.get('acodec'),
            'abr': the_only_audio.get('abr'),
            'asr': the_only_audio.get('asr'),
            'audio_channels': the_only_audio.get('audio_channels')
        })

    return new_dict

def _check_formats(formats):
    """Yield `formats` as-is, or only those passing self._check_formats when `check_formats` is set."""
    candidates = self._check_formats(formats) if check_formats else formats
    yield from candidates

2214

2215

def _build_selector_function(selector):

2216

if isinstance(selector, list): # ,

2217

fs = [_build_selector_function(s) for s in selector]

2218

2219

def selector_function(ctx):

2220

for f in fs:

2221

yield from f(ctx)

2222

return selector_function

2223

2224

elif selector.type == GROUP: # ()

2225

selector_function = _build_selector_function(selector.selector)

2226

2227

elif selector.type == PICKFIRST: # /

2228

fs = [_build_selector_function(s) for s in selector.selector]

2229

2230

def selector_function(ctx):

2231

for f in fs:

2232

picked_formats = list(f(ctx))

2233

if picked_formats:

2234

return picked_formats

2235

return []

2236

2237

elif selector.type == MERGE: # +

2238

selector_1, selector_2 = map(_build_selector_function, selector.selector)

2239

2240

def selector_function(ctx):

2241

for pair in itertools.product(selector_1(ctx), selector_2(ctx)):

2242

yield _merge(pair)

2243

2244

elif selector.type == SINGLE: # atom

2245

format_spec = selector.selector or 'best'

2246

2247

# TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector

2248

if format_spec == 'all':

2249

def selector_function(ctx):

2250

yield from _check_formats(ctx['formats'][::-1])

2251

elif format_spec == 'mergeall':

2252

def selector_function(ctx):

2253

formats = list(_check_formats(

2254

f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))

2255

if not formats:

2256

return

2257

merged_format = formats[-1]

2258

for f in formats[-2::-1]:

2259

merged_format = _merge((merged_format, f))

yield merged_format

else:

format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1

mobj = re.match(

format_spec)

if mobj is not None:

format_idx = int_or_none(mobj.group('n'), default=1)

2269

format_reverse = mobj.group('bw')[0] == 'b'

2270

format_type = (mobj.group('type') or [None])[0]

2271

not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)

2272

format_modified = mobj.group('mod') is not None

2273

2274

format_fallback = not format_type and not format_modified # for b, w

2275

_filter_f = (

2276

(lambda f: f.get('%scodec' % format_type) != 'none')

2277

if format_type and format_modified # bv*, ba*, wv*, wa*

2278

else (lambda f: f.get('%scodec' % not_format_type) == 'none')

2279

if format_type # bv, ba, wv, wa

2280

else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')

2281

if not format_modified # b, w

2282

else lambda f: True) # b*, w*

2283

filter_f = lambda f: _filter_f(f) and (

2284

f.get('vcodec') != 'none' or f.get('acodec') != 'none')

2285

else:

2286

if format_spec in self._format_selection_exts['audio']:

2287

filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'

2288

elif format_spec in self._format_selection_exts['video']:

2289

filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'

2290

seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'

2291

elif format_spec in self._format_selection_exts['storyboards']:

2292

filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'

2293

else:

2294

filter_f = lambda f: f.get('format_id') == format_spec # id

2295

2296

def selector_function(ctx):

2297

formats = list(ctx['formats'])

2298

matches = list(filter(filter_f, formats)) if filter_f is not None else formats

2299

if not matches:

2300

if format_fallback and ctx['incomplete_formats']:

2301

# for extractors with incomplete formats (audio only (soundcloud)

2302

# or video only (imgur)) best/worst will fallback to

2303

# best/worst {video,audio}-only format

2304

matches = formats

2305

elif seperate_fallback and not ctx['has_merged_format']:

2306

# for compatibility with youtube-dl when there is no pre-merged format

2307

matches = list(filter(seperate_fallback, formats))

2308

matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))

2309

try:

2310

yield matches[format_idx - 1]

2311

except LazyList.IndexError:

2312

return

2313

2314

filters = [self._build_format_filter(f) for f in selector.filters]

2315

2316

def final_selector(ctx):

2317

ctx_copy = dict(ctx)

2318

for _filter in filters:

2319

ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))

2320

return selector_function(ctx_copy)

2321

return final_selector

2322

2323

stream = io.BytesIO(format_spec.encode())

2324

try:

2325

tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))

2326

except tokenize.TokenError:

2327

raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

2328

2329

class TokenIterator:
    """Iterator over a token sequence that can step back by one token."""

    def __init__(self, tokens):
        self.tokens = tokens
        self.counter = 0  # index of the token that will be returned next

    def __iter__(self):
        return self

    def __next__(self):
        position = self.counter
        if position < len(self.tokens):
            token = self.tokens[position]
            self.counter = position + 1
            return token
        raise StopIteration()

    next = __next__

    def restore_last_token(self):
        """Move back one position so the last returned token is returned again."""
        self.counter -= 1

2348

2349

parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))

2350

return _build_selector_function(parsed_selector)

2351

2352

def _calc_headers(self, info_dict):
    """Compute the HTTP headers to use for `info_dict['url']`.

    Starts from merge_headers() of the configured 'http_headers' and the
    ones in `info_dict`, then adds a 'Cookie' header when _calc_cookies
    yields one, and an 'X-Forwarded-For' header from
    `__x_forwarded_for_ip` unless that header is already present.
    """
    headers = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})

    cookie_header = self._calc_cookies(info_dict['url'])
    if cookie_header:
        headers['Cookie'] = cookie_header

    forwarded_ip = None if 'X-Forwarded-For' in headers else info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip:
        headers['X-Forwarded-For'] = forwarded_ip

    return headers

def _calc_cookies(self, url):
    """Return the 'Cookie' header that self.cookiejar attaches to a request for `url`."""
    request = sanitized_Request(url)
    self.cookiejar.add_cookie_header(request)
    return request.get_header('Cookie')

2370

2371

def _sort_thumbnails(self, thumbnails):
    """Sort `thumbnails` in place, ascending by preference, width, height, id, then url.

    Missing (None) preference/width/height count as -1 and a missing id
    as ''. Returns None.
    """

    def ranking(thumb):
        def value_or(field, fallback):
            value = thumb.get(field)
            return fallback if value is None else value

        return (
            value_or('preference', -1),
            value_or('width', -1),
            value_or('height', -1),
            value_or('id', ''),
            thumb.get('url'),
        )

    thumbnails.sort(key=ranking)

2378

2379

def _sanitize_thumbnails(self, info_dict):
    """Normalize `info_dict['thumbnails']` in place.

    A lone 'thumbnail' URL is promoted to a one-element 'thumbnails' list
    when no list is given. The list is then sorted, every entry gets an
    'id' (its position, if missing), a 'resolution' (when both width and
    height are set) and a sanitized 'url'. If the 'check_formats' param is
    exactly True, the list is replaced by a LazyList that drops thumbnails
    whose URL cannot be opened. Does nothing when there are no thumbnails.
    """
    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        single = info_dict.get('thumbnail')
        if single:
            thumbnails = [{'url': single}]
            info_dict['thumbnails'] = thumbnails
    if not thumbnails:
        return

    def reachable(candidates):
        # Probe each thumbnail with a HEAD request; keep those that respond
        for thumb in candidates:
            self.to_screen(f'[info] Testing thumbnail {thumb["id"]}')
            try:
                self.urlopen(HEADRequest(thumb['url']))
            except network_exceptions as err:
                self.to_screen(f'[info] Unable to connect to thumbnail {thumb["id"]} URL {thumb["url"]!r} - {err}. Skipping...')
            else:
                yield thumb

    self._sort_thumbnails(thumbnails)
    for position, thumb in enumerate(thumbnails):
        if thumb.get('id') is None:
            thumb['id'] = '%d' % position
        if thumb.get('width') and thumb.get('height'):
            thumb['resolution'] = '%dx%d' % (thumb['width'], thumb['height'])
        thumb['url'] = sanitize_url(thumb['url'])

    if self.params.get('check_formats') is True:
        # Probe starting from the end of the sorted list; reverse=True restores the order
        thumbnails = LazyList(reachable(thumbnails[::-1]), reverse=True)
    info_dict['thumbnails'] = thumbnails

2410

2411

def _fill_common_fields(self, info_dict, final=True):
    """Derive commonly used fields of `info_dict` in place.

    @param info_dict  The info dict to complete
    @param final      If true, also set 'fulltitle', generate a title when
                      the extractor gave none, and generate
                      chapter/season/episode names from their numbers

    Fields filled when missing: 'duration_string', the '*_date' fields from
    their '*timestamp' counterparts (as UTC 'YYYYMMDD'), and the mutually
    consistent 'live_status' / 'is_live' / 'was_live'.
    """
    # TODO: move sanitization here
    if final:
        title = info_dict['fulltitle'] = info_dict.get('title')
        if not title:
            if title == '':
                self.write_debug('Extractor gave empty title. Creating a generic title')
            else:
                self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
            info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'

    if info_dict.get('duration') is not None:
        info_dict['duration_string'] = formatSeconds(info_dict['duration'])

    for ts_key, date_key in (
            ('timestamp', 'upload_date'),
            ('release_timestamp', 'release_date'),
            ('modified_timestamp', 'modified_date'),
    ):
        if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            with contextlib.suppress(ValueError, OverflowError, OSError):
                # Timezone-aware replacement for the deprecated utcfromtimestamp()
                upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
                info_dict[date_key] = upload_date.strftime('%Y%m%d')

    live_keys = ('is_live', 'was_live')
    live_status = info_dict.get('live_status')
    if live_status is None:
        for key in live_keys:
            if info_dict.get(key) is False:
                # Explicitly not this state; look at the next flag
                continue
            if info_dict.get(key):
                live_status = key
            # A flag that is unknown (None) ends the search without a verdict
            break
        if all(info_dict.get(key) is False for key in live_keys):
            live_status = 'not_live'
    if live_status:
        info_dict['live_status'] = live_status
        for key in live_keys:
            if info_dict.get(key) is None:
                info_dict[key] = (live_status == key)
    if live_status == 'post_live':
        info_dict['was_live'] = True

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

2461

2462

def _raise_pending_errors(self, info):
    """Remove '__pending_error' from `info` and, if it is set, report it without a traceback."""
    pending = info.pop('__pending_error', None)
    if not pending:
        return
    self.report_error(pending, tb=False)

2466

2467

def sort_formats(self, info_dict):
    """Sort the formats of `info_dict` in place using FormatSorter.

    The sort fields come from `info_dict['_format_sort_fields']`
    (an empty list when unset).
    """
    formats = self._get_formats(info_dict)
    sort_fields = info_dict.get('_format_sort_fields') or []
    sorter = FormatSorter(self, sort_fields)
    formats.sort(key=sorter.calculate_preference)

2471

2472

def process_video_result(self, info_dict, download=True):
    """Sanitize a single video result, select its formats and optionally download them.

    The steps, in order: validate/sanitize basic fields, normalize chapters,
    thumbnails and subtitles, clean up and de-duplicate the format list, run
    the pre-processors and filters, handle the various "list only" options,
    run the format selector and finally (when `download` is true) pass every
    selected format/time-range combination to `self.process_info`.

    @param info_dict    The extractor result; must be of `_type` "video".
                        It is modified in place
    @param download     Whether the selected formats should be processed
                        with `self.process_info`
    @returns            The resulting info dict (the objects returned by the
                        pre/post-processors may replace the one passed in)
    @raises ExtractorError      if "id" is missing/empty or no requested format is
                                available (unless "ignore_no_formats_error" is set)
    @raises MaxDownloadsReached re-raised after the "after_video"
                                postprocessors when a download signalled it
    """
    assert info_dict.get('_type', 'video') == 'video'
    self._num_videos += 1

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
    elif not info_dict.get('id'):
        raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

    def report_force_conversion(field, field_not, conversion):
        # Warn that an extractor returned a field of the wrong type
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        # Coerce info[string_field] to str (with a warning) unless it is None or already a str
        field = info.get(string_field)
        if field is None or isinstance(field, str):
            return
        report_force_conversion(string_field, 'a string', 'string')
        info[string_field] = str(field)

    def sanitize_numeric_fields(info):
        # Coerce every non-numeric value of the known numeric fields using int_or_none
        for numeric_field in self._NUMERIC_FIELDS:
            field = info.get(numeric_field)
            if field is None or isinstance(field, (int, float)):
                continue
            report_force_conversion(numeric_field, 'numeric', 'int')
            info[numeric_field] = int_or_none(field)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)
    # When a section is given, the duration is that of the section
    if info_dict.get('section_end') and info_dict.get('section_start') is not None:
        info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
    # A missing/None/zero/negative duration is dropped; the warning is only
    # issued when the popped value is truthy, i.e. actually negative
    if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
        self.report_warning('"duration" field is negative, there is an error in extractor')

    chapters = info_dict.get('chapters') or []
    # If the first chapter does not start at 0, prepend a chapter that does
    if chapters and chapters[0].get('start_time'):
        chapters.insert(0, {'start_time': 0})

    # Sentinel used as the neighbour of the first and last chapter:
    # it "ends" at 0 (before the first) and "starts" at the duration (after the last)
    dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
    for idx, (prev, current, next_) in enumerate(zip(
            (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
        # Fill the missing boundaries from the neighbouring chapters
        if current.get('start_time') is None:
            current['start_time'] = prev.get('end_time')
        if not current.get('end_time'):
            current['end_time'] = next_.get('start_time')
        if not current.get('title'):
            current['title'] = f'<Untitled Chapter {idx}>'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    self._sanitize_thumbnails(info_dict)

    # Sanitize the given thumbnail URL, or fall back to the last entry of "thumbnails"
    thumbnail = info_dict.get('thumbnail')
    thumbnails = info_dict.get('thumbnails')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if info_dict.get('display_id') is None and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    self._fill_common_fields(info_dict)

    # Sanitize the URL and fill in the extension of every subtitle/caption format
    for cc_kind in ('subtitles', 'automatic_captions'):
        cc = info_dict.get(cc_kind)
        if cc:
            for _, subtitle in cc.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    formats = self._get_formats(info_dict)

    # Backward compatibility with InfoExtractor._sort_formats
    field_preference = (formats or [{}])[0].pop('__sort_fields', None)
    if field_preference:
        info_dict['_format_sort_fields'] = field_preference

    # or None ensures --clean-infojson removes it
    info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
    if not self.params.get('allow_unplayable_formats'):
        formats = [f for f in formats if not f.get('has_drm')]

    # All remaining formats have neither audio nor video
    if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
        self.report_warning(
            f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
            'only images are available for download. Use --list-formats to see them'.capitalize())

    get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
    if not get_from_start:
        # Live stream recorded from the current time: append the current local time to the title
        info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
    if info_dict.get('is_live') and formats:
        # Keep only the formats whose "is_from_start" flag agrees with what was requested
        formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
        if get_from_start and not formats:
            self.raise_no_formats(info_dict, msg=(
                '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
                'If you want to download from the current time, use --no-live-from-start'))

    def is_wellformed(f):
        # A format is usable only if it has a non-empty "url"; bytes URLs are converted to str
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = list(filter(is_wellformed, formats or []))

    if not formats:
        self.raise_no_formats(info_dict)

    # Sanitize every format and fill in the fields that can be derived
    for format in formats:
        sanitize_string_field(format, 'format_id')
        sanitize_numeric_fields(format)
        format['url'] = sanitize_url(format['url'])
        if format.get('ext') is None:
            format['ext'] = determine_ext(format['url']).lower()
        if format.get('protocol') is None:
            format['protocol'] = determine_protocol(format)
        if format.get('resolution') is None:
            format['resolution'] = self.format_resolution(format, default=None)
        if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
            format['dynamic_range'] = 'SDR'
        if format.get('aspect_ratio') is None:
            format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
        # Estimate the size from duration and total bitrate when no size is known
        if (info_dict.get('duration') and format.get('tbr')
                and not format.get('filesize') and not format.get('filesize_approx')):
            format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
        # Lookups fall back from the format to info_dict
        format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict))

    # This is copied to http_headers by the above _calc_headers and can now be removed
    if '__x_forwarded_for_ip' in info_dict:
        del info_dict['__x_forwarded_for_ip']

    # NOTE(review): a temporary dict is passed instead of info_dict; this presumably
    # relies on sort_formats sorting the very same `formats` list in place - confirm
    self.sort_formats({
        'formats': formats,
        '_format_sort_fields': info_dict.get('_format_sort_fields')
    })

    # Sanitize and group by format_id
    formats_dict = {}
    for i, format in enumerate(formats):
        if not format.get('format_id'):
            # No id given: use the position in the list
            format['format_id'] = str(i)
        else:
            # Sanitize format_id from characters used in format selector expression
            format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
        formats_dict.setdefault(format['format_id'], []).append(format)

    # Make sure all formats have unique format_id
    common_exts = set(itertools.chain(*self._format_selection_exts.values()))
    for format_id, ambiguous_formats in formats_dict.items():
        ambigious_id = len(ambiguous_formats) > 1
        for i, format in enumerate(ambiguous_formats):
            if ambigious_id:
                # Several formats share this id: disambiguate with a numeric suffix
                format['format_id'] = '%s-%d' % (format_id, i)
            # Ensure there is no conflict between id and ext in format selection
            # See https://github.com/yt-dlp/yt-dlp/issues/1282
            if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                format['format_id'] = 'f%s' % format['format_id']

            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )

    if self.params.get('check_formats') is True:
        # The reversed list is handed to _check_formats and wrapped in a LazyList
        # with reverse=True
        formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

    if not formats or formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats

    info_dict, _ = self.pre_process(info_dict)

    # A non-None result from _match_entry ends the processing here
    if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
        return info_dict

    self.post_extract(info_dict)
    info_dict, _ = self.pre_process(info_dict, 'after_filter')

    # The pre-processors may have modified the formats
    formats = self._get_formats(info_dict)

    list_only = self.params.get('simulate') == 'list_only'
    # A format selector of '-' means the selector is asked for interactively
    interactive_format_selection = not list_only and self.format_selector == '-'
    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
    if self.params.get('listsubtitles'):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
    if self.params.get('listformats') or interactive_format_selection:
        self.list_formats(info_dict)
    if list_only:
        # Without this printing, -F --print-json will not work
        self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
        return info_dict

    format_selector = self.format_selector
    if format_selector is None:
        req_format = self._default_format_spec(info_dict, download=download)
        self.write_debug('Default format spec: %s' % req_format)
        format_selector = self.build_format_selector(req_format)

    # Runs once normally; in interactive mode it repeats until a valid
    # selector that matches at least one format has been entered
    while True:
        if interactive_format_selection:
            req_format = input(
                self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
            try:
                format_selector = self.build_format_selector(req_format)
            except SyntaxError as err:
                self.report_error(err, tb=False, is_error=False)
                continue

        formats_to_download = list(format_selector({
            'formats': formats,
            'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
            'incomplete_formats': (
                # All formats are video-only or
                all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
                # all formats are audio-only
                or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
        }))
        if interactive_format_selection and not formats_to_download:
            self.report_error('Requested format is not available', tb=False, is_error=False)
            continue
        break

    if not formats_to_download:
        if not self.params.get('ignore_no_formats_error'):
            raise ExtractorError(
                'Requested format is not available. Use --list-formats for a list of available formats',
                expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
        self.report_warning('Requested format is not available')
        # Process what we can, even without any available formats.
        formats_to_download = [{}]

    # Without a "download_ranges" callback, a single empty range (= the whole video) is used
    requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
    best_format, downloaded_formats = formats_to_download[-1], []
    if download:
        if best_format and requested_ranges:
            def to_screen(*msg):
                # Each argument may be a string or an iterable of strings (joined by ", ")
                self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')

            to_screen(f'Downloading {len(formats_to_download)} format(s):',
                      (f['format_id'] for f in formats_to_download))
            if requested_ranges != ({}, ):
                to_screen(f'Downloading {len(requested_ranges)} time ranges:',
                          (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
        max_downloads_reached = False

        # Every selected format is processed once per requested range
        for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
            new_info = self._copy_infodict(info_dict)
            new_info.update(fmt)
            offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
            end_time = offset + min(chapter.get('end_time', duration), duration)
            if chapter or offset:
                new_info.update({
                    'section_start': offset + chapter.get('start_time', 0),
                    # duration may not be accurate. So allow deviations <1sec
                    'section_end': end_time if end_time <= offset + duration + 1 else None,
                    'section_title': chapter.get('title'),
                    'section_number': chapter.get('index'),
                })
            downloaded_formats.append(new_info)
            try:
                self.process_info(new_info)
            except MaxDownloadsReached:
                # Remember it so that the bookkeeping below still runs; re-raised at the end
                max_downloads_reached = True
            self._raise_pending_errors(new_info)
            # Remove copied info
            for key, val in tuple(new_info.items()):
                if info_dict.get(key) == val:
                    new_info.pop(key)
            if max_downloads_reached:
                break

        # Record in the archive only if at least one download asked for it and none
        # reported False ('ignore' entries do not count either way)
        write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
        assert write_archive.issubset({True, False, 'ignore'})
        if True in write_archive and False not in write_archive:
            self.record_download_archive(info_dict)

        info_dict['requested_downloads'] = downloaded_formats
        info_dict = self.run_all_pps('after_video', info_dict)
        if max_downloads_reached:
            raise MaxDownloadsReached()

    # We update the info dict with the selected best quality format (backwards compatibility)
    info_dict.update(best_format)
    return info_dict

2787

2788

def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    @param video_id             Video ID; only used in the messages
    @param normal_subtitles     Mapping of language -> list of subtitle formats
                                (used only when "writesubtitles" is set), or None
    @param automatic_captions   Same, for automatic captions (used only when
                                "writeautomaticsub" is set). Normal subtitles
                                take precedence for a language present in both
    @returns                    Mapping of language -> chosen subtitle format,
                                or None if no subtitles are available/requested
    @raises ValueError          if a regex given in "subtitleslangs" is invalid
    """
    available_subs, normal_sub_langs = {}, []
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
        normal_sub_langs = tuple(normal_subtitles.keys())
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            # Never override a normal subtitle with an automatic caption
            available_subs.setdefault(lang, cap_info)

    if not available_subs or (
            not self.params.get('writesubtitles')
            and not self.params.get('writeautomaticsub')):
        return None

    all_sub_langs = tuple(available_subs.keys())
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        try:
            requested_langs = orderedSet_from_options(
                self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
        except re.error as e:
            # Keep the original regex error attached as the cause
            raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}') from e
    elif normal_sub_langs:
        # Default: English if available, otherwise the first language
        requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
    else:
        requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
    if requested_langs:
        self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # A language with an empty format list offers nothing to pick from;
        # treat it like a missing language instead of failing on formats[-1]
        if not formats:
            self.report_warning(f'{lang} subtitles not available for {video_id}')
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            matches = [sub for sub in formats if sub['ext'] == ext]
            if matches:
                chosen = matches[-1]
                break
        else:
            # None of the preferred extensions is available: fall back to the last format
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs

def _forceprint(self, key, info_dict):

2845

if info_dict is None:

2846

return

2847

info_copy = info_dict.copy()

2848

info_copy['formats_table'] = self.render_formats_table(info_dict)

2849

info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)

2850

info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))

2851

info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))

2852

2853

def format_tmpl(tmpl):

2854

mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)

if not mobj:

return tmpl

fmt = '%({})s'

if tmpl.startswith('{'):

2860

tmpl = f'.{tmpl}'

2861

if tmpl.endswith('='):

2862

tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'

2863

return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))

2864

2865

for tmpl in self.params['forceprint'].get(key, []):

2866

self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))

2867

2868

for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):

2869

filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)

2870

tmpl = format_tmpl(tmpl)

2871

self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')

2872

if self._ensure_dir_exists(filename):

2873

with open(filename, 'a', encoding='utf-8') as f:

2874

f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')

2875

2876

def __forced_printings(self, info_dict, filename, incomplete):

2877

def print_mandatory(field, actual_field=None):

2878

if actual_field is None:

2879

actual_field = field

2880

if (self.params.get('force%s' % field, False)

2881

and (not incomplete or info_dict.get(actual_field) is not None)):

2882

self.to_stdout(info_dict[actual_field])

2883

2884

def print_optional(field):

2885

if (self.params.get('force%s' % field, False)

2886

and info_dict.get(field) is not None):

2887

self.to_stdout(info_dict[field])

2888

2889

info_dict = info_dict.copy()

2890

if filename is not None:

2891

info_dict['filename'] = filename

2892

if info_dict.get('requested_formats') is not None:

2893

# For RTMP URLs, also include the playpath

2894

info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])

2895

elif info_dict.get('url'):

2896

info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

2897

2898

if (self.params.get('forcejson')

2899

or self.params['forceprint'].get('video')

2900

or self.params['print_to_file'].get('video')):

2901

self.post_extract(info_dict)

2902

self._forceprint('video', info_dict)

2903

2904

print_mandatory('title')

2905

print_mandatory('id')

2906

print_mandatory('url', 'urls')

2907

print_optional('thumbnail')

2908

print_optional('description')

2909

print_optional('filename')

2910

if self.params.get('forceduration') and info_dict.get('duration') is not None:

2911

self.to_stdout(formatSeconds(info_dict['duration']))

2912

print_mandatory('format')

2913

2914

if self.params.get('forcejson'):

2915

self.to_stdout(json.dumps(self.sanitize_info(info_dict)))

2916

2917

def dl(self, name, info, subtitle=False, test=False):
    """Download `info` to `name` using a suitable downloader.

    @param name         Target filename; "-" selects a downloader for stdout
    @param info         Info dict of the item to download; a copy is passed
                        to the downloader
    @param subtitle     Passed through to the downloader's `download`
    @param test         If true, a fixed set of test parameters is used instead
                        of self.params and no progress hooks are attached
    @returns            Whatever the downloader's `download` returns
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    params = self.params
    if test:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_cls(self, params)
    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)

        # Do not dump the payload of data: URLs into the debug output
        targets = info.get('requested_formats', []) or [info]
        shown = []
        for fmt in targets:
            if fmt['url'].startswith('data:'):
                shown.append(fmt['url'].split(',')[0] + ',<data>')
            else:
                shown.append(fmt['url'])
        urls = '", "'.join(shown)
        self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')

    # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
    # But it may contain objects that are not deep-copyable
    new_info = self._copy_infodict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)

2951

2952

def existing_file(self, filepaths, *, default_overwrite=True):
    """Return the first already existing file, or delete all of them.

    @param filepaths            Candidate paths; duplicates are ignored
    @param default_overwrite    Value assumed for the "overwrites" param
                                when it is not set
    @returns    The first existing path when overwriting is disabled;
                otherwise every existing path is removed and None is returned
    """
    found = [path for path in orderedSet(filepaths) if os.path.exists(path)]
    if not found:
        return None
    if not self.params.get('overwrites', default_overwrite):
        return found[0]

    for path in found:
        self.report_file_delete(path)
        os.remove(path)
    return None

def process_info(self, info_dict):

2963

"""Process a single resolved IE result. (Modifies it in-place)"""

2964

2965

assert info_dict.get('_type', 'video') == 'video'

2966

original_infodict = info_dict

2967

2968

if 'format' not in info_dict and 'ext' in info_dict:

2969

info_dict['format'] = info_dict['ext']

2970

2971

if self._match_entry(info_dict) is not None:

2972

info_dict['__write_download_archive'] = 'ignore'

2973

return

2974

2975

# Does nothing under normal operation - for backward compatibility of process_info

2976

self.post_extract(info_dict)

2977

2978

def replace_info_dict(new_info):

2979

nonlocal info_dict

2980

if new_info == info_dict:

2981

return

2982

info_dict.clear()

2983

info_dict.update(new_info)

2984

2985

new_info, _ = self.pre_process(info_dict, 'video')

2986

replace_info_dict(new_info)

2987

self._num_downloads += 1

2988

2989

# info_dict['_filename'] needs to be set for backward compatibility

2990

info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)

2991

temp_filename = self.prepare_filename(info_dict, 'temp')

files_to_move = {}

# Forced printings

self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

2996

2997

def check_max_downloads():

2998

if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):

2999

raise MaxDownloadsReached()

3000

3001

if self.params.get('simulate'):

3002

info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')

3003

check_max_downloads()

3004

return

3005

3006

if full_filename is None:

3007

return

3008

if not self._ensure_dir_exists(encodeFilename(full_filename)):

3009

return

3010

if not self._ensure_dir_exists(encodeFilename(temp_filename)):

3011

return

3012

3013

if self._write_description('video', info_dict,

3014

self.prepare_filename(info_dict, 'description')) is None:

3015

return

3016

3017

sub_files = self._write_subtitles(info_dict, temp_filename)

3018

if sub_files is None:

3019

return

3020

files_to_move.update(dict(sub_files))

3021

3022

thumb_files = self._write_thumbnails(

3023

'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))

3024

if thumb_files is None:

3025

return

3026

files_to_move.update(dict(thumb_files))

3027

3028

infofn = self.prepare_filename(info_dict, 'infojson')

3029

_infojson_written = self._write_info_json('video', info_dict, infofn)

3030

if _infojson_written:

3031

info_dict['infojson_filename'] = infofn

3032

# For backward compatibility, even though it was a private field

3033

info_dict['__infojson_filename'] = infofn

3034

elif _infojson_written is None:

3035

return

3036

3037

# Note: Annotations are deprecated

3038

annofn = None

3039

if self.params.get('writeannotations', False):

3040

annofn = self.prepare_filename(info_dict, 'annotation')

3041

if annofn:

3042

if not self._ensure_dir_exists(encodeFilename(annofn)):

3043

return

3044

if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):

3045

self.to_screen('[info] Video annotations are already present')

3046

elif not info_dict.get('annotations'):

3047

self.report_warning('There are no annotations to write.')

3048

else:

3049

try:

3050

self.to_screen('[info] Writing video annotations to: ' + annofn)

3051

with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:

3052

annofile.write(info_dict['annotations'])

3053

except (KeyError, TypeError):

3054

self.report_warning('There are no annotations to write.')

3055

except OSError:

3056

self.report_error('Cannot write annotations file: ' + annofn)

3057

return

3058

3059

# Write internet shortcut files

3060

def _write_link_file(link_type):

3061

url = try_get(info_dict['webpage_url'], iri_to_uri)

3062

if not url:

3063

self.report_warning(

3064

f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')

3065

return True

3066

linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))

3067

if not self._ensure_dir_exists(encodeFilename(linkfn)):

3068

return False

3069

if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):

3070

self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')

3071

return True

3072

try:

3073

self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')

3074

with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',

3075

newline='\r\n' if link_type == 'url' else '\n') as linkfile:

3076

template_vars = {'url': url}

3077

if link_type == 'desktop':

3078

template_vars['filename'] = linkfn[:-(len(link_type) + 1)]

3079

linkfile.write(LINK_TEMPLATES[link_type] % template_vars)

3080

except OSError:

3081

self.report_error(f'Cannot write internet shortcut {linkfn}')

return False

return True

write_links = {

'url': self.params.get('writeurllink'),

3087

'webloc': self.params.get('writewebloclink'),

3088

'desktop': self.params.get('writedesktoplink'),

3089

}

3090

if self.params.get('writelink'):

3091

link_type = ('webloc' if sys.platform == 'darwin'

3092

else 'desktop' if sys.platform.startswith('linux')

3093

else 'url')

3094

write_links[link_type] = True

3095

3096

if any(should_write and not _write_link_file(link_type)

3097

for link_type, should_write in write_links.items()):

3098

return

3099

3100

new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)

3101

replace_info_dict(new_info)

3102

3103

if self.params.get('skip_download'):

3104

info_dict['filepath'] = temp_filename

3105

info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

3106

info_dict['__files_to_move'] = files_to_move

3107

replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))

3108

info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')

3109

else:

3110

# Download

3111

info_dict.setdefault('__postprocessors', [])

3112

try:

3113

3114

def existing_video_file(*filepaths):

3115

ext = info_dict.get('ext')

3116

converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)

3117

file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),

3118

default_overwrite=False)

3119

if file:

3120

info_dict['ext'] = os.path.splitext(file)[1][1:]

3121

return file

3122

3123

fd, success = None, True

3124

if info_dict.get('protocol') or info_dict.get('url'):

3125

fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')

3126

if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (

3127

info_dict.get('section_start') or info_dict.get('section_end')):

3128

msg = ('This format cannot be partially downloaded' if FFmpegFD.available()

3129

else 'You have requested downloading the video partially, but ffmpeg is not installed')

3130

self.report_error(f'{msg}. Aborting')

3131

return

3132

3133

if info_dict.get('requested_formats') is not None:

3134

requested_formats = info_dict['requested_formats']

3135

old_ext = info_dict['ext']

3136

if self.params.get('merge_output_format') is None:

3137

if (info_dict['ext'] == 'webm'

3138

and info_dict.get('thumbnails')

3139

# check with type instead of pp_key, __name__, or isinstance

3140

# since we dont want any custom PPs to trigger this

3141

and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721

3142

info_dict['ext'] = 'mkv'

3143

self.report_warning(

3144

'webm doesn\'t support embedding a thumbnail, mkv will be used')

3145

new_ext = info_dict['ext']

3146

3147

def correct_ext(filename, ext=new_ext):

3148

if filename == '-':

3149

return filename

3150

filename_real_ext = os.path.splitext(filename)[1][1:]

3151

filename_wo_ext = (

3152

os.path.splitext(filename)[0]

3153

if filename_real_ext in (old_ext, new_ext)

3154

else filename)

3155

return f'{filename_wo_ext}.{ext}'

3156

3157

# Ensure filename always has a correct extension for successful merge

3158

full_filename = correct_ext(full_filename)

3159

temp_filename = correct_ext(temp_filename)

3160

dl_filename = existing_video_file(full_filename, temp_filename)

3161

info_dict['__real_download'] = False

3162

3163

merger = FFmpegMergerPP(self)

3164

downloaded = []

3165

if dl_filename is not None:

3166

self.report_file_already_downloaded(dl_filename)

3167

elif fd:

3168

for f in requested_formats if fd != FFmpegFD else []:

3169

f['filepath'] = fname = prepend_extension(

3170

correct_ext(temp_filename, info_dict['ext']),

3171

'f%s' % f['format_id'], info_dict['ext'])

3172

downloaded.append(fname)

3173

info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)

3174

success, real_download = self.dl(temp_filename, info_dict)

3175

info_dict['__real_download'] = real_download

3176

else:

3177

if self.params.get('allow_unplayable_formats'):

3178

self.report_warning(

3179

'You have requested merging of multiple formats '

3180

'while also allowing unplayable formats to be downloaded. '

3181

'The formats won\'t be merged to prevent data corruption.')

3182

elif not merger.available:

3183

msg = 'You have requested merging of multiple formats but ffmpeg is not installed'

3184

if not self.params.get('ignoreerrors'):

3185

self.report_error(f'{msg}. Aborting due to --abort-on-error')

3186

return

3187

self.report_warning(f'{msg}. The formats won\'t be merged')

3188

3189

if temp_filename == '-':

3190

reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)

3191

else 'but the formats are incompatible for simultaneous download' if merger.available

3192

else 'but ffmpeg is not installed')

3193

self.report_warning(

3194

f'You have requested downloading multiple formats to stdout {reason}. '

3195

'The formats will be streamed one after the other')

3196

fname = temp_filename

3197

for f in requested_formats:

3198

new_info = dict(info_dict)

3199

del new_info['requested_formats']

3200

new_info.update(f)

3201

if temp_filename != '-':

3202

fname = prepend_extension(

3203

correct_ext(temp_filename, new_info['ext']),

3204

'f%s' % f['format_id'], new_info['ext'])

3205

if not self._ensure_dir_exists(fname):

3206

return

3207

f['filepath'] = fname

3208

downloaded.append(fname)

3209

partial_success, real_download = self.dl(fname, new_info)

3210

info_dict['__real_download'] = info_dict['__real_download'] or real_download

3211

success = success and partial_success

3212

3213

if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):

3214

info_dict['__postprocessors'].append(merger)

3215

info_dict['__files_to_merge'] = downloaded

3216

# Even if there were no downloads, it is being merged only now

3217

info_dict['__real_download'] = True

3218

else:

3219

for file in downloaded:

3220

files_to_move[file] = None

3221

else:

3222

# Just a single file

3223

dl_filename = existing_video_file(full_filename, temp_filename)

3224

if dl_filename is None or dl_filename == temp_filename:

3225

# dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.

3226

# So we should try to resume the download

3227

success, real_download = self.dl(temp_filename, info_dict)

3228

info_dict['__real_download'] = real_download

3229

else:

3230

self.report_file_already_downloaded(dl_filename)

3231

3232

dl_filename = dl_filename or temp_filename

3233

info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

3234

3235

except network_exceptions as err:

3236

self.report_error('unable to download video data: %s' % error_to_compat_str(err))

3237

return

3238

except OSError as err:

3239

raise UnavailableVideoError(err)

3240

except (ContentTooShortError, ) as err:

3241

self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')

3242

return

3243

3244

self._raise_pending_errors(info_dict)

3245

if success and full_filename != '-':

def fixup():

do_fixup = True

fixup_policy = self.params.get('fixup')

3250

vid = info_dict['id']

3251

3252

if fixup_policy in ('ignore', 'never'):

3253

return

3254

elif fixup_policy == 'warn':

3255

do_fixup = 'warn'

3256

elif fixup_policy != 'force':

3257

assert fixup_policy in ('detect_or_warn', None)

3258

if not info_dict.get('__real_download'):

3259

do_fixup = False

3260

3261

def ffmpeg_fixup(cndn, msg, cls):

3262

if not (do_fixup and cndn):

3263

return

3264

elif do_fixup == 'warn':

3265

self.report_warning(f'{vid}: {msg}')

return

pp = cls(self)

if pp.available:

info_dict['__postprocessors'].append(pp)

3270

else:

3271

self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

3272

3273

stretched_ratio = info_dict.get('stretched_ratio')

3274

ffmpeg_fixup(stretched_ratio not in (1, None),

3275

f'Non-uniform pixel ratio {stretched_ratio}',

3276

FFmpegFixupStretchedPP)

3277

3278

downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None

3279

downloader = downloader.FD_NAME if downloader else None

3280

3281

ext = info_dict.get('ext')

3282

postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((

3283

isinstance(pp, FFmpegVideoConvertorPP)

3284

and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)

3285

) for pp in self._pps['post_process'])

3286

3287

if not postprocessed_by_ffmpeg:

3288

ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',

3289

'writing DASH m4a. Only some players support this container',

3290

FFmpegFixupM4aPP)

3291

ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')

3292

or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,

3293

'Possible MPEG-TS in MP4 container or malformed AAC timestamps',

3294

FFmpegFixupM3u8PP)

3295

ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',

3296

'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

3297

3298

ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)

3299

ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)

fixup()

try:

replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))

3304

except PostProcessingError as err:

3305

self.report_error('Postprocessing: %s' % str(err))

3306

return

3307

try:

3308

for ph in self._post_hooks:

3309

ph(info_dict['filepath'])

3310

except Exception as err:

3311

self.report_error('post hooks: %s' % str(err))

3312

return

3313

info_dict['__write_download_archive'] = True

3314

3315

assert info_dict is original_infodict # Make sure the info_dict was modified in-place

3316

if self.params.get('force_write_download_archive'):

3317

info_dict['__write_download_archive'] = True

3318

check_max_downloads()

3319

3320

def __download_wrapper(self, func):

3321

@functools.wraps(func)

3322

def wrapper(*args, **kwargs):

3323

try:

3324

res = func(*args, **kwargs)

3325

except UnavailableVideoError as e:

3326

self.report_error(e)

3327

except DownloadCancelled as e:

3328

self.to_screen(f'[info] {e}')

3329

if not self.params.get('break_per_url'):

3330

raise

3331

self._num_downloads = 0

3332

else:

3333

if self.params.get('dump_single_json', False):

3334

self.post_extract(res)

3335

self.to_stdout(json.dumps(self.sanitize_info(res)))

3336

return wrapper

3337

3338

def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one fixed
    (non-templated) output name. Returns ``self._download_retcode``.
    """
    url_list = variadic(url_list)  # Passing a single URL is a common mistake
    template = self.params['outtmpl']['default']
    several_urls = len(url_list) > 1
    if several_urls and template != '-' and '%' not in template:
        # A literal filename can only hold one download
        if self.params.get('max_downloads') != 1:
            raise SameFileError(template)

    for url in url_list:
        guarded_extract = self.__download_wrapper(self.extract_info)
        guarded_extract(
            url, force_generic_extractor=self.params.get('force_generic_extractor', False))

    return self._download_retcode

3353

3354

def download_with_info_file(self, info_filename):
    """Download using a previously written info JSON file.

    The file is read as UTF-8, sanitized, and passed to ``process_ie_result``.
    If that fails with DownloadError, EntryNotInPlaylist or ReExtractInfo and
    the info has a ``webpage_url``, the download is retried from that URL;
    otherwise the error is re-raised. Returns ``self._download_retcode``
    (or the result of the fallback ``download`` call).
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        info = self.sanitize_info(json.loads('\n'.join(lines)), self.params.get('clean_infojson', True))

    try:
        self.__download_wrapper(self.process_ie_result)(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as exc:
        if not isinstance(exc, EntryNotInPlaylist):
            self.to_stderr('\r')
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning(f'The info failed to download: {exc}; trying with URL {fallback_url}')
        return self.download([fallback_url])
    return self._download_retcode

3372

3373

@staticmethod
def sanitize_info(info_dict, remove_private_keys=False):
    ''' Sanitize the infodict for converting to json

    Fills in default 'epoch', '_type' and '_version' entries on info_dict
    (in place) and returns a filtered copy: dicts and sequences are copied
    recursively, JSON-native scalars are kept, anything else becomes repr().
    With remove_private_keys, None values, keys starting with '__' and a
    fixed set of internal keys are dropped. None is returned unchanged.
    '''
    if info_dict is None:
        return info_dict

    info_dict.setdefault('epoch', int(time.time()))
    info_dict.setdefault('_type', 'video')
    info_dict.setdefault('_version', {
        'version': __version__,
        'current_git_head': current_git_head(),
        'release_git_head': RELEASE_GIT_HEAD,
        'repository': REPOSITORY,
    })

    internal_keys = {
        'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
        'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
        '_format_sort_fields',
    }

    def is_rejected(key, value):
        if not remove_private_keys:
            return False
        return value is None or key.startswith('__') or key in internal_keys

    def sanitize(obj):
        if isinstance(obj, dict):
            return {key: sanitize(value) for key, value in obj.items() if not is_rejected(key, value)}
        if isinstance(obj, (list, tuple, set, LazyList)):
            return [sanitize(item) for item in obj]
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        return repr(obj)

    return sanitize(info_dict)

3407

3408

@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    ''' Alias of sanitize_info for backward compatibility

    actually_filter is forwarded as sanitize_info's remove_private_keys.
    '''
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)

3412

3413

def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):

3414

for filename in set(filter(None, files_to_delete)):

3415

if msg:

3416

self.to_screen(msg % filename)

try:

os.remove(filename)

except OSError:

self.report_warning(f'Unable to delete file {filename}')

3421

if filename in info.get('__files_to_move', []): # NB: Delete even if None

3422

del info['__files_to_move'][filename]

3423

3424

@staticmethod

3425

def post_extract(info_dict):

3426

def actual_post_extract(info_dict):

3427

if info_dict.get('_type') in ('playlist', 'multi_video'):

3428

for video_dict in info_dict.get('entries', {}):

3429

actual_post_extract(video_dict or {})

3430

return

3431

3432

post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})

3433

info_dict.update(post_extractor())

3434

3435

actual_post_extract(info_dict or {})

3436

3437

def run_pp(self, pp, infodict):
    """Run a single postprocessor and handle the files it wants deleted.

    ``pp.run`` returns ``(files_to_delete, infodict)``. With the 'keepvideo'
    param those files are registered in '__files_to_move' (with an empty
    target) instead of being deleted. A PostProcessingError is reported
    and swallowed only when 'ignoreerrors' is exactly True.
    Returns the (possibly replaced) infodict.
    """
    infodict.setdefault('__files_to_move', {})
    try:
        obsolete_files, infodict = pp.run(infodict)
    except PostProcessingError as err:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is not True:
            raise
        self.report_error(err)
        return infodict

    if obsolete_files:
        if self.params.get('keepvideo', False):
            for path in obsolete_files:
                infodict['__files_to_move'].setdefault(path, '')
        else:
            self._delete_downloaded_files(
                *obsolete_files, info=infodict, msg='Deleting original file %s (pass -k to keep)')
    return infodict

3459

3460

def run_all_pps(self, key, info, *, additional_pps=None):
    """Run ``additional_pps`` followed by the postprocessors registered under ``key``.

    For every key except 'video', ``_forceprint`` is invoked first.
    Each postprocessor receives the info returned by the previous one;
    the final info is returned.
    """
    if key != 'video':
        self._forceprint(key, info)
    pipeline = (additional_pps or []) + self._pps[key]
    for processor in pipeline:
        info = self.run_pp(processor, info)
    return info

3466

3467

def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors for ``key`` on a shallow copy of ``ie_info``.

    A PostProcessingError is not raised: its message is stored under
    '__pending_error' (unless already set) and reported with is_error=False.
    Returns ``(info, files_to_move)``, with '__files_to_move' removed from info.
    """
    info = dict(ie_info)
    info['__files_to_move'] = files_to_move or {}
    try:
        info = self.run_all_pps(key, info)
    except PostProcessingError as err:
        message = f'Preprocessing: {err}'
        info.setdefault('__pending_error', message)
        self.report_error(message, is_error=False)
    pending_moves = info.pop('__files_to_move', None)
    return info, pending_moves

3477

3478

def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Runs, in order: the info's own '__postprocessors' plus the registered
    'post_process' ones, then MoveFilesAfterDownloadPP, then the
    'after_move' ones. Returns the resulting info dict.
    """
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move or {}

    extra_pps = info.get('__postprocessors')
    info = self.run_all_pps('post_process', info, additional_pps=extra_pps)

    mover = MoveFilesAfterDownloadPP(self)
    info = self.run_pp(mover, info)
    del info['__files_to_move']

    return self.run_all_pps('after_move', info)

3486

3487

def _make_archive_id(self, info_dict):

3488

video_id = info_dict.get('id')

3489

if not video_id:

3490

return

3491

# Future-proof against any change in case

3492

# and backwards compatibility with prior versions

3493

extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist

3494

if extractor is None:

3495

url = str_or_none(info_dict.get('url'))

3496

if not url:

3497

return

3498

# Try to find matching extractor for the URL and take its ie_key

3499

for ie_key, ie in self._ies.items():

if ie.suitable(url):

extractor = ie_key

break

else:

return

return make_archive_id(extractor, video_id)

3506

3507

def in_download_archive(self, info_dict):
    """Return whether the video's archive ID, or any of its '_old_archive_ids', is in ``self.archive``.

    Always False when the archive is empty or unset.
    """
    if not self.archive:
        return False

    candidate_ids = [
        self._make_archive_id(info_dict),
        *(info_dict.get('_old_archive_ids') or []),
    ]
    return any(archive_id in self.archive for archive_id in candidate_ids)

3514

3515

def record_download_archive(self, info_dict):
    """Add the video's archive ID to ``self.archive``.

    Does nothing when the 'download_archive' param is None. When that param
    is path-like, the ID is also appended (one per line) to that file.
    """
    archive_target = self.params.get('download_archive')
    if archive_target is None:
        return
    archive_id = self._make_archive_id(info_dict)
    assert archive_id

    self.write_debug(f'Adding to archive: {archive_id}')
    if is_path_like(archive_target):
        with locked_file(archive_target, 'a', encoding='utf-8') as archive_file:
            archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)

3527

3528

@staticmethod

3529

def format_resolution(format, default='unknown'):

3530

if format.get('vcodec') == 'none' and format.get('acodec') != 'none':

3531

return 'audio only'

3532

if format.get('resolution') is not None:

3533

return format['resolution']

3534

if format.get('width') and format.get('height'):

3535

return '%dx%d' % (format['width'], format['height'])

3536

elif format.get('height'):

3537

return '%sp' % format['height']

3538

elif format.get('width'):

3539

return '%dx?' % format['width']

3540

return default

3541

3542

def _list_format_headers(self, *headers):

3543

if self.params.get('listformats_table', True) is not False:

3544

return [self._format_out(header, self.Styles.HEADERS) for header in headers]

3545

return headers

3546

3547

def _format_note(self, fdict):

3548

res = ''

3549

if fdict.get('ext') in ['f4f', 'f4m']:

3550

res += '(unsupported)'

3551

if fdict.get('language'):

3552

if res:

3553

res += ' '

3554

res += '[%s]' % fdict['language']

3555

if fdict.get('format_note') is not None:

3556

if res:

3557

res += ' '

3558

res += fdict['format_note']

3559

if fdict.get('tbr') is not None:

3560

if res:

3561

res += ', '

3562

res += '%4dk' % fdict['tbr']

3563

if fdict.get('container') is not None:

3564

if res:

3565

res += ', '

3566

res += '%s container' % fdict['container']

3567

if (fdict.get('vcodec') is not None

3568

and fdict.get('vcodec') != 'none'):

3569

if res:

3570

res += ', '

3571

res += fdict['vcodec']

3572

if fdict.get('vbr') is not None:

3573

res += '@'

3574

elif fdict.get('vbr') is not None and fdict.get('abr') is not None:

3575

res += 'video@'

3576

if fdict.get('vbr') is not None:

3577

res += '%4dk' % fdict['vbr']

3578

if fdict.get('fps') is not None:

3579

if res:

3580

res += ', '

3581

res += '%sfps' % fdict['fps']

3582

if fdict.get('acodec') is not None:

3583

if res:

3584

res += ', '

3585

if fdict['acodec'] == 'none':

3586

res += 'video only'

3587

else:

3588

res += '%-5s' % fdict['acodec']

3589

elif fdict.get('abr') is not None:

if res:

res += ', '

res += 'audio'

if fdict.get('abr') is not None:

3594

res += '@%3dk' % fdict['abr']

3595

if fdict.get('asr') is not None:

3596

res += ' (%5dHz)' % fdict['asr']

3597

if fdict.get('filesize') is not None:

3598

if res:

3599

res += ', '

3600

res += format_bytes(fdict['filesize'])

3601

elif fdict.get('filesize_approx') is not None:

3602

if res:

3603

res += ', '

3604

res += '~' + format_bytes(fdict['filesize_approx'])

3605

return res

3606

3607

def _get_formats(self, info_dict):

3608

if info_dict.get('formats') is None:

3609

if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':

3610

return [info_dict]

3611

return []

3612

return info_dict['formats']

3613

3614

def render_formats_table(self, info_dict):
    """Render the table of available formats for ``info_dict``.

    Returns None when there are no formats. When the 'listformats_table'
    param is exactly False, the legacy four-column table is produced;
    otherwise the detailed table. Formats with a 'preference' below -1000
    are left out in both layouts.
    """
    formats = self._get_formats(info_dict)
    if not formats:
        return

    if self.params.get('listformats_table', True) is False:
        legacy_rows = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f),
            ] for f in formats if (f.get('preference') or 0) >= -1000]
        return render_table(['format code', 'extension', 'resolution', 'note'], legacy_rows, extra_gap=1)

    def simplified_codec(f, field):
        # Shorten a codec string to at most 4 dot-separated parts, or
        # describe what kind of stream a 'none' codec implies
        assert field in ('acodec', 'vcodec')
        codec = f.get(field, 'unknown')
        if not codec:
            return 'unknown'
        if codec != 'none':
            return '.'.join(codec.split('.')[:4])

        if field == 'vcodec' and f.get('acodec') == 'none':
            return 'images'
        if field == 'acodec' and f.get('vcodec') == 'none':
            return ''
        label = 'audio only' if field == 'vcodec' else 'video only'
        return self._format_out(label, self.Styles.SUPPRESS)

    delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)

    def build_row(f):
        return [
            self._format_out(format_field(f, 'format_id'), self.Styles.ID),
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d', func=round),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            format_field(f, 'audio_channels', '\t%s'),
            delim,
            format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
            format_field(f, 'tbr', '\t%dk', func=round),
            shorten_protocol_name(f.get('protocol', '')),
            delim,
            simplified_codec(f, 'vcodec'),
            format_field(f, 'vbr', '\t%dk', func=round),
            simplified_codec(f, 'acodec'),
            format_field(f, 'abr', '\t%dk', func=round),
            format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
            join_nonempty(
                self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
                format_field(f, 'language', '[%s]'),
                join_nonempty(format_field(f, 'format_note'),
                              format_field(f, 'container', ignore=(None, f.get('ext'))),
                              delim=', '),
                delim=' '),
        ]

    rows = [
        build_row(f) for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
        delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    return render_table(
        header_line, rows, hide_empty=True,
        delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))

3677

3678

def render_thumbnails_table(self, info_dict):
    """Render a table (ID, Width, Height, URL) of the info dict's thumbnails.

    Returns None when there are no thumbnails. Missing or falsy width and
    height are shown as 'unknown'.
    """
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        return None

    headers = self._list_format_headers('ID', 'Width', 'Height', 'URL')
    rows = [
        [thumb.get('id'), thumb.get('width') or 'unknown', thumb.get('height') or 'unknown', thumb['url']]
        for thumb in thumbnails]
    return render_table(headers, rows)

3685

3686

def render_subtitles_table(self, video_id, subtitles):
    """Render a table (Language, Name, Formats) of the given subtitles.

    ``subtitles`` maps a language to a list of format dicts. Returns None
    when it is empty. ``video_id`` is not used by this method.
    """
    def build_row(lang, formats):
        # Formats are listed in reverse order of the input list
        pairs = [(fmt['ext'], fmt.get('name') or 'unknown') for fmt in reversed(formats)]
        exts, names = zip(*pairs)
        if len(set(names)) == 1:
            # Collapse identical names; hide the column value if unknown
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    if not subtitles:
        return None

    headers = self._list_format_headers('Language', 'Name', 'Formats')
    rows = [build_row(lang, formats) for lang, formats in subtitles.items()]
    return render_table(headers, rows, hide_empty=True)

3699

3700

def __list_table(self, video_id, name, func, *args):

3701

table = func(*args)

3702

if not table:

3703

self.to_screen(f'{video_id} has no {name}')

3704

return

3705

self.to_screen(f'[info] Available {name} for {video_id}:')

3706

self.to_stdout(table)

3707

3708

def list_formats(self, info_dict):
    """Print the formats table for ``info_dict``, labelled with its 'id'."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'formats', self.render_formats_table, info_dict)

3710

3711

def list_thumbnails(self, info_dict):
    """Print the thumbnails table for ``info_dict``, labelled with its 'id'."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'thumbnails', self.render_thumbnails_table, info_dict)

3713

3714

def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the subtitles table for ``video_id``; ``name`` is the label used in messages."""
    renderer = self.render_subtitles_table
    self.__list_table(video_id, name, renderer, video_id, subtitles)

3716

3717

def urlopen(self, req):
    """ Start an HTTP download

    A string ``req`` is first converted with ``sanitized_Request``; the
    request is opened through ``self._opener`` with the configured socket timeout.
    """
    request = sanitized_Request(req) if isinstance(req, str) else req
    return self._opener.open(request, timeout=self._socket_timeout)

3722

3723

def print_debug_header(self):
    """Write the verbose-mode debug header; does nothing unless the 'verbose' param is set.

    Emits, in order: stream encodings, version/variant line, params (API
    use only), lazy-extractor status, compat options, git head, system
    identifier, external program versions, optional libraries, proxy map,
    loaded plugins and plugin directories. Messages go to the 'logger'
    param when one is set, otherwise through ``_write_string``.
    """
    if not self.params.get('verbose'):
        return

    from . import _IN_CLI  # Must be delayed import

    # These imports can be slow. So import them only as needed
    from .extractor.extractors import _LAZY_LOADER
    from .extractor.extractors import _PLUGIN_CLASSES as plugin_ies
    from .extractor.extractors import _PLUGIN_OVERRIDES as plugin_ie_overrides

    def get_encoding(stream):
        description = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
        if not supports_terminal_sequences(stream):
            from .utils import WINDOWS_VT_MODE  # Must be imported locally
            description += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
        return description

    encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        self.get_encoding(),
        ', '.join(
            f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
            if stream is not None and key != 'console')
    )

    logger = self.params.get('logger')
    if logger:
        def write_debug(msg):
            return logger.debug(f'[debug] {msg}')

        write_debug(encoding_str)
    else:
        write_string(f'[debug] {encoding_str}\n', encoding=None)

        def write_debug(msg):
            return self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    if VARIANT not in (None, 'pip'):
        source += '*'
    write_debug(join_nonempty(
        f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
        __version__,
        f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        '' if _IN_CLI else 'API',
        delim=' '))

    if not _IN_CLI:
        write_debug(f'params: {self.params}')

    if not _LAZY_LOADER:
        forced = os.environ.get('YTDLP_NO_LAZY_EXTRACTORS')
        write_debug(
            'Lazy loading extractors is forcibly disabled' if forced
            else 'Lazy loading extractors is disabled')
    if self.params['compat_opts']:
        write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

    if current_git_head():
        write_debug(f'Git HEAD: {current_git_head()}')
    write_debug(system_identifier())

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    ffmpeg_features = {feature for feature, enabled in ffmpeg_features.items() if enabled}
    if ffmpeg_features:
        exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {version}' for exe, version in sorted(exe_versions.items()) if version
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .compat.compat_utils import get_package_info
    from .dependencies import available_dependencies

    library_names = sorted({
        join_nonempty(*get_package_info(module)) for module in available_dependencies.values()
    })
    write_debug('Optional libraries: %s' % (', '.join(library_names) or 'none'))

    self._setup_opener()
    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_debug(f'Proxy map: {proxy_map}')

    for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
        display_list = [
            '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in plugins.items()]
        if plugin_type == 'Extractor':
            display_list.extend(
                f'{overrides[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
                for parent, overrides in plugin_ie_overrides.items())
        if not display_list:
            continue
        write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')

    plugin_dirs = plugin_directories()
    if plugin_dirs:
        write_debug(f'Plugin directories: {plugin_dirs}')

    # Not implemented
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
        write_debug('Public IP address: %s' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode()
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)

3838

3839

def _setup_opener(self):

3840

if hasattr(self, '_opener'):

3841

return

3842

timeout_val = self.params.get('socket_timeout')

3843

self._socket_timeout = 20 if timeout_val is None else float(timeout_val)

3844

3845

opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')

3846

opts_cookiefile = self.params.get('cookiefile')

3847

opts_proxy = self.params.get('proxy')

3848

3849

self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)

3850

3851

cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

3852

if opts_proxy is not None:

if opts_proxy == '':

proxies = {}

else:

proxies = {'http': opts_proxy, 'https': opts_proxy}

3857

else:

3858

proxies = urllib.request.getproxies()

3859

# Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)

3860

if 'http' in proxies and 'https' not in proxies:

3861

proxies['https'] = proxies['http']

3862

proxy_handler = PerRequestProxyHandler(proxies)

3863

3864

debuglevel = 1 if self.params.get('debug_printtraffic') else 0

3865

https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)

3866

ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)

3867

redirect_handler = YoutubeDLRedirectHandler()

3868

data_handler = urllib.request.DataHandler()

3869

3870

# When passing our own FileHandler instance, build_opener won't add the

3871

# default FileHandler and allows us to disable the file protocol, which

3872

# can be used for malicious purposes (see

3873

# https://github.com/ytdl-org/youtube-dl/issues/8227)

3874

file_handler = urllib.request.FileHandler()

3875

3876

if not self.params.get('enable_file_urls'):

3877

def file_open(*args, **kwargs):

3878

raise urllib.error.URLError(

3879

'file:// URLs are explicitly disabled in yt-dlp for security reasons. '

3880

'Use --enable-file-urls to enable at your own risk.')

3881

file_handler.file_open = file_open

3882

3883

opener = urllib.request.build_opener(

3884

proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

3885

3886

# Delete the default user-agent header, which would otherwise apply in

3887

# cases where our custom HTTP handler doesn't come into play

3888

# (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)

3889

opener.addheaders = []

3890

self._opener = opener

3891

3892

def encode(self, s):
    """Encode ``s`` using ``self.get_encoding()``; bytes are returned unchanged.

    On UnicodeEncodeError, a hint about the system encoding / --encoding
    option is appended to the error's reason before it is re-raised.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    target_encoding = self.get_encoding()
    try:
        return s.encode(target_encoding)
    except UnicodeEncodeError as err:
        err.reason += '. Check your system encoding configuration or use the --encoding option.'
        raise

3901

3902

def get_encoding(self):
    """Return the 'encoding' param, falling back to ``preferredencoding()`` when it is None."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured

3907

3908

def _write_info_json(self, label, ie_result, infofn, overwrite=None):

3909

''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''

3910

if overwrite is None:

3911

overwrite = self.params.get('overwrites', True)

3912

if not self.params.get('writeinfojson'):

3913

return False

3914

elif not infofn:

3915

self.write_debug(f'Skipping writing {label} infojson')

3916

return False

3917

elif not self._ensure_dir_exists(infofn):

3918

return None

3919

elif not overwrite and os.path.exists(infofn):

3920

self.to_screen(f'[info] {label.title()} metadata is already present')

3921

return 'exists'

3922

3923

self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')

3924

try:

3925

write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)

3926

return True

3927

except OSError:

3928

self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')

3929

return None

3930

3931

def _write_description(self, label, ie_result, descfn):

3932

''' Write description and returns True = written, False = skip, None = error '''

3933

if not self.params.get('writedescription'):

3934

return False

3935

elif not descfn:

3936

self.write_debug(f'Skipping writing {label} description')

3937

return False

3938

elif not self._ensure_dir_exists(descfn):

3939

return None

3940

elif not self.params.get('overwrites', True) and os.path.exists(descfn):

3941

self.to_screen(f'[info] {label.title()} description is already present')

3942

elif ie_result.get('description') is None:

3943

self.to_screen(f'[info] There\'s no {label} description to write')

return False

else:

try:

self.to_screen(f'[info] Writing {label} description to: {descfn}')

3948

with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:

3949

descfile.write(ie_result['description'])

3950

except OSError:

3951

self.report_error(f'Cannot write {label} description file {descfn}')

return None

return True

def _write_subtitles(self, info_dict, filename):

3956

''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''

3957

ret = []

3958

subtitles = info_dict.get('requested_subtitles')

3959

if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):

3960

# subtitles download errors are already managed as troubles in relevant IE

3961

# that way it will silently go on when used with unsupporting IE

3962

return ret

3963

elif not subtitles:

3964

self.to_screen('[info] There\'s no subtitles for the requested languages')

3965

return ret

3966

sub_filename_base = self.prepare_filename(info_dict, 'subtitle')

3967

if not sub_filename_base:

3968

self.to_screen('[info] Skipping writing video subtitles')

3969

return ret

3970

3971

for sub_lang, sub_info in subtitles.items():

3972

sub_format = sub_info['ext']

3973

sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))

3974

sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))

3975

existing_sub = self.existing_file((sub_filename_final, sub_filename))

3976

if existing_sub:

3977

self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')

3978

sub_info['filepath'] = existing_sub

3979

ret.append((existing_sub, sub_filename_final))

3980

continue

3981

3982

self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')

3983

if sub_info.get('data') is not None:

3984

try:

3985

# Use newline='' to prevent conversion of newline characters

3986

# See https://github.com/ytdl-org/youtube-dl/issues/10268

3987

with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:

3988

subfile.write(sub_info['data'])

3989

sub_info['filepath'] = sub_filename

3990

ret.append((sub_filename, sub_filename_final))

3991

continue

3992

except OSError:

3993

self.report_error(f'Cannot write video subtitles file {sub_filename}')

return None

try:

sub_copy = sub_info.copy()

3998

sub_copy.setdefault('http_headers', info_dict.get('http_headers'))

3999

self.dl(sub_filename, sub_copy, subtitle=True)

4000

sub_info['filepath'] = sub_filename

4001

ret.append((sub_filename, sub_filename_final))

4002

except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:

4003

msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'

4004

if self.params.get('ignoreerrors') is not True: # False or 'only_download'

4005

if not self.params.get('ignoreerrors'):

4006

self.report_error(msg)

4007

raise DownloadError(msg)

4008

self.report_warning(msg)

4009

return ret

4010

4011

def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
    """Write thumbnails to file and return list of (thumb_filename, final_thumb_filename).

    Thumbnails are only considered when the 'write_all_thumbnails' or
    'writethumbnail' param is set; otherwise an empty list is returned.

    @param label                Text used in the log messages (e.g. which
                                kind of thumbnail is being handled)
    @param info_dict            Dict whose 'thumbnails' list and 'ext' value
                                are read
    @param filename             Name from which each thumbnail's path is
                                derived by replacing the extension
    @param thumb_filename_base  Name from which each thumbnail's final path
                                is derived; defaults to `filename`
    @returns                    List of (path on disk, final path) tuples, one
                                per thumbnail that already existed or was
                                downloaded

    Side effects: sets 'filepath' on every thumbnail dict that is present on
    disk, and removes from the thumbnails list any entry whose download failed
    with a network error. Unless 'write_all_thumbnails' is set, processing
    stops after the first thumbnail that is found or downloaded.
    """
    write_all = self.params.get('write_all_thumbnails', False)
    thumbnails, ret = [], []
    if write_all or self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails') or []
        if not thumbnails:
            self.to_screen(f'[info] There\'s no {label} thumbnails to download')
            return ret
    # The thumbnail id is only embedded in the file name when several files may be written
    multiple = write_all and len(thumbnails) > 1

    if thumb_filename_base is None:
        thumb_filename_base = filename
    if thumbnails and not thumb_filename_base:
        self.write_debug(f'Skipping writing {label} thumbnail')
        return ret

    # Iterate over a snapshot from the last index to the first, so that
    # popping a failed entry never shifts the indices still to be visited
    for idx, t in reversed(list(enumerate(thumbnails))):
        thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
        thumb_display_id = f'{label} thumbnail {t["id"]}'
        thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
        thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

        existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
        if existing_thumb:
            self.to_screen('[info] %s is already present' % (
                thumb_display_id if multiple else f'{label} thumbnail').capitalize())
            t['filepath'] = existing_thumb
            ret.append((existing_thumb, thumb_filename_final))
        else:
            self.to_screen(f'[info] Downloading {thumb_display_id} ...')
            try:
                # Close the response deterministically instead of leaving the
                # underlying connection open until garbage collection
                with contextlib.closing(self.urlopen(
                        sanitized_Request(t['url'], headers=t.get('http_headers', {})))) as uf:
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                ret.append((thumb_filename, thumb_filename_final))
                t['filepath'] = thumb_filename
            except network_exceptions as err:
                # Drop the thumbnail that could not be fetched from the list
                thumbnails.pop(idx)
                self.report_warning(f'Unable to download {thumb_display_id}: {err}')
        if ret and not write_all:
            break
    return ret