jfr.im git - yt-dlp.git/blame_incremental

Commit	Line	Data
	1	import collections
	2	import contextlib
	3	import datetime
	4	import errno
	5	import fileinput
	6	import functools
	7	import io
	8	import itertools
	9	import json
	10	import locale
	11	import operator
	12	import os
	13	import random
	14	import re
	15	import shutil
	16	import subprocess
	17	import sys
	18	import tempfile
	19	import time
	20	import tokenize
	21	import traceback
	22	import unicodedata
	23	import urllib.request
	24	from string import ascii_letters
	25
	26	from .cache import Cache
	27	from .compat import compat_os_name, compat_shlex_quote
	28	from .cookies import load_cookies
	29	from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
	30	from .downloader.rtmp import rtmpdump_version
	31	from .extractor import gen_extractor_classes, get_info_extractor
	32	from .extractor.common import UnsupportedURLIE
	33	from .extractor.openload import PhantomJSwrapper
	34	from .minicurses import format_text
	35	from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
	36	from .postprocessor import (
	37	EmbedThumbnailPP,
	38	FFmpegFixupDuplicateMoovPP,
	39	FFmpegFixupDurationPP,
	40	FFmpegFixupM3u8PP,
	41	FFmpegFixupM4aPP,
	42	FFmpegFixupStretchedPP,
	43	FFmpegFixupTimestampPP,
	44	FFmpegMergerPP,
	45	FFmpegPostProcessor,
	46	FFmpegVideoConvertorPP,
	47	MoveFilesAfterDownloadPP,
	48	get_postprocessor,
	49	)
	50	from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
	51	from .update import REPOSITORY, current_git_head, detect_variant
	52	from .utils import (
	53	DEFAULT_OUTTMPL,
	54	IDENTITY,
	55	LINK_TEMPLATES,
	56	MEDIA_EXTENSIONS,
	57	NO_DEFAULT,
	58	NUMBER_RE,
	59	OUTTMPL_TYPES,
	60	POSTPROCESS_WHEN,
	61	STR_FORMAT_RE_TMPL,
	62	STR_FORMAT_TYPES,
	63	ContentTooShortError,
	64	DateRange,
	65	DownloadCancelled,
	66	DownloadError,
	67	EntryNotInPlaylist,
	68	ExistingVideoReached,
	69	ExtractorError,
	70	GeoRestrictedError,
	71	HEADRequest,
	72	ISO3166Utils,
	73	LazyList,
	74	MaxDownloadsReached,
	75	Namespace,
	76	PagedList,
	77	PerRequestProxyHandler,
	78	PlaylistEntries,
	79	Popen,
	80	PostProcessingError,
	81	ReExtractInfo,
	82	RejectedVideoReached,
	83	SameFileError,
	84	UnavailableVideoError,
	85	UserNotLive,
	86	YoutubeDLCookieProcessor,
	87	YoutubeDLHandler,
	88	YoutubeDLRedirectHandler,
	89	age_restricted,
	90	args_to_str,
	91	bug_reports_message,
	92	date_from_str,
	93	deprecation_warning,
	94	determine_ext,
	95	determine_protocol,
	96	encode_compat_str,
	97	encodeFilename,
	98	error_to_compat_str,
	99	escapeHTML,
	100	expand_path,
	101	filter_dict,
	102	float_or_none,
	103	format_bytes,
	104	format_decimal_suffix,
	105	format_field,
	106	formatSeconds,
	107	get_compatible_ext,
	108	get_domain,
	109	int_or_none,
	110	iri_to_uri,
	111	is_path_like,
	112	join_nonempty,
	113	locked_file,
	114	make_archive_id,
	115	make_dir,
	116	make_HTTPS_handler,
	117	merge_headers,
	118	network_exceptions,
	119	number_of_digits,
	120	orderedSet,
	121	orderedSet_from_options,
	122	parse_filesize,
	123	preferredencoding,
	124	prepend_extension,
	125	register_socks_protocols,
	126	remove_terminal_sequences,
	127	render_table,
	128	replace_extension,
	129	sanitize_filename,
	130	sanitize_path,
	131	sanitize_url,
	132	sanitized_Request,
	133	std_headers,
	134	str_or_none,
	135	strftime_or_none,
	136	subtitles_filename,
	137	supports_terminal_sequences,
	138	system_identifier,
	139	timetuple_from_msec,
	140	to_high_limit_path,
	141	traverse_obj,
	142	try_call,
	143	try_get,
	144	url_basename,
	145	variadic,
	146	version_tuple,
	147	windows_enable_vt_mode,
	148	write_json_file,
	149	write_string,
	150	)
	151	from .version import RELEASE_GIT_HEAD, VARIANT, __version__
	152
	153	if compat_os_name == 'nt':
	154	import ctypes
	155
	156
	157	class YoutubeDL:
	158	"""YoutubeDL class.
	159
	160	YoutubeDL objects are the ones responsible for downloading the
	161	actual video file and writing it to disk if the user has requested
	162	it, among some other tasks. In most cases there should be one per
	163	program. As, given a video URL, the downloader doesn't know how to
	164	extract all the needed information, task that InfoExtractors do, it
	165	has to pass the URL to one of them.
	166
	167	For this, YoutubeDL objects have a method that allows
	168	InfoExtractors to be registered in a given order. When it is passed
	169	a URL, the YoutubeDL object hands it to the first InfoExtractor it
	170	finds that reports being able to handle it. The InfoExtractor extracts
	171	all the information about the video or videos the URL refers to, and
	172	YoutubeDL processes the extracted information, possibly using a File
	173	Downloader to download the video.
	174
	175	YoutubeDL objects accept a lot of parameters. In order not to saturate
	176	the object constructor with arguments, it receives a dictionary of
	177	options instead. These options are available through the params
	178	attribute for the InfoExtractors to use. The YoutubeDL also
	179	registers itself as the downloader in charge for the InfoExtractors
	180	that are added to it, so this is a "mutual registration".
	181
	182	Available options:
	183
	184	username: Username for authentication purposes.
	185	password: Password for authentication purposes.
	186	videopassword: Password for accessing a video.
	187	ap_mso: Adobe Pass multiple-system operator identifier.
	188	ap_username: Multiple-system operator account username.
	189	ap_password: Multiple-system operator account password.
	190	usenetrc: Use netrc for authentication instead.
	191	verbose: Print additional info to stdout.
	192	quiet: Do not print messages to stdout.
	193	no_warnings: Do not print out anything for warnings.
	194	forceprint: A dict with keys WHEN mapped to a list of templates to
	195	print to stdout. The allowed keys are video or any of the
	196	items in utils.POSTPROCESS_WHEN.
	197	For compatibility, a single list is also accepted
	198	print_to_file: A dict with keys WHEN (same as forceprint) mapped to
	199	a list of tuples with (template, filename)
	200	forcejson: Force printing info_dict as JSON.
	201	dump_single_json: Force printing the info_dict of the whole playlist
	202	(or video) as a single JSON line.
	203	force_write_download_archive: Force writing download archive regardless
	204	of 'skip_download' or 'simulate'.
	205	simulate: Do not download the video files. If unset (or None),
	206	simulate only if listsubtitles, listformats or list_thumbnails is used
	207	format: Video format code. see "FORMAT SELECTION" for more details.
	208	You can also pass a function. The function takes 'ctx' as
	209	argument and returns the formats to download.
	210	See "build_format_selector" for an implementation
	211	allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
	212	ignore_no_formats_error: Ignore "No video formats" error. Useful for
	213	extracting metadata even if the video is not actually
	214	available for download (experimental)
	215	format_sort: A list of fields by which to sort the video formats.
	216	See "Sorting Formats" for more details.
	217	format_sort_force: Force the given format_sort. see "Sorting Formats"
	218	for more details.
	219	prefer_free_formats: Whether to prefer video formats with free containers
	220	over non-free ones of same quality.
	221	allow_multiple_video_streams: Allow multiple video streams to be merged
	222	into a single file
	223	allow_multiple_audio_streams: Allow multiple audio streams to be merged
	224	into a single file
	225	check_formats: Whether to test if the formats are downloadable.
	226	Can be True (check all), False (check none),
	227	'selected' (check selected formats),
	228	or None (check only if requested by extractor)
	229	paths: Dictionary of output paths. The allowed keys are 'home',
	230	'temp' and the keys of OUTTMPL_TYPES (in utils.py)
	231	outtmpl: Dictionary of templates for output names. Allowed keys
	232	are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
	233	For compatibility with youtube-dl, a single string can also be used
	234	outtmpl_na_placeholder: Placeholder for unavailable meta fields.
	235	restrictfilenames: Do not allow "&" and spaces in file names
	236	trim_file_name: Limit length of filename (extension excluded)
	237	windowsfilenames: Force the filenames to be windows compatible
	238	ignoreerrors: Do not stop on download/postprocessing errors.
	239	Can be 'only_download' to ignore only download errors.
	240	Default is 'only_download' for CLI, but False for API
	241	skip_playlist_after_errors: Number of allowed failures until the rest of
	242	the playlist is skipped
	243	allowed_extractors: List of regexes to match against extractor names that are allowed
	244	overwrites: Overwrite all video and metadata files if True,
	245	overwrite only non-video files if None
	246	and don't overwrite any file if False
	247	For compatibility with youtube-dl,
	248	"nooverwrites" may also be used instead
	249	playlist_items: Specific indices of playlist to download.
	250	playlistrandom: Download playlist items in random order.
	251	lazy_playlist: Process playlist entries as they are received.
	252	matchtitle: Download only matching titles.
	253	rejecttitle: Reject downloads for matching titles.
	254	logger: Log messages to a logging.Logger instance.
	255	logtostderr: Print everything to stderr instead of stdout.
	256	consoletitle: Display progress in console window's titlebar.
	257	writedescription: Write the video description to a .description file
	258	writeinfojson: Write the video metadata to a .info.json file
	259	clean_infojson: Remove private fields from the infojson
	260	getcomments: Extract video comments. This will not be written to disk
	261	unless writeinfojson is also given
	262	writeannotations: Write the video annotations to a .annotations.xml file
	263	writethumbnail: Write the thumbnail image to a file
	264	allow_playlist_files: Whether to write playlists' description, infojson etc
	265	also to disk when using the 'write*' options
	266	write_all_thumbnails: Write all thumbnail formats to files
	267	writelink: Write an internet shortcut file, depending on the
	268	current platform (.url/.webloc/.desktop)
	269	writeurllink: Write a Windows internet shortcut file (.url)
	270	writewebloclink: Write a macOS internet shortcut file (.webloc)
	271	writedesktoplink: Write a Linux internet shortcut file (.desktop)
	272	writesubtitles: Write the video subtitles to a file
	273	writeautomaticsub: Write the automatically generated subtitles to a file
	274	listsubtitles: Lists all available subtitles for the video
	275	subtitlesformat: The format code for subtitles
	276	subtitleslangs: List of languages of the subtitles to download (can be regex).
	277	The list may contain "all" to refer to all the available
	278	subtitles. The language can be prefixed with a "-" to
	279	exclude it from the requested languages, e.g. ['all', '-live_chat']
	280	keepvideo: Keep the video file after post-processing
	281	daterange: A DateRange object, download only if the upload_date is in the range.
	282	skip_download: Skip the actual download of the video file
	283	cachedir: Location of the cache files in the filesystem.
	284	False to disable filesystem cache.
	285	noplaylist: Download single video instead of a playlist if in doubt.
	286	age_limit: An integer representing the user's age in years.
	287	Unsuitable videos for the given age are skipped.
	288	min_views: An integer representing the minimum view count the video
	289	must have in order to not be skipped.
	290	Videos without view count information are always
	291	downloaded. None for no limit.
	292	max_views: An integer representing the maximum view count.
	293	Videos that are more popular than that are not
	294	downloaded.
	295	Videos without view count information are always
	296	downloaded. None for no limit.
	297	download_archive: A set, or the name of a file where all downloads are recorded.
	298	Videos already present in the file are not downloaded again.
	299	break_on_existing: Stop the download process after attempting to download a
	300	file that is in the archive.
	301	break_on_reject: Stop the download process when encountering a video that
	302	has been filtered out.
	303	break_per_url: Whether break_on_reject and break_on_existing
	304	should act on each input URL as opposed to for the entire queue
	305	cookiefile: File name or text stream from where cookies should be read and dumped to
	306	cookiesfrombrowser: A tuple containing the name of the browser, the profile
	307	name/path from where cookies are loaded, the name of the keyring,
	308	and the container name, e.g. ('chrome', ) or
	309	('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
	310	legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
	311	support RFC 5746 secure renegotiation
	312	nocheckcertificate: Do not verify SSL certificates
	313	client_certificate: Path to client certificate file in PEM format. May include the private key
	314	client_certificate_key: Path to private key file for client certificate
	315	client_certificate_password: Password for client certificate private key, if encrypted.
	316	If not provided and the key is encrypted, yt-dlp will ask interactively
	317	prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
	318	(Only supported by some extractors)
	319	http_headers: A dictionary of custom headers to be used for all requests
	320	proxy: URL of the proxy server to use
	321	geo_verification_proxy: URL of the proxy to use for IP address verification
	322	on geo-restricted sites.
	323	socket_timeout: Time to wait for unresponsive hosts, in seconds
	324	bidi_workaround: Work around buggy terminals without bidirectional text
	325	support, using fribidi
	326	debug_printtraffic:Print out sent and received HTTP traffic
	327	default_search: Prepend this string if an input url is not valid.
	328	'auto' for elaborate guessing
	329	encoding: Use this encoding instead of the system-specified.
	330	extract_flat: Whether to resolve and process url_results further
	331	* False: Always process (default)
	332	* True: Never process
	333	* 'in_playlist': Do not process inside playlist/multi_video
	334	* 'discard': Always process, but don't return the result
	335	from inside playlist/multi_video
	336	* 'discard_in_playlist': Same as "discard", but only for
	337	playlists (not multi_video)
	338	wait_for_video: If given, wait for scheduled streams to become available.
	339	The value should be a tuple containing the range
	340	(min_secs, max_secs) to wait between retries
	341	postprocessors: A list of dictionaries, each with an entry
	342	* key: The name of the postprocessor. See
	343	yt_dlp/postprocessor/__init__.py for a list.
	344	* when: When to run the postprocessor. Allowed values are
	345	the entries of utils.POSTPROCESS_WHEN
	346	Assumed to be 'post_process' if not given
	347	progress_hooks: A list of functions that get called on download
	348	progress, with a dictionary with the entries
	349	* status: One of "downloading", "error", or "finished".
	350	Check this first and ignore unknown values.
	351	* info_dict: The extracted info_dict
	352
	353	If status is one of "downloading", or "finished", the
	354	following properties may also be present:
	355	* filename: The final filename (always present)
	356	* tmpfilename: The filename we're currently writing to
	357	* downloaded_bytes: Bytes on disk
	358	* total_bytes: Size of the whole file, None if unknown
	359	* total_bytes_estimate: Guess of the eventual file size,
	360	None if unavailable.
	361	* elapsed: The number of seconds since download started.
	362	* eta: The estimated time in seconds, None if unknown
	363	* speed: The download speed in bytes/second, None if
	364	unknown
	365	* fragment_index: The counter of the currently
	366	downloaded video fragment.
	367	* fragment_count: The number of fragments (= individual
	368	files that will be merged)
	369
	370	Progress hooks are guaranteed to be called at least once
	371	(with status "finished") if the download is successful.
	372	postprocessor_hooks: A list of functions that get called on postprocessing
	373	progress, with a dictionary with the entries
	374	* status: One of "started", "processing", or "finished".
	375	Check this first and ignore unknown values.
	376	* postprocessor: Name of the postprocessor
	377	* info_dict: The extracted info_dict
	378
	379	Postprocessor hooks are guaranteed to be called at least twice
	380	(with status "started" and "finished") if the processing is successful.
	381	merge_output_format: "/" separated list of extensions to use when merging formats.
	382	final_ext: Expected final extension; used to detect when the file was
	383	already downloaded and converted
	384	fixup: Automatically correct known faults of the file.
	385	One of:
	386	- "never": do nothing
	387	- "warn": only emit a warning
	388	- "detect_or_warn": check whether we can do anything
	389	about it, warn otherwise (default)
	390	source_address: Client-side IP address to bind to.
	391	sleep_interval_requests: Number of seconds to sleep between requests
	392	during extraction
	393	sleep_interval: Number of seconds to sleep before each download when
	394	used alone or a lower bound of a range for randomized
	395	sleep before each download (minimum possible number
	396	of seconds to sleep) when used along with
	397	max_sleep_interval.
	398	max_sleep_interval:Upper bound of a range for randomized sleep before each
	399	download (maximum possible number of seconds to sleep).
	400	Must only be used along with sleep_interval.
	401	Actual sleep time will be a random float from range
	402	[sleep_interval; max_sleep_interval].
	403	sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
	404	listformats: Print an overview of available video formats and exit.
	405	list_thumbnails: Print a table of all thumbnails and exit.
	406	match_filter: A function that gets called for every video with the signature
	407	(info_dict, *, incomplete: bool) -> Optional[str]
	408	For backward compatibility with youtube-dl, the signature
	409	(info_dict) -> Optional[str] is also allowed.
	410	- If it returns a message, the video is ignored.
	411	- If it returns None, the video is downloaded.
	412	- If it returns utils.NO_DEFAULT, the user is interactively
	413	asked whether to download the video.
	414	match_filter_func in utils.py is one example for this.
	415	no_color: Do not emit color codes in output.
	416	geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
	417	HTTP header
	418	geo_bypass_country:
	419	Two-letter ISO 3166-1 alpha-2 country code that will be used for
	420	explicit geographic restriction bypassing via faking
	421	X-Forwarded-For HTTP header
	422	geo_bypass_ip_block:
	423	IP range in CIDR notation that will be used similarly to
	424	geo_bypass_country
	425	external_downloader: A dictionary of protocol keys and the executable of the
	426	external downloader to use for it. The allowed protocols
	427	are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
	428	Set the value to 'native' to use the native downloader
	429	compat_opts: Compatibility options. See "Differences in default behavior".
	430	The following options do not work when used through the API:
	431	filename, abort-on-error, multistreams, no-live-chat, format-sort,
	432	no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
	433	Refer to __init__.py for their implementation
	434	progress_template: Dictionary of templates for progress outputs.
	435	Allowed keys are 'download', 'postprocess',
	436	'download-title' (console title) and 'postprocess-title'.
	437	The template is mapped on a dictionary with keys 'progress' and 'info'
	438	retry_sleep_functions: Dictionary of functions that takes the number of attempts
	439	as argument and returns the time to sleep in seconds.
	440	Allowed keys are 'http', 'fragment', 'file_access'
	441	download_ranges: A callback function that gets called for every video with
	442	the signature (info_dict, ydl) -> Iterable[Section].
	443	Only the returned sections will be downloaded.
	444	Each Section is a dict with the following keys:
	445	* start_time: Start time of the section in seconds
	446	* end_time: End time of the section in seconds
	447	* title: Section title (Optional)
	448	* index: Section number (Optional)
	449	force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
	450	noprogress: Do not print the progress bar
	451	live_from_start: Whether to download livestream videos from the start
	452
	453	The following parameters are not used by YoutubeDL itself, they are used by
	454	the downloader (see yt_dlp/downloader/common.py):
	455	nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
	456	max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
	457	continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
	458	external_downloader_args, concurrent_fragment_downloads.
	459
	460	The following options are used by the post processors:
	461	ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
	462	to the binary or its containing directory.
	463	postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
	464	and a list of additional command-line arguments for the
	465	postprocessor/executable. The dict can also have "PP+EXE" keys
	466	which are used when the given exe is used by the given PP.
	467	Use 'default' as the name for arguments to be passed to all PP
	468	For compatibility with youtube-dl, a single list of args
	469	can also be used
	470
	471	The following options are used by the extractors:
	472	extractor_retries: Number of times to retry for known errors
	473	dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
	474	hls_split_discontinuity: Split HLS playlists to different formats at
	475	discontinuities such as ad breaks (default: False)
	476	extractor_args: A dictionary of arguments to be passed to the extractors.
	477	See "EXTRACTOR ARGUMENTS" for details.
	478	E.g. {'youtube': {'skip': ['dash', 'hls']}}
	479	mark_watched: Mark videos watched (even with --simulate). Only for YouTube
	480
	481	The following options are deprecated and may be removed in the future:
	482
	483	force_generic_extractor: Force downloader to use the generic extractor
	484	- Use allowed_extractors = ['generic', 'default']
	485	playliststart: - Use playlist_items
	486	Playlist item to start at.
	487	playlistend: - Use playlist_items
	488	Playlist item to end at.
	489	playlistreverse: - Use playlist_items
	490	Download playlist items in reverse order.
	491	forceurl: - Use forceprint
	492	Force printing final URL.
	493	forcetitle: - Use forceprint
	494	Force printing title.
	495	forceid: - Use forceprint
	496	Force printing ID.
	497	forcethumbnail: - Use forceprint
	498	Force printing thumbnail URL.
	499	forcedescription: - Use forceprint
	500	Force printing description.

1

import collections

import contextlib

import datetime

import errno

import fileinput

import functools

import io

import itertools

import json

import locale

import operator

import os

import random

import re

import shutil

import subprocess

import sys

import tempfile

import time

import tokenize

import traceback

import unicodedata

import urllib.request

24

from string import ascii_letters

25

26

from .cache import Cache

27

from .compat import compat_os_name, compat_shlex_quote

28

from .cookies import load_cookies

29

from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name

30

from .downloader.rtmp import rtmpdump_version

31

from .extractor import gen_extractor_classes, get_info_extractor

32

from .extractor.common import UnsupportedURLIE

33

from .extractor.openload import PhantomJSwrapper

34

from .minicurses import format_text

35

from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors

36

from .postprocessor import (

37

EmbedThumbnailPP,

38

FFmpegFixupDuplicateMoovPP,

39

FFmpegFixupDurationPP,

40

FFmpegFixupM3u8PP,

41

FFmpegFixupM4aPP,

42

FFmpegFixupStretchedPP,

43

FFmpegFixupTimestampPP,

44

FFmpegMergerPP,

45

FFmpegPostProcessor,

46

FFmpegVideoConvertorPP,

47

MoveFilesAfterDownloadPP,

48

get_postprocessor,

49

)

50

from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping

51

from .update import REPOSITORY, current_git_head, detect_variant

from .utils import (

DEFAULT_OUTTMPL,

IDENTITY,

LINK_TEMPLATES,

MEDIA_EXTENSIONS,

NO_DEFAULT,

NUMBER_RE,

OUTTMPL_TYPES,

POSTPROCESS_WHEN,

STR_FORMAT_RE_TMPL,

STR_FORMAT_TYPES,

ContentTooShortError,

DateRange,

DownloadCancelled,

DownloadError,

EntryNotInPlaylist,

ExistingVideoReached,

ExtractorError,

GeoRestrictedError,

HEADRequest,

ISO3166Utils,

LazyList,

MaxDownloadsReached,

Namespace,

PagedList,

PerRequestProxyHandler,

PlaylistEntries,

Popen,

PostProcessingError,

ReExtractInfo,

RejectedVideoReached,

83

SameFileError,

84

UnavailableVideoError,

85

UserNotLive,

86

YoutubeDLCookieProcessor,

87

YoutubeDLHandler,

88

YoutubeDLRedirectHandler,

age_restricted,

args_to_str,

bug_reports_message,

date_from_str,

deprecation_warning,

determine_ext,

determine_protocol,

encode_compat_str,

encodeFilename,

error_to_compat_str,

escapeHTML,

expand_path,

filter_dict,

float_or_none,

format_bytes,

format_decimal_suffix,

format_field,

formatSeconds,

get_compatible_ext,

get_domain,

int_or_none,

iri_to_uri,

is_path_like,

join_nonempty,

locked_file,

make_archive_id,

make_dir,

make_HTTPS_handler,

merge_headers,

network_exceptions,

number_of_digits,

orderedSet,

orderedSet_from_options,

parse_filesize,

preferredencoding,

prepend_extension,

register_socks_protocols,

126

remove_terminal_sequences,

render_table,

replace_extension,

sanitize_filename,

sanitize_path,

sanitize_url,

sanitized_Request,

std_headers,

str_or_none,

strftime_or_none,

subtitles_filename,

supports_terminal_sequences,

system_identifier,

timetuple_from_msec,

to_high_limit_path,

traverse_obj,

try_call,

try_get,

url_basename,

variadic,

version_tuple,

windows_enable_vt_mode,

write_json_file,

write_string,

)

from .version import RELEASE_GIT_HEAD, VARIANT, __version__

152

153

if compat_os_name == 'nt':

import ctypes

class YoutubeDL:

"""YoutubeDL class.

YoutubeDL objects are the ones responsible for downloading the

161

actual video file and writing it to disk if the user has requested

162

it, among some other tasks. In most cases there should be one per

163

program. As, given a video URL, the downloader doesn't know how to

164

extract all the needed information, task that InfoExtractors do, it

165

has to pass the URL to one of them.

166

167

For this, YoutubeDL objects have a method that allows

168

InfoExtractors to be registered in a given order. When it is passed

169

a URL, the YoutubeDL object hands it to the first InfoExtractor it

170

finds that reports being able to handle it. The InfoExtractor extracts

171

all the information about the video or videos the URL refers to, and

172

YoutubeDL processes the extracted information, possibly using a File

173

Downloader to download the video.

174

175

YoutubeDL objects accept a lot of parameters. In order not to saturate

176

the object constructor with arguments, it receives a dictionary of

177

options instead. These options are available through the params

178

attribute for the InfoExtractors to use. The YoutubeDL also

179

registers itself as the downloader in charge for the InfoExtractors

180

that are added to it, so this is a "mutual registration".

Available options:

username: Username for authentication purposes.

185

password: Password for authentication purposes.

186

videopassword: Password for accessing a video.

187

ap_mso: Adobe Pass multiple-system operator identifier.

188

ap_username: Multiple-system operator account username.

189

ap_password: Multiple-system operator account password.

190

usenetrc: Use netrc for authentication instead.

191

verbose: Print additional info to stdout.

192

quiet: Do not print messages to stdout.

193

no_warnings: Do not print out anything for warnings.

194

forceprint: A dict with keys WHEN mapped to a list of templates to

195

print to stdout. The allowed keys are video or any of the

196

items in utils.POSTPROCESS_WHEN.

197

For compatibility, a single list is also accepted

198

print_to_file: A dict with keys WHEN (same as forceprint) mapped to

199

a list of tuples with (template, filename)

200

forcejson: Force printing info_dict as JSON.

201

dump_single_json: Force printing the info_dict of the whole playlist

202

(or video) as a single JSON line.

203

force_write_download_archive: Force writing download archive regardless

204

of 'skip_download' or 'simulate'.

205

simulate: Do not download the video files. If unset (or None),

206

simulate only if listsubtitles, listformats or list_thumbnails is used

207

format: Video format code. see "FORMAT SELECTION" for more details.

208

You can also pass a function. The function takes 'ctx' as

209

argument and returns the formats to download.

210

See "build_format_selector" for an implementation

211

allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.

212

ignore_no_formats_error: Ignore "No video formats" error. Useful for

213

extracting metadata even if the video is not actually

214

available for download (experimental)

215

format_sort: A list of fields by which to sort the video formats.

216

See "Sorting Formats" for more details.

217

format_sort_force: Force the given format_sort. see "Sorting Formats"

218

for more details.

219

prefer_free_formats: Whether to prefer video formats with free containers

220

over non-free ones of same quality.

221

allow_multiple_video_streams: Allow multiple video streams to be merged

222

into a single file

223

allow_multiple_audio_streams: Allow multiple audio streams to be merged

224

into a single file

225

check_formats: Whether to test if the formats are downloadable.

226

Can be True (check all), False (check none),

227

'selected' (check selected formats),

228

or None (check only if requested by extractor)

229

paths: Dictionary of output paths. The allowed keys are 'home',

230

'temp' and the keys of OUTTMPL_TYPES (in utils.py)

231

outtmpl: Dictionary of templates for output names. Allowed keys

232

are 'default' and the keys of OUTTMPL_TYPES (in utils.py).

233

For compatibility with youtube-dl, a single string can also be used

234

outtmpl_na_placeholder: Placeholder for unavailable meta fields.

235

restrictfilenames: Do not allow "&" and spaces in file names

236

trim_file_name: Limit length of filename (extension excluded)

237

windowsfilenames: Force the filenames to be windows compatible

238

ignoreerrors: Do not stop on download/postprocessing errors.

239

Can be 'only_download' to ignore only download errors.

240

Default is 'only_download' for CLI, but False for API

241

skip_playlist_after_errors: Number of allowed failures until the rest of

242

the playlist is skipped

243

allowed_extractors: List of regexes to match against extractor names that are allowed

244

overwrites: Overwrite all video and metadata files if True,

245

overwrite only non-video files if None

246

and don't overwrite any file if False

247

For compatibility with youtube-dl,

248

"nooverwrites" may also be used instead

249

playlist_items: Specific indices of playlist to download.

250

playlistrandom: Download playlist items in random order.

251

lazy_playlist: Process playlist entries as they are received.

252

matchtitle: Download only matching titles.

253

rejecttitle: Reject downloads for matching titles.

254

logger: Log messages to a logging.Logger instance.

255

logtostderr: Print everything to stderr instead of stdout.

256

consoletitle: Display progress in console window's titlebar.

257

writedescription: Write the video description to a .description file

258

writeinfojson: Write the video metadata to a .info.json file

259

clean_infojson: Remove private fields from the infojson

260

getcomments: Extract video comments. This will not be written to disk

261

unless writeinfojson is also given

262

writeannotations: Write the video annotations to a .annotations.xml file

263

writethumbnail: Write the thumbnail image to a file

264

allow_playlist_files: Whether to write playlists' description, infojson etc

265

also to disk when using the 'write*' options

266

write_all_thumbnails: Write all thumbnail formats to files

267

writelink: Write an internet shortcut file, depending on the

268

current platform (.url/.webloc/.desktop)

269

writeurllink: Write a Windows internet shortcut file (.url)

270

writewebloclink: Write a macOS internet shortcut file (.webloc)

271

writedesktoplink: Write a Linux internet shortcut file (.desktop)

272

writesubtitles: Write the video subtitles to a file

273

writeautomaticsub: Write the automatically generated subtitles to a file

274

listsubtitles: Lists all available subtitles for the video

275

subtitlesformat: The format code for subtitles

276

subtitleslangs: List of languages of the subtitles to download (can be regex).

277

The list may contain "all" to refer to all the available

278

subtitles. The language can be prefixed with a "-" to

279

exclude it from the requested languages, e.g. ['all', '-live_chat']

280

keepvideo: Keep the video file after post-processing

281

daterange: A DateRange object, download only if the upload_date is in the range.

282

skip_download: Skip the actual download of the video file

283

cachedir: Location of the cache files in the filesystem.

284

False to disable filesystem cache.

285

noplaylist: Download single video instead of a playlist if in doubt.

286

age_limit: An integer representing the user's age in years.

287

Unsuitable videos for the given age are skipped.

288

min_views: An integer representing the minimum view count the video

289

must have in order to not be skipped.

290

Videos without view count information are always

291

downloaded. None for no limit.

292

max_views: An integer representing the maximum view count.

293

Videos that are more popular than that are not

294

downloaded.

295

Videos without view count information are always

296

downloaded. None for no limit.

297

download_archive: A set, or the name of a file where all downloads are recorded.

298

Videos already present in the file are not downloaded again.

299

break_on_existing: Stop the download process after attempting to download a

300

file that is in the archive.

301

break_on_reject: Stop the download process when encountering a video that

302

has been filtered out.

303

break_per_url: Whether break_on_reject and break_on_existing

304

should act on each input URL as opposed to for the entire queue

305

cookiefile: File name or text stream from where cookies should be read and dumped to

306

cookiesfrombrowser: A tuple containing the name of the browser, the profile

307

name/path from where cookies are loaded, the name of the keyring,

308

and the container name, e.g. ('chrome', ) or

309

('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')

310

legacyserverconnect: Explicitly allow HTTPS connection to servers that do not

311

support RFC 5746 secure renegotiation

312

nocheckcertificate: Do not verify SSL certificates

313

client_certificate: Path to client certificate file in PEM format. May include the private key

314

client_certificate_key: Path to private key file for client certificate

315

client_certificate_password: Password for client certificate private key, if encrypted.

316

If not provided and the key is encrypted, yt-dlp will ask interactively

317

prefer_insecure: Use HTTP instead of HTTPS to retrieve information.

318

(Only supported by some extractors)

319

http_headers: A dictionary of custom headers to be used for all requests

320

proxy: URL of the proxy server to use

321

geo_verification_proxy: URL of the proxy to use for IP address verification

322

on geo-restricted sites.

323

socket_timeout: Time to wait for unresponsive hosts, in seconds

324

bidi_workaround: Work around buggy terminals without bidirectional text

325

support, using fribidi

326

debug_printtraffic:Print out sent and received HTTP traffic

327

default_search: Prepend this string if an input url is not valid.

328

'auto' for elaborate guessing

329

encoding: Use this encoding instead of the system-specified.

330

extract_flat: Whether to resolve and process url_results further

331

* False: Always process (default)

332

* True: Never process

333

* 'in_playlist': Do not process inside playlist/multi_video

334

* 'discard': Always process, but don't return the result

335

from inside playlist/multi_video

336

* 'discard_in_playlist': Same as "discard", but only for

337

playlists (not multi_video)

338

wait_for_video: If given, wait for scheduled streams to become available.

339

The value should be a tuple containing the range

340

(min_secs, max_secs) to wait between retries

341

postprocessors: A list of dictionaries, each with an entry

342

* key: The name of the postprocessor. See

343

yt_dlp/postprocessor/__init__.py for a list.

344

* when: When to run the postprocessor. Allowed values are

345

the entries of utils.POSTPROCESS_WHEN

346

Assumed to be 'post_process' if not given

347

progress_hooks: A list of functions that get called on download

348

progress, with a dictionary with the entries

349

* status: One of "downloading", "error", or "finished".

350

Check this first and ignore unknown values.

351

* info_dict: The extracted info_dict

352

353

If status is one of "downloading", or "finished", the

354

following properties may also be present:

355

* filename: The final filename (always present)

356

* tmpfilename: The filename we're currently writing to

357

* downloaded_bytes: Bytes on disk

358

* total_bytes: Size of the whole file, None if unknown

359

* total_bytes_estimate: Guess of the eventual file size,

360

None if unavailable.

361

* elapsed: The number of seconds since download started.

362

* eta: The estimated time in seconds, None if unknown

363

* speed: The download speed in bytes/second, None if

364

unknown

365

* fragment_index: The counter of the currently

366

downloaded video fragment.

367

* fragment_count: The number of fragments (= individual

368

files that will be merged)

369

370

Progress hooks are guaranteed to be called at least once

371

(with status "finished") if the download is successful.

372

postprocessor_hooks: A list of functions that get called on postprocessing

373

progress, with a dictionary with the entries

374

* status: One of "started", "processing", or "finished".

375

Check this first and ignore unknown values.

376

* postprocessor: Name of the postprocessor

377

* info_dict: The extracted info_dict

378

379

Postprocessor hooks are guaranteed to be called at least twice

380

(with status "started" and "finished") if the processing is successful.

381

merge_output_format: "/" separated list of extensions to use when merging formats.

382

final_ext: Expected final extension; used to detect when the file was

383

already downloaded and converted

384

fixup: Automatically correct known faults of the file.

385

One of:

386

- "never": do nothing

387

- "warn": only emit a warning

388

- "detect_or_warn": check whether we can do anything

389

about it, warn otherwise (default)

390

source_address: Client-side IP address to bind to.

391

sleep_interval_requests: Number of seconds to sleep between requests

392

during extraction

393

sleep_interval: Number of seconds to sleep before each download when

394

used alone or a lower bound of a range for randomized

395

sleep before each download (minimum possible number

396

of seconds to sleep) when used along with

397

max_sleep_interval.

398

max_sleep_interval:Upper bound of a range for randomized sleep before each

399

download (maximum possible number of seconds to sleep).

400

Must only be used along with sleep_interval.

401

Actual sleep time will be a random float from range

402

[sleep_interval; max_sleep_interval].

403

sleep_interval_subtitles: Number of seconds to sleep before each subtitle download

404

listformats: Print an overview of available video formats and exit.

405

list_thumbnails: Print a table of all thumbnails and exit.

406

match_filter: A function that gets called for every video with the signature

407

(info_dict, *, incomplete: bool) -> Optional[str]

408

For backward compatibility with youtube-dl, the signature

409

(info_dict) -> Optional[str] is also allowed.

410

- If it returns a message, the video is ignored.

411

- If it returns None, the video is downloaded.

412

- If it returns utils.NO_DEFAULT, the user is interactively

413

asked whether to download the video.

414

match_filter_func in utils.py is one example for this.

415

no_color: Do not emit color codes in output.

416

geo_bypass: Bypass geographic restriction via faking X-Forwarded-For

417

HTTP header

418

geo_bypass_country:

419

ISO 3166-1 alpha-2 (two-letter) country code that will be used for

420

explicit geographic restriction bypassing via faking

421

X-Forwarded-For HTTP header

422

geo_bypass_ip_block:

423

IP range in CIDR notation that will be used similarly to

424

geo_bypass_country

425

external_downloader: A dictionary of protocol keys and the executable of the

426

external downloader to use for it. The allowed protocols
are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.

427

428

Set the value to 'native' to use the native downloader

429

compat_opts: Compatibility options. See "Differences in default behavior".

430

The following options do not work when used through the API:

431

filename, abort-on-error, multistreams, no-live-chat, format-sort,

432

no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.

433

Refer to __init__.py for their implementation

434

progress_template: Dictionary of templates for progress outputs.

435

Allowed keys are 'download', 'postprocess',

436

'download-title' (console title) and 'postprocess-title'.

437

The template is mapped on a dictionary with keys 'progress' and 'info'

438

retry_sleep_functions: Dictionary of functions that takes the number of attempts

439

as argument and returns the time to sleep in seconds.

440

Allowed keys are 'http', 'fragment', 'file_access'

441

download_ranges: A callback function that gets called for every video with

442

the signature (info_dict, ydl) -> Iterable[Section].

443

Only the returned sections will be downloaded.

444

Each Section is a dict with the following keys:

445

* start_time: Start time of the section in seconds

446

* end_time: End time of the section in seconds

447

* title: Section title (Optional)

448

* index: Section number (Optional)

449

force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts

450

noprogress: Do not print the progress bar

451

live_from_start: Whether to download livestream videos from the start

452

453

The following parameters are not used by YoutubeDL itself, they are used by

454

the downloader (see yt_dlp/downloader/common.py):

455

nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,

456

max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,

457

continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,

458

external_downloader_args, concurrent_fragment_downloads.

459

460

The following options are used by the post processors:

461

ffmpeg_location: Location of the ffmpeg/avconv binary; either the path

462

to the binary or its containing directory.

463

postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)

464

and a list of additional command-line arguments for the

465

postprocessor/executable. The dict can also have "PP+EXE" keys

466

which are used when the given exe is used by the given PP.

467

Use 'default' as the name for arguments to be passed to all PP

468

For compatibility with youtube-dl, a single list of args

469

can also be used

470

471

The following options are used by the extractors:

472

extractor_retries: Number of times to retry for known errors

473

dynamic_mpd: Whether to process dynamic DASH manifests (default: True)

474

hls_split_discontinuity: Split HLS playlists to different formats at

475

discontinuities such as ad breaks (default: False)

476

extractor_args: A dictionary of arguments to be passed to the extractors.

477

See "EXTRACTOR ARGUMENTS" for details.

478

E.g. {'youtube': {'skip': ['dash', 'hls']}}

479

mark_watched: Mark videos watched (even with --simulate). Only for YouTube

480

481

The following options are deprecated and may be removed in the future:

482

483

force_generic_extractor: Force downloader to use the generic extractor

484

- Use allowed_extractors = ['generic', 'default']

485

playliststart: - Use playlist_items

486

Playlist item to start at.

487

playlistend: - Use playlist_items

488

Playlist item to end at.

489

playlistreverse: - Use playlist_items

490

Download playlist items in reverse order.

491

forceurl: - Use forceprint

492

Force printing final URL.

493

forcetitle: - Use forceprint

494

Force printing title.

495

forceid: - Use forceprint

496

Force printing ID.

497

forcethumbnail: - Use forceprint

498

Force printing thumbnail URL.

499

forcedescription: - Use forceprint

500

Force printing description.

501

forcefilename: - Use forceprint

502

Force printing final filename.

503

forceduration: - Use forceprint

504

Force printing duration.

505

allsubtitles: - Use subtitleslangs = ['all']

506

Downloads all the subtitles of the video

507

(requires writesubtitles or writeautomaticsub)

508

include_ads: - Doesn't work

509

Download ads as well

510

call_home: - Not implemented

511

Boolean, true iff we are allowed to contact the

512

yt-dlp servers for debugging.

513

post_hooks: - Register a custom postprocessor

514

A list of functions that get called as the final step

515

for each video file, after all postprocessors have been

516

called. The filename will be passed as the only argument.

517

hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.

518

Use the native HLS downloader instead of ffmpeg/avconv

519

if True, otherwise use ffmpeg/avconv if False, otherwise

520

use downloader suggested by extractor if None.

521

prefer_ffmpeg: - avconv support is deprecated

522

If False, use avconv instead of ffmpeg if both are available,

523

otherwise prefer ffmpeg.

524

youtube_include_dash_manifest: - Use extractor_args

525

If True (default), DASH manifests and related

526

data will be downloaded and processed by extractor.

527

You can reduce network I/O by disabling it if you don't

528

care about DASH. (only for youtube)

529

youtube_include_hls_manifest: - Use extractor_args

530

If True (default), HLS manifests and related

531

data will be downloaded and processed by extractor.

532

You can reduce network I/O by disabling it if you don't

533

care about HLS. (only for youtube)

"""

_NUMERIC_FIELDS = {

'width', 'height', 'asr', 'audio_channels', 'fps',

538

'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',

539

'timestamp', 'release_timestamp',

540

'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',

541

'average_rating', 'comment_count', 'age_limit',

542

'start_time', 'end_time',

543

'chapter_number', 'season_number', 'episode_number',

544

'track_number', 'disc_number', 'release_year',

}

_format_fields = {

# NB: Keep in sync with the docstring of extractor/common.py

549

'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',

550

'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',

551

'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',

552

'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',

553

'preference', 'language', 'language_preference', 'quality', 'source_preference',

554

'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',

555

'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'

556

}

557

# Extension sets built from MEDIA_EXTENSIONS, keyed by media kind.
# The 'video' set additionally contains '3gp'.
_format_selection_exts = {
    'audio': {*MEDIA_EXTENSIONS.common_audio},
    'video': {*MEDIA_EXTENSIONS.common_video, '3gp'},
    'storyboards': {*MEDIA_EXTENSIONS.storyboards},
}

562

563

def __init__(self, params=None, auto_init=True):
    """Set up the downloader from the given options.

    @param params     Dictionary of options. When omitted, an empty dictionary is used.
                      The dictionary is stored as self.params and normalized in place.
    @param auto_init  Whether to load the default extractors and print header (if verbose).
                      Set to 'no_verbose_header' to not print the header
    """
    params = {} if params is None else params
    self.params = params

    # Internal bookkeeping
    self._ies = {}
    self._ies_instances = {}
    self._pps = {when: [] for when in POSTPROCESS_WHEN}
    self._printed_messages = set()
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._postprocessor_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._num_videos = 0
    self._playlist_level = 0
    self._playlist_urls = set()
    self.cache = Cache(self)

    # Output streams
    windows_enable_vt_mode()
    stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
    screen = sys.stderr if self.params.get('quiet') else stdout
    if compat_os_name == 'nt':
        console = None
    else:
        # First of stderr/stdout that supports terminal sequences, if any
        console = next(filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
    self._out_files = Namespace(out=stdout, error=sys.stderr, screen=screen, console=console)
    self._allow_colors = Namespace(**{
        name: not self.params.get('no_color') and supports_terminal_sequences(stream)
        for name, stream in self._out_files.items_ if name != 'console'
    })

    # The code is left like this to be reused for future deprecations
    MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
    current_version = sys.version_info[:2]
    if current_version < MIN_RECOMMENDED:
        msg = ('Support for Python version %d.%d has been deprecated. '
               'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
               '\n You will no longer receive updates on this version')
        if current_version < MIN_SUPPORTED:
            msg = 'Python version %d.%d is no longer supported'
        self.deprecation_warning(
            f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))

    if self.params.get('allow_unplayable_formats'):
        self.report_warning(
            f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
            'This is a developer option intended for debugging. \n'
            ' If you experience any issues while using this option, '
            f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

    def check_deprecated(param, option, suggestion):
        """Warn if the deprecated `param` is set; return whether it was set"""
        if self.params.get(param) is None:
            return False
        self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
        return True

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the old option over unless the new one was given explicitly
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    for warning in self.params.get('_warnings', []):
        self.report_warning(warning)
    for warning in self.params.get('_deprecation_warnings', []):
        self.deprecated_feature(warning)

    self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
    if 'list-formats' in self.params['compat_opts']:
        self.params['listformats_table'] = False

    if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
        # nooverwrites was unnecessarily changed to overwrites
        # in 0c3d0f51778b153f65c21906031c2e091fcfb641
        # This ensures compatibility with both keys
        self.params['overwrites'] = not self.params['nooverwrites']
    elif self.params.get('overwrites') is None:
        self.params.pop('overwrites', None)
    else:
        self.params['nooverwrites'] = not self.params['overwrites']

    self.params.setdefault('forceprint', {})
    self.params.setdefault('print_to_file', {})

    # Compatibility with older syntax
    if not isinstance(params['forceprint'], dict):
        self.params['forceprint'] = {'video': params['forceprint']}

    if self.params.get('bidi_workaround', False):
        try:
            import pty
            master_fd, slave_fd = pty.openpty()
            width = shutil.get_terminal_size().columns
            width_args = [] if width is None else ['-w', str(width)]
            sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave_fd, 'stderr': self._out_files.error}
            try:
                self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
            except OSError:
                # Fall back to fribidi when bidiv cannot be started
                self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master_fd, 'rb')
        except OSError as ose:
            if ose.errno != errno.ENOENT:
                raise
            self.report_warning(
                'Could not find fribidi executable, ignoring --bidi-workaround. '
                'Make sure that fribidi is an executable file in one of the directories in your $PATH.')

    if auto_init:
        if auto_init != 'no_verbose_header':
            self.print_debug_header()
        self.add_default_info_extractors()

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self._parse_outtmpl()

    # Creating format selector here allows us to catch syntax errors before the extraction
    requested_format = self.params.get('format')
    if requested_format in (None, '-') or callable(requested_format):
        self.format_selector = requested_format
    else:
        self.format_selector = self.build_format_selector(requested_format)

    # Set http_headers defaults according to std_headers
    self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

    # Register the hooks given through params
    for option, register in (
            ('post_hooks', self.add_post_hook),
            ('progress_hooks', self.add_progress_hook),
            ('postprocessor_hooks', self.add_postprocessor_hook)):
        for hook in self.params.get(option, []):
            register(hook)

    for raw_definition in self.params.get('postprocessors', []):
        # Work on a copy since the keys consumed here are popped
        pp_kwargs = dict(raw_definition)
        when = pp_kwargs.pop('when', 'post_process')
        pp_class = get_postprocessor(pp_kwargs.pop('key'))
        self.add_post_processor(pp_class(self, **pp_kwargs), when=when)

    self._setup_opener()
    register_socks_protocols()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified"""
        archive = set()
        if fn is None:
            return archive
        if not is_path_like(fn):
            # Not a path: hand the given object back unchanged
            return fn

        self.write_debug(f'Loading archive file {fn!r}')
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                archive.update(line.strip() for line in archive_file)
        except OSError as ioe:
            # A missing archive file is not an error
            if ioe.errno != errno.ENOENT:
                raise
        return archive

    self.archive = preload_download_archive(self.params.get('download_archive'))

743

744

def warn_if_short_id(self, argv):
    """Warn about arguments that consist of a dash followed by 10 ID characters.

    Such arguments may be YouTube IDs starting with a dash. The warning
    shows the command line with those arguments moved after a `--` separator.
    """
    # short YouTube ID starting with dash?
    suspicious = [
        idx for idx, arg in enumerate(argv)
        if re.match(r'^-[0-9A-Za-z_-]{10}$', arg)]
    if not suspicious:
        return

    suspicious_set = set(suspicious)
    regular_args = [arg for idx, arg in enumerate(argv) if idx not in suspicious_set]
    moved_args = [argv[idx] for idx in suspicious]
    correct_argv = ['yt-dlp'] + regular_args + ['--'] + moved_args
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s' %
        args_to_str(correct_argv))

759

760

def add_info_extractor(self, ie):
    """Register an InfoExtractor (class or instance) under its ie_key.

    Instances are additionally stored in the instance cache and have
    this object set as their downloader.
    """
    key = ie.ie_key()
    self._ies[key] = ie
    if isinstance(ie, type):
        # A class was given; there is no instance to set up
        return
    self._ies_instances[key] = ie
    ie.set_downloader(self)

767

768

def get_info_extractor(self, ie_key):
    """
    Return an IE instance for ie_key. An already registered instance is
    reused; otherwise a new one is created and added to the extractor list.
    """
    instance = self._ies_instances.get(ie_key)
    if instance is not None:
        return instance
    instance = get_info_extractor(ie_key)()
    self.add_info_extractor(instance)
    return instance

779

780

def add_default_info_extractors(self):
    """
    Add the extractors selected by the 'allowed_extractors' param
    (default: ['default']) to the end of the list
    """
    all_ies = {}
    for ie_class in gen_extractor_classes():
        all_ies[ie_class.IE_NAME.lower()] = ie_class
    all_ies['end'] = UnsupportedURLIE()

    aliases = {
        'all': list(all_ies),
        'default': [name for name, ie in all_ies.items() if ie._ENABLED],
    }
    requested = self.params.get('allowed_extractors', ['default'])
    try:
        ie_names = orderedSet_from_options(requested, aliases, use_regex=True)
    except re.error as e:
        raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')

    for name in ie_names:
        self.add_info_extractor(all_ies[name])
    self.write_debug(f'Loaded {len(ie_names)} extractors')

797

798

def add_post_processor(self, pp, when='post_process'):
    """Append a PostProcessor to the chain for the stage `when` (one of POSTPROCESS_WHEN)."""
    assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
    chain = self._pps[when]
    chain.append(pp)
    pp.set_downloader(self)

803

804

def add_post_hook(self, ph):
    """Register `ph` at the end of the list of post hooks"""
    registered = self._post_hooks
    registered.append(ph)

807

808

def add_progress_hook(self, ph):
    """Register `ph` at the end of the list of download progress hooks"""
    registered = self._progress_hooks
    registered.append(ph)

811

812

def add_postprocessor_hook(self, ph):
    """Register a postprocessing progress hook.

    The hook is stored and also added to every postprocessor
    that has already been registered.
    """
    self._postprocessor_hooks.append(ph)
    for pp in itertools.chain.from_iterable(self._pps.values()):
        pp.add_progress_hook(ph)

818

819

def _bidi_workaround(self, message):

820

if not hasattr(self, '_output_channel'):

821

return message

822

823

assert hasattr(self, '_output_process')

824

assert isinstance(message, str)

825

line_count = message.count('\n') + 1

826

self._output_process.stdin.write((message + '\n').encode())

827

self._output_process.stdin.flush()

828

res = ''.join(self._output_channel.readline().decode()

829

for _ in range(line_count))

830

return res[:-len('\n')]

831

832

def _write_string(self, message, out=None, only_once=False):

833

if only_once:

834

if message in self._printed_messages:

835

return

836

self._printed_messages.add(message)

837

write_string(message, out=out, encoding=self.params.get('encoding'))

838

839

def to_stdout(self, message, skip_eol=False, quiet=None):
    """Write message followed by a newline to the 'out' stream.

    `quiet` and `skip_eol` are no longer supported; passing either
    only triggers a deprecation warning.
    """
    if quiet is not None:
        self.deprecation_warning(
            '"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
    if skip_eol is not False:
        self.deprecation_warning(
            '"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. Use "YoutubeDL.to_screen" instead')
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.out)

848

849

def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
    """Print message to the 'screen' stream unless in quiet mode.

    If a 'logger' param is set, the message goes to logger.debug instead.
    `quiet` overrides the 'quiet' param when given; 'verbose' disables quietness.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return

    be_quiet = self.params.get('quiet') if quiet is None else quiet
    if be_quiet and not self.params.get('verbose'):
        return

    line_end = '' if skip_eol else '\n'
    self._write_string(
        f'{self._bidi_workaround(message)}{line_end}',
        self._out_files.screen, only_once=only_once)

859

860

def to_stderr(self, message, only_once=False):
    """Print message to the 'error' stream, or to logger.error if a 'logger' param is set"""
    assert isinstance(message, str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.error, only_once=only_once)

867

868

def _send_console_code(self, code):
    """Write a control sequence to the console stream.

    Nothing is written on Windows ('nt') or when no console stream is available.
    """
    if compat_os_name != 'nt' and self._out_files.console:
        self._write_string(code, self._out_files.console)

872

873

def to_console_title(self, message):
    """Set the console window title to `message`.

    Does nothing unless the 'consoletitle' param is set. Terminal
    sequences are removed from the message first. On Windows ('nt') the
    title is set through the Win32 API if a console window exists;
    elsewhere an OSC title sequence is sent to the console stream.
    """
    if not self.params.get('consoletitle', False):
        return
    message = remove_terminal_sequences(message)
    if compat_os_name == 'nt':
        # Imported here because the module does not import ctypes at the top
        # and it is only needed for this Windows-specific call
        import ctypes

        if ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    else:
        self._send_console_code(f'\033]0;{message}\007')

884

885

def save_console_title(self):
    """Push the current console title, if 'consoletitle' is set and not simulating"""
    if self.params.get('consoletitle') and not self.params.get('simulate'):
        self._send_console_code('\033[22;0t')  # Save the title on stack

889

890

def restore_console_title(self):
    """Pop the saved console title, if 'consoletitle' is set and not simulating"""
    if self.params.get('consoletitle') and not self.params.get('simulate'):
        self._send_console_code('\033[23;0t')  # Restore the title from stack

894

895

def __enter__(self):

896

self.save_console_title()

897

return self

898

899

def __exit__(self, *args):

900

self.restore_console_title()

901

902

if self.params.get('cookiefile') is not None:

903

self.cookiejar.save(ignore_discard=True, ignore_expires=True)

904

905

def trouble(self, message=None, tb=None, is_error=True):
    """Report a problem and decide whether to abort.

    The message (if any) is printed to stderr. In verbose mode a traceback
    is printed too: the given one, else the one of the exception being
    handled, else the current call stack. Then, if is_error is set, either
    DownloadError is raised or (when the 'ignoreerrors' param is set) the
    return code is set to 1.

    @param tb        If given, is additional traceback information
    @param is_error  Whether to raise error according to ignoreerrors
    """
    if message is not None:
        self.to_stderr(message)

    current = sys.exc_info()
    # Some exceptions carry the exc_info of the error that caused them
    has_nested = bool(
        current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0])

    if self.params.get('verbose'):
        if tb is None:
            if current[0]:  # if .trouble has been called from an except block
                parts = []
                if has_nested:
                    parts.extend(traceback.format_exception(*current[1].exc_info))
                parts.append(encode_compat_str(traceback.format_exc()))
                tb = ''.join(parts)
            else:
                # Must stay inline: extract_stack() reports the stack as seen from here
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if not is_error:
        return
    if self.params.get('ignoreerrors'):
        self._download_retcode = 1
        return
    raise DownloadError(message, current[1].exc_info if has_nested else current)

# Text styles, keyed by the role of the text they are applied to
Styles = Namespace(**{
    'HEADERS': 'yellow',
    'EMPHASIS': 'light blue',
    'FILENAME': 'green',
    'ID': 'green',
    'DELIM': 'blue',
    'ERROR': 'red',
    'WARNING': 'yellow',
    'SUPPRESS': 'light black',
})

949

950

def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
    """Return `text` styled with `f` for output on `handle`.

    @param handle         Output stream; only its `encoding` attribute is consulted
    @param allow_colors   Whether the style may be applied (via format_text)
    @param f              Style passed on to format_text
    @param fallback       Text used instead when colors are not allowed, or when
                          test_encoding is set and `text` cannot be encoded losslessly
    @param test_encoding  Whether to check that `text` survives the output encoding
    """
    text = str(text)
    if test_encoding:
        # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
        encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
        encodable = text.encode(encoding, 'ignore').decode(encoding)
        lossy = encodable != text
        text = fallback if (fallback is not None and lossy) else encodable

    if allow_colors:
        return format_text(text, f)
    return text if fallback is None else fallback

960

961

def _format_out(self, *args, **kwargs):
    """Call `_format_text` with the `out` stream and its color setting."""
    handle, use_colors = self._out_files.out, self._allow_colors.out
    return self._format_text(handle, use_colors, *args, **kwargs)

963

964

def _format_screen(self, *args, **kwargs):
    """Call `_format_text` with the `screen` stream and its color setting."""
    handle, use_colors = self._out_files.screen, self._allow_colors.screen
    return self._format_text(handle, use_colors, *args, **kwargs)

966

967

def _format_err(self, *args, **kwargs):
    """Call `_format_text` with the `error` stream and its color setting."""
    handle, use_colors = self._out_files.error, self._allow_colors.error
    return self._format_text(handle, use_colors, *args, **kwargs)

969

970

def report_warning(self, message, only_once=False):
    """Report a warning.

    If a 'logger' parameter is set, the message is handed to its warning()
    method unchanged. Otherwise, unless 'no_warnings' is set, it is printed
    to stderr prefixed with 'WARNING:' (styled via `_format_err`).

    @param only_once   Passed on to to_stderr
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    prefix = self._format_err('WARNING:', self.Styles.WARNING)
    self.to_stderr(f'{prefix} {message}', only_once)

981

982

def deprecation_warning(self, message, *, stacklevel=0):
    """Emit a deprecation warning through the module-level helper of the same name.

    The message is printed with `report_error`, but with is_error=False.

    @param stacklevel  Increased by one before being passed on
                       (presumably to account for this wrapper's own frame)
    """
    deprecation_warning(
        message,
        stacklevel=stacklevel + 1,
        printer=self.report_error,
        is_error=False,
    )

985

986

def deprecated_feature(self, message):
    """Report use of a deprecated feature.

    The notice is sent to the 'logger' parameter (when set) as a warning and
    is additionally printed to stderr with only_once=True.
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(f'Deprecated Feature: {message}')
    label = self._format_err('Deprecated Feature:', self.Styles.ERROR)
    self.to_stderr(f'{label} {message}', True)

990

991

def report_error(self, message, *args, **kwargs):
    """Do the same as `trouble`, with the message prefixed by 'ERROR:'.

    The prefix is styled through `_format_err` using Styles.ERROR; all other
    arguments are forwarded to `trouble` untouched.
    """
    prefix = self._format_err('ERROR:', self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', *args, **kwargs)

997

998

def write_debug(self, message, only_once=False):
    """Log a debug message, or print it to stderr when no logger is set.

    Nothing happens unless the 'verbose' parameter is enabled. The message
    is prefixed with '[debug] '.
    """
    params = self.params
    if not params.get('verbose', False):
        return
    line = f'[debug] {message}'
    logger = params.get('logger')
    if logger:
        logger.debug(line)
        return
    self.to_stderr(line, only_once)

1007

1008

def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        # The name could not be encoded for output; fall back to a generic notice
        self.to_screen('[download] The file has already been downloaded')

1014

1015

def report_file_delete(self, file_name):
    """Report that existing file will be deleted."""
    try:
        notice = 'Deleting existing file %s' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        # The name could not be encoded for output; fall back to a generic notice
        self.to_screen('Deleting existing file')

1021

1022

def raise_no_formats(self, info, forced=False, *, msg=None):
    """Raise ExtractorError, or only warn, when a video has no formats.

    @param info     Info dict; '_has_drm', 'id' and 'extractor' are read from it
    @param forced   Raise even if 'ignore_no_formats_error' is set
    @param msg      Custom message; when given, the error counts as expected
    """
    has_drm = info.get('_has_drm')
    ignored = self.params.get('ignore_no_formats_error')
    expected = bool(msg)
    if not msg:
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'

    if ignored and not forced:
        self.report_warning(msg)
        return
    raise ExtractorError(
        msg, video_id=info['id'], ie=info['extractor'],
        expected=has_drm or ignored or expected)

1031

1032

def parse_outtmpl(self):
    """Deprecated public wrapper around `_parse_outtmpl`.

    Emits a deprecation warning, normalizes the 'outtmpl' parameter and
    returns it.
    """
    self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
    self._parse_outtmpl()
    templates = self.params['outtmpl']
    return templates

1036

1037

def _parse_outtmpl(self):
    """Normalize the 'outtmpl' parameter into a dict, in place.

    A non-dict value becomes {'default': value}. Every key of DEFAULT_OUTTMPL
    that is missing (or None) is then filled in with its default template.
    """
    def strip_spaces(tmpl):
        # Remove spaces in the default template
        return tmpl.replace(' - ', ' ').replace(' ', '-')

    sanitize = strip_spaces if self.params.get('restrictfilenames') else IDENTITY

    templates = self.params.setdefault('outtmpl', {})
    if not isinstance(templates, dict):
        templates = self.params['outtmpl'] = {'default': templates}

    for kind, default_tmpl in DEFAULT_OUTTMPL.items():
        if templates.get(kind) is None:
            templates[kind] = sanitize(default_tmpl)

1046

1047

def get_output_path(self, dir_type='', filename=None):
    """Join the configured 'home' path, the path for `dir_type` and `filename`.

    @param dir_type   Key into the 'paths' parameter; empty for none
    @param filename   File name to append; may be None
    @return           The joined path after sanitize_path()
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict), '"paths" parameter must be a dictionary'

    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    joined = os.path.join(home_dir, type_dir, filename or '')
    return sanitize_path(joined, force=self.params.get('windowsfilenames'))

1055

1056

@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Run expand_path() on an output template while keeping '%%' and '$$' intact."""
    # expand_path translates '%%' into '%' and '$$' into '$', which is not
    # what we want: '%%' has to survive for the template dict substitution
    # step. Work around it by splitting each pair with a random,
    # boundary-like separator that is removed again afterwards.
    sep = ''.join(random.choice(ascii_letters) for _ in range(32))
    guarded = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    expanded = expand_path(guarded)
    return expanded.replace(sep, '')

1070

1071

@staticmethod
def escape_outtmpl(outtmpl):
    """Escape any remaining strings like %s, %abc% etc."""
    def escape(mobj):
        # Conversions that carry a key are kept; anything else gets its '%' doubled
        prefix = '' if mobj.group('has_key') else '%'
        return prefix + mobj.group(0)

    pattern = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')
    return re.sub(pattern, escape, outtmpl)

@classmethod
def validate_outtmpl(cls, outtmpl):
    """Check that an output template can be %-formatted.

    @return None or Exception object
    """
    def as_string_conversion(mobj):
        # Swap the custom conversion character for a plain 's'
        return f'{mobj.group(0)[:-1]}s'

    custom_conversions = STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]')
    normalized = re.sub(custom_conversions, as_string_conversion, cls._outtmpl_expandpath(outtmpl))
    try:
        # Dry-run the substitution; every field resolves to 0
        cls.escape_outtmpl(normalized) % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None

@staticmethod

def _copy_infodict(info_dict):

1094

info_dict = dict(info_dict)

1095

info_dict.pop('__postprocessors', None)

1096

info_dict.pop('__pending_error', None)

1097

return info_dict

1098

1099

def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

    Every %(...)X field of the template is evaluated against a copy of
    info_dict (object traversal, negation, +/- maths, strftime formatting,
    alternates, replacement and default), and the template is rewritten so
    that each field refers to a unique key of the returned dict.

    @param outtmpl     The output template
    @param info_dict   The info dict; 'epoch' is set on it if missing, otherwise
                       only a copy is modified
    @param sanitize    Whether to sanitize the output as a filename.
                       For backward compatibility, a function can also be passed
    @return            (rewritten template, dict of values to substitute)
    """

    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    info_dict = self._copy_infodict(info_dict)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration') is not None
        else None)
    info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
    info_dict['video_autonumber'] = self._num_videos
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
        'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int, slice or "{field, ...}"
    FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
    FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
        'inner': FIELD_INNER_RE,
        'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
    }
    MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
        (?P<negate>-)?
        (?P<fields>{FIELD_RE})
        (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
        (?:>(?P<strf_format>.+?))?
        (?P<remaining>
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
        )$''')

    def _traverse_infodict(fields):
        # Split on '.', but keep each "{...}" group together as a single path element
        fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                  for f in ([x] if x.startswith('{') else x.split('.'))]
        # Drop an empty element at either end of the path
        for i in (0, -1):
            if fields and not fields[i]:
                fields.pop(i)

        for i, f in enumerate(fields):
            if not f.startswith('{'):
                continue
            assert f.endswith('}'), f'No closing brace for {f} in {fields}'
            # "{a,b.c}" becomes a dict mapping each sub-field to its own path
            fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}

        return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            math_op = None  # operator waiting for its right-hand operand
            while offset_key:
                # The maths string alternates between an operator and an operand
                item = re.match(
                    MATH_FIELD_RE if math_op else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if math_op is None:
                    math_op = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                # The operand is either a number or another field
                offset = float_or_none(item)
                if offset is None:
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = math_op(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                math_op = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
        if sanitize and value == '':
            value = None
        return value

    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
        return sanitize_filename(str(value), restricted=restricted, is_id=(
            bool(re.search(r'(^|[_.])id(\.|$)', key))
            if 'filename-sanitization' in self.params['compat_opts']
            else NO_DEFAULT))

    # `sanitize` may be a callable (backward compatibility); from here on it is a plain flag
    sanitizer = sanitize if callable(sanitize) else filename_sanitizer
    sanitize = bool(sanitize)

    def _dumpjson_default(obj):
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        return repr(obj)

    def create_key(outer_mobj):
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        initial_field = mobj.group('fields') if mobj else ''
        value, replacement, default = None, None, na
        # Walk through the comma-separated alternates until one yields a value
        while mobj:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else default
            value = get_value(mobj)
            replacement = mobj['replacement']
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
            else:
                break

        fmt = outer_mobj.group('format')
        if fmt == 's' and value is not None and key in field_size_compat_map:
            fmt = f'0{field_size_compat_map[key]:d}d'

        value = default if value is None else value if replacement is None else replacement

        flags = outer_mobj.group('conversion') or ''
        str_fmt = f'{fmt[:-1]}s'
        if fmt[-1] == 'l':  # list
            delim = '\n' if '#' in flags else ', '
            value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
        elif fmt[-1] == 'j':  # json
            value, fmt = json.dumps(
                value, default=_dumpjson_default,
                indent=4 if '#' in flags else None, ensure_ascii=False), str_fmt
        elif fmt[-1] == 'h':  # html
            value, fmt = escapeHTML(str(value)), str_fmt
        elif fmt[-1] == 'q':  # quoted
            value = map(str, variadic(value) if '#' in flags else [value])
            value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
        elif fmt[-1] == 'B':  # bytes
            value = f'%{str_fmt}'.encode() % str(value).encode()
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif fmt[-1] == 'U':  # unicode normalized
            # unicodedata.normalize() only accepts str, so stringify first
            # (as the html and bytes conversions do) instead of raising
            # TypeError for numeric or list fields
            value, fmt = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                str(value)), str_fmt
        elif fmt[-1] == 'D':  # decimal suffix
            num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
            value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                          factor=1024 if '#' in flags else 1000)
        elif fmt[-1] == 'S':  # filename sanitization
            value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
        elif fmt[-1] == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif fmt[-1] not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), str_fmt
            if fmt[-1] in 'csr':
                value = sanitizer(initial_field, value)

        # Store the value under a key unique to this field + conversion
        key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

1295

1296

def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Evaluate an output template against `info_dict` and return the resulting string.

    Extra arguments are forwarded to `prepare_outtmpl`.
    """
    prepared_tmpl, values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    escaped_tmpl = self.escape_outtmpl(prepared_tmpl)
    return escaped_tmpl % values

1299

1300

def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
    """Evaluate an output template into a file name (without the 'paths' prefix).

    @param outtmpl     Template to use; mutually exclusive with tmpl_type
    @param tmpl_type   Key of the template in the 'outtmpl' parameter
    @return            The file name, or None if it is empty or the template
                       is invalid (the error is reported in that case)
    """
    assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
    if outtmpl is None:
        templates = self.params['outtmpl']
        outtmpl = templates.get(tmpl_type or 'default', templates['default'])

    try:
        expanded = self._outtmpl_expandpath(outtmpl)
        filename = self.evaluate_outtmpl(expanded, info_dict, True)
        if not filename:
            return None

        if tmpl_type in ('', 'temp'):
            final_ext = self.params.get('final_ext')
            ext = info_dict.get('ext')
            if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
                filename = replace_extension(filename, ext, final_ext)
        elif tmpl_type:
            force_ext = OUTTMPL_TYPES[tmpl_type]
            if force_ext:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # Only the part before the last (up to) two extensions is trimmed
            stem, *extensions = filename.rsplit('.', 2)
            filename = join_nonempty(stem[:trim_file_name], *extensions, delim='.')

        return filename
    except ValueError as err:
        self.report_error(f'Error in output template: {err} (encoding: {preferredencoding()!r})')
        return None

1329

1330

def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
    """Generate the output filename

    @param dir_type   Template type / 'paths' key; mutually exclusive with outtmpl
    @param outtmpl    Explicit template to evaluate instead of a configured one
    @param warn       Whether to warn when the 'paths' parameter cannot be applied
    @return           The full output path; '-' is returned unchanged. When no
                      filename could be generated, '' is returned for dir_type
                      other than ''/'temp', else the empty result itself
    """
    if outtmpl:
        assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
        dir_type = None
    filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
    if not filename and dir_type not in ('', 'temp'):
        return ''

    if warn:
        if not self.params.get('paths'):
            pass
        elif filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        # filename may be None here (empty/invalid template); os.path.isabs(None) raises TypeError
        elif filename and os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
    if filename == '-' or not filename:
        return filename

    return self.get_output_path(dir_type, filename)

1350

1351

def _match_entry(self, info_dict, incomplete=False, silent=False):
    """Decide whether an entry should be downloaded.

    Returns None if the file should be downloaded, else the reason (a string)
    for skipping it. The reason is printed unless `silent`. If the matching
    'break_on_existing' / 'break_on_reject' parameter is set, the
    corresponding exception is raised instead of returning.

    @param incomplete  Passed on to the 'match_filter' callable
    """

    video_title = info_dict.get('title', info_dict.get('id', 'entry'))

    def check_filter():
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            match_pattern = self.params.get('matchtitle', False)
            if match_pattern and not re.search(match_pattern, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + match_pattern + '"'
            reject_pattern = self.params.get('rejecttitle', False)
            if reject_pattern and re.search(reject_pattern, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + reject_pattern + '"'

        upload_date = info_dict.get('upload_date')
        if upload_date is not None:
            allowed_dates = self.params.get('daterange', DateRange())
            if upload_date not in allowed_dates:
                return f'{date_from_str(upload_date).isoformat()} upload date is not in range {allowed_dates}'

        views = info_dict.get('view_count')
        if views is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and views < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, views, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and views > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, views, max_views)

        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        match_filter = self.params.get('match_filter')
        if match_filter is None:
            return None
        try:
            verdict = match_filter(info_dict, incomplete=incomplete)
        except TypeError:
            # For backward compatibility
            verdict = None if incomplete else match_filter(info_dict)
        if verdict is not NO_DEFAULT:
            return verdict

        # NO_DEFAULT: ask the user, repeating until a recognized answer is given
        while True:
            filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
            prompt = self._format_screen(f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)
            reply = input(prompt).lower().strip()
            if reply in {'y', ''}:
                return None
            if reply == 'n':
                return f'Skipping {video_title}'

    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return reason
    if not silent:
        self.to_screen('[download] ' + reason)
    if self.params.get(break_opt, False):
        raise break_err()
    return reason

@staticmethod

def add_extra_info(info_dict, extra_info):

1419

'''Set the keys from extra_info in info dict if they are missing'''

1420

for key, value in extra_info.items():

1421

info_dict.setdefault(key, value)

1422

1423

def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract and return the information dictionary of the URL

    The first suitable extractor handles the URL. If none is suitable, an
    error is reported. If the URL's temporary id is already in the download
    archive, extraction is skipped (or ExistingVideoReached is raised when
    'break_on_existing' is set).

    Arguments:
    @param url          URL to extract

    Keyword arguments:
    @param download     Whether to download videos
    @param process      Whether to resolve all unresolved references (URLs, playlist items).
                        Must be True for download to work
    @param ie_key       Use only the extractor with this key

    @param extra_info   Dictionary containing the extra values to add to the info (For internal use only)
    @param force_generic_extractor  Force using the generic extractor (Deprecated; use ie_key='Generic')
    """
    extra_info = {} if extra_info is None else extra_info

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    if not ie_key:
        candidates = self._ies
    elif ie_key in self._ies:
        candidates = {ie_key: self._ies[ie_key]}
    else:
        candidates = {}

    for key, ie in candidates.items():
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = ie.get_temp_id(url)
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
            self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
            if self.params.get('break_on_existing', False):
                raise ExistingVideoReached()
            break
        return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
    else:
        # Only reached when no extractor was suitable (the loop was not broken out of)
        extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
        self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
                          tb=False if extractors_restricted else None)

1471

1472

def _handle_extraction_exceptions(func):
    """Decorator that reports extraction errors instead of propagating them.

    - DownloadCancelled and the LazyList/PagedList index errors are re-raised
    - ReExtractInfo makes the wrapped function run again
    - GeoRestrictedError and ExtractorError are reported via report_error
    - any other Exception is reported only if 'ignoreerrors' is set, else re-raised

    When an error was reported (and report_error did not raise), the wrapper
    returns None.
    """
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        while True:
            try:
                return func(self, *args, **kwargs)
            except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                raise
            except ReExtractInfo as e:
                notice = f'{e}; Re-extracting data'
                if e.expected:
                    self.to_screen(notice)
                else:
                    self.to_stderr('\r')
                    self.report_warning(notice)
                continue
            except GeoRestrictedError as e:
                lines = [e.msg]
                if e.countries:
                    lines.append('This video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries)))
                lines.append('You might want to use a VPN or a proxy server (with --proxy) to workaround.')
                self.report_error('\n'.join(lines))
                return None
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(str(e), e.format_traceback())
                return None
            except Exception as e:
                if not self.params.get('ignoreerrors'):
                    raise
                self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
                return None
    return wrapper

def _wait_for_video(self, ie_result=None):
    """Wait for a not-yet-available video, then request re-extraction.

    Returns immediately unless 'wait_for_video' is set and `ie_result` is a
    video result without 'formats' or 'url'. Otherwise a countdown is shown
    and, once the wait is over (or Ctrl+C is pressed), ReExtractInfo is
    raised so that the caller extracts the video again.

    @param ie_result   The extracted info dict; may be omitted (or None)
                       when no information is available
    @raise ReExtractInfo  When the wait period ended or was interrupted
    """
    if ie_result is None:  # avoid a shared mutable default argument
        ie_result = {}

    if (not self.params.get('wait_for_video')
            or ie_result.get('_type', 'video') != 'video'
            or ie_result.get('formats') or ie_result.get('url')):
        return

    def format_dur(dur):
        return '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]

    last_msg = ''

    def progress(msg):
        nonlocal last_msg
        full_msg = f'{msg}\n'
        if not self.params.get('noprogress'):
            # Pad with spaces so a shorter message fully overwrites the previous one
            full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
        elif last_msg:
            # Without a progress display, only the first message is printed
            return
        self.to_screen(full_msg, skip_eol=True)
        last_msg = msg

    min_wait, max_wait = self.params.get('wait_for_video')
    diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
    if diff is None and ie_result.get('live_status') == 'is_upcoming':
        diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
        self.report_warning('Release time of video is not known')
    elif ie_result and (diff or 0) <= 0:
        self.report_warning('Video should already be available according to extracted info')
    # Clamp the wait into [min_wait, max_wait]
    diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
    self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

    wait_till = time.time() + diff
    try:
        while True:
            diff = wait_till - time.time()
            if diff <= 0:
                progress('')
                raise ReExtractInfo('[wait] Wait period ended', expected=True)
            progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
            time.sleep(1)
    except KeyboardInterrupt:
        progress('')
        raise ReExtractInfo('[wait] Interrupted by user', expected=True)
    except BaseException as e:
        if not isinstance(e, ReExtractInfo):
            # Finish the progress line before the error propagates
            self.to_screen('')
        raise

@_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run extractor `ie` on `url` and, if `process` is set, process the result.

    @return  The processed result if `process`, else the raw extractor result
             (with default extra info added); None if the extractor returned nothing
    """
    try:
        ie_result = ie.extract(url)
    except UserNotLive as e:
        if process:
            if self.params.get('wait_for_video'):
                self.report_warning(e)
            self._wait_for_video()
        raise

    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
        return
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}

    original_url = extra_info.get('original_url')
    if original_url:
        ie_result.setdefault('original_url', extra_info['original_url'])
    self.add_default_extra_info(ie_result, ie, url)

    if not process:
        return ie_result
    self._wait_for_video(ie_result)
    return self.process_ie_result(ie_result, download, extra_info)

def add_default_extra_info(self, ie_result, ie, url):
    """Fill in URL and extractor related fields of `ie_result` that are missing.

    @param ie    The extractor, or None to leave the extractor fields alone
    @param url   The URL that was extracted, or None to leave the URL fields alone
    """
    if url is not None:
        self.add_extra_info(ie_result, dict(webpage_url=url, original_url=url))

    # Read back, since the result may have carried its own webpage_url already
    webpage_url = ie_result.get('webpage_url')
    if webpage_url:
        self.add_extra_info(ie_result, dict(
            webpage_url_basename=url_basename(webpage_url),
            webpage_url_domain=get_domain(webpage_url),
        ))

    if ie is not None:
        self.add_extra_info(ie_result, dict(
            extractor=ie.IE_NAME,
            extractor_key=ie.ie_key(),
        ))

1595

1596

def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    @param ie_result    Result dict of an extractor; dispatched on its '_type'
                        ('video' if missing)
    @param download     Whether to download the videos
    @param extra_info   Extra values to add to the resulting info dicts
    @raise Exception    If the result type is not recognized
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(
            ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
        if ie_result.get('original_url'):
            extra_info.setdefault('original_url', ie_result['original_url'])

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: report the unresolved reference without following it
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            if ie and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            info_copy, _ = self.pre_process(info_copy)
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            self._raise_pending_errors(info_copy)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        self._raise_pending_errors(ie_result)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        exempted_fields = {'_type', 'url', 'ie_key'}
        if not ie_result.get('section_end') and ie_result.get('section_start') is None:
            # For video clips, the id etc of the clip extractor should be used
            exempted_fields |= {'id', 'extractor', 'extractor_key'}

        new_result = info.copy()
        new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result.get('webpage_url')  # Playlists may not have webpage_url
        if webpage_url and webpage_url in self._playlist_urls:
            # The parentheses matter: '%' binds tighter than 'or', so without
            # them the id fallback would never be used
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._fill_common_fields(ie_result, False)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Forget the seen playlists once the outermost one is finished
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)

1728

1729

def _ensure_dir_exists(self, path):
    """Delegate to `make_dir(path, self.report_error)` and return its result.

    Callers in this file treat a falsy return value as failure.
    """
    return make_dir(path, self.report_error)

1731

1732

@staticmethod

1733

def _playlist_infodict(ie_result, strict=False, **kwargs):

1734

info = {

1735

'playlist_count': ie_result.get('playlist_count'),

1736

'playlist': ie_result.get('title') or ie_result.get('id'),

1737

'playlist_id': ie_result.get('id'),

1738

'playlist_title': ie_result.get('title'),

1739

'playlist_uploader': ie_result.get('uploader'),

1740

'playlist_uploader_id': ie_result.get('uploader_id'),

**kwargs,

}

if strict:

return info

if ie_result.get('webpage_url'):

1746

info.update({

1747

'webpage_url': ie_result['webpage_url'],

1748

'webpage_url_basename': url_basename(ie_result['webpage_url']),

1749

'webpage_url_domain': get_domain(ie_result['webpage_url']),

})

return {

**info,

'playlist_index': 0,

'__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),

1755

'extractor': ie_result['extractor'],

1756

'extractor_key': ie_result['extractor_key'],

1757

}

1758

1759

def __process_playlist(self, ie_result, download):
    """Process each entry in the playlist

    Resolves the requested entries, optionally writes the playlist-level
    files (info json, description, thumbnails), processes every entry and
    finally runs the 'playlist' postprocessors.

    @param ie_result  Result dict with `_type` 'playlist' or 'multi_video';
                      its 'entries', 'requested_entries' and (if unset)
                      'playlist_count' keys are updated in place
    @param download   Passed through to the per-entry processing
    @returns          The result of `run_all_pps('playlist', ...)`, or None
                      when the playlist is rejected by `_match_entry` or a
                      playlist file writer returns None
    """
    assert ie_result['_type'] in ('playlist', 'multi_video')

    common_info = self._playlist_infodict(ie_result, strict=True)
    title = common_info.get('playlist') or '<Untitled>'
    if self._match_entry(common_info, incomplete=True) is not None:
        return
    self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

    all_entries = PlaylistEntries(self, ie_result)
    entries = orderedSet(all_entries.get_requested_items(), lazy=True)

    lazy = self.params.get('lazy_playlist')
    if lazy:
        # Entries are resolved one by one inside the main loop below
        resolved_entries, n_entries = [], 'N/A'
        ie_result['requested_entries'], ie_result['entries'] = None, None
    else:
        entries = resolved_entries = list(entries)
        n_entries = len(resolved_entries)
        ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
    if not ie_result.get('playlist_count'):
        # Better to do this after potentially exhausting entries
        ie_result['playlist_count'] = all_entries.get_full_count()

    extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
    ie_copy = collections.ChainMap(ie_result, extra)

    _infojson_written = False
    write_playlist_files = self.params.get('allow_playlist_files', True)
    if write_playlist_files and self.params.get('list_thumbnails'):
        self.list_thumbnails(ie_result)
    if write_playlist_files and not self.params.get('simulate'):
        _infojson_written = self._write_info_json(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
        if _infojson_written is None:
            return
        if self._write_description('playlist', ie_result,
                                   self.prepare_filename(ie_copy, 'pl_description')) is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if lazy:
        if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
            self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
    elif self.params.get('playlistreverse'):
        entries.reverse()
    elif self.params.get('playlistrandom'):
        random.shuffle(entries)

    self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
                   f'{format_field(ie_result, "playlist_count", " of %s")}')

    keep_resolved_entries = self.params.get('extract_flat') != 'discard'
    if self.params.get('extract_flat') == 'discard_in_playlist':
        keep_resolved_entries = ie_result['_type'] != 'playlist'
    if keep_resolved_entries:
        self.write_debug('The information of all playlist entries will be held in memory')

    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, (playlist_index, entry) in enumerate(entries):
        if lazy:
            # Keeps resolved_entries[i] valid for the assignments below
            resolved_entries.append((playlist_index, entry))
        if not entry:
            continue

        entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
        if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
            playlist_index = ie_result['requested_entries'][i]

        entry_copy = collections.ChainMap(entry, {
            **common_info,
            'n_entries': int_or_none(n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': i + 1,
        })

        if self._match_entry(entry_copy, incomplete=True) is not None:
            # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
            resolved_entries[i] = (playlist_index, NO_DEFAULT)
            continue

        self.to_screen('[download] Downloading video %s of %s' % (
            self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))

        extra.update({
            'playlist_index': playlist_index,
            'playlist_autonumber': i + 1,
        })
        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
            break
        if keep_resolved_entries:
            resolved_entries[i] = (playlist_index, entry_result)

    # Update with processed data; entries marked NO_DEFAULT were filtered out above
    ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
    ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]

    # Write the updated info to json
    if _infojson_written is True and self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
        return

    ie_result = self.run_all_pps('playlist', ie_result)
    self.to_screen(f'[download] Finished downloading playlist: {title}')
    return ie_result

1873

1874

@_handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Run `process_ie_result` on a single playlist entry.

    Exists as a separate method so that the `_handle_extraction_exceptions`
    decorator wraps the processing of each entry individually.
    """
    return self.process_ie_result(
        entry, download=download, extra_info=extra_info)

1878

1879

def _build_format_filter(self, filter_spec):

1880

" Returns a function to filter the formats according to the filter_spec "

OPERATORS = {

'<': operator.lt,

'<=': operator.le,

'>': operator.gt,

'>=': operator.ge,

'=': operator.eq,

'!=': operator.ne,

}

operator_rex = re.compile(r'''(?x)\s*

1891

(?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*

1892

(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*

1893

(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*

1894

''' % '|'.join(map(re.escape, OPERATORS.keys())))

1895

m = operator_rex.fullmatch(filter_spec)

1896

if m:

1897

try:

1898

comparison_value = int(m.group('value'))

1899

except ValueError:

1900

comparison_value = parse_filesize(m.group('value'))

1901

if comparison_value is None:

1902

comparison_value = parse_filesize(m.group('value') + 'B')

1903

if comparison_value is None:

1904

raise ValueError(

1905

'Invalid value %r in format specification %r' % (

1906

m.group('value'), filter_spec))

1907

op = OPERATORS[m.group('op')]

if not m:

STR_OPERATORS = {

'=': operator.eq,

'^=': lambda attr, value: attr.startswith(value),

1913

'$=': lambda attr, value: attr.endswith(value),

1914

'*=': lambda attr, value: value in attr,

1915

'~=': lambda attr, value: value.search(attr) is not None

1916

}

1917

str_operator_rex = re.compile(r'''(?x)\s*

1918

(?P<key>[a-zA-Z0-9._-]+)\s*

1919

(?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?

1920

(?P<quote>["'])?

1921

(?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))

1922

(?(quote)(?P=quote))\s*

1923

''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))

1924

m = str_operator_rex.fullmatch(filter_spec)

1925

if m:

1926

if m.group('op') == '~=':

1927

comparison_value = re.compile(m.group('value'))

1928

else:

1929

comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))

1930

str_op = STR_OPERATORS[m.group('op')]

1931

if m.group('negation'):

1932

op = lambda attr, value: not str_op(attr, value)

else:

op = str_op

if not m:

raise SyntaxError('Invalid filter specification %r' % filter_spec)

1938

1939

def _filter(f):

1940

actual_value = f.get(m.group('key'))

1941

if actual_value is None:

1942

return m.group('none_inclusive')

1943

return op(actual_value, comparison_value)

1944

return _filter

1945

1946

def _check_formats(self, formats):

1947

for f in formats:

1948

self.to_screen('[info] Testing format %s' % f['format_id'])

1949

path = self.get_output_path('temp')

1950

if not self._ensure_dir_exists(f'{path}/'):

1951

continue

1952

temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)

1953

temp_file.close()

1954

try:

1955

success, _ = self.dl(temp_file.name, f, test=True)

1956

except (DownloadError, OSError, ValueError) + network_exceptions:

1957

success = False

1958

finally:

1959

if os.path.exists(temp_file.name):

1960

try:

1961

os.remove(temp_file.name)

1962

except OSError:

1963

self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)

if success:

yield f

else:

self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

1968

1969

def _default_format_spec(self, info_dict, download=True):

1970

1971

def can_merge():

1972

merger = FFmpegMergerPP(self)

1973

return merger.available and merger.can_merge()

1974

1975

prefer_best = (

1976

not self.params.get('simulate')

and download

and (

not can_merge()

or info_dict.get('is_live') and not self.params.get('live_from_start')

1981

or self.params['outtmpl']['default'] == '-'))

1982

compat = (

1983

prefer_best

1984

or self.params.get('allow_multiple_audio_streams', False)

1985

or 'format-spec' in self.params['compat_opts'])

1986

1987

return (

1988

'best/bestvideo+bestaudio' if prefer_best

1989

else 'bestvideo*+bestaudio/best' if not compat

1990

else 'bestvideo+bestaudio/best')

1991

1992

def build_format_selector(self, format_spec):
    """Compile a format selection expression into a selector function.

    The expression is tokenized with `tokenize`, parsed into a tree of
    FormatSelector tuples (SINGLE atoms, GROUP "()", PICKFIRST "/",
    MERGE "+", and lists for ","), and that tree is turned into a function.

    @param format_spec  The format selection expression
    @returns            A function taking a ctx dict (reads 'formats',
                        'incomplete_formats', 'has_merged_format') and
                        returning an iterable of selected format dicts
    @raises SyntaxError if the expression is malformed
    """
    def syntax_error(note, start):
        message = (
            'Invalid format specification: '
            '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    check_formats = self.params.get('check_formats') == 'selected'

    def _parse_filter(tokens):
        # Collect the raw text up to the closing "]"; returns None if it never comes
        filter_parts = []
        for tok_type, string, _start, _, _ in tokens:
            if tok_type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings.
        # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for tok_type, string, start, end, line in tokens:
            if tok_type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for tok_type, string, start, end, line in tokens:
                    yield tok_type, string, start, end, line
                    if tok_type == tokenize.OP and string == ']':
                        break
            elif tok_type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
            elif tok_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        selectors = []
        current_selector = None
        for tok_type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if tok_type == getattr(tokenize, 'ENCODING', None):
                continue
            elif tok_type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif tok_type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error(f'Operator not recognized: "{string}"', start)
            elif tok_type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            get_no_more = {'video': False, 'audio': False}
            # Build a new list instead of popping while enumerating: removing an
            # item during iteration made the loop skip the item right after it
            kept_formats = []
            for fmt_info in formats_info:
                if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                    continue
                for aud_vid in ('audio', 'video'):
                    if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                        if get_no_more[aud_vid]:
                            break
                        get_no_more[aud_vid] = True
                else:
                    kept_formats.append(fmt_info)
            formats_info = kept_formats

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = get_compatible_ext(
            vcodecs=[f.get('vcodec') for f in video_fmts],
            acodecs=[f.get('acodec') for f in audio_fmts],
            vexts=[f['ext'] for f in video_fmts],
            aexts=[f['ext'] for f in audio_fmts],
            preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
                         or self.params.get('prefer_free_formats') and ('webm', 'mkv')))

        filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(filtered('format')),
            'format_id': '+'.join(filtered('format_id')),
            'ext': output_ext,
            'protocol': '+'.join(map(determine_protocol, formats_info)),
            'language': '+'.join(orderedSet(filtered('language'))) or None,
            'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
            'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
            'tbr': sum(filtered('tbr', 'vbr', 'abr')),
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'dynamic_range': the_only_video.get('dynamic_range'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
                'asr': the_only_audio.get('asr'),
                'audio_channels': the_only_audio.get('audio_channels')
            })

        return new_dict

    def _check_formats(formats):
        # Formats are test-downloaded only when check_formats == 'selected'
        if not check_formats:
            yield from formats
            return
        yield from self._check_formats(formats)

    def _build_selector_function(selector):
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    yield from f(ctx)
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                    yield _merge(pair)

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    yield from _check_formats(ctx['formats'][::-1])
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(_check_formats(
                        f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
                # NOTE(review): the pattern argument was missing from this copy of the
                # file (re.match was called with format_spec only). It is reconstructed
                # from the group names used below - confirm against upstream
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    _filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else lambda f: True)  # b*, w*
                    filter_f = lambda f: _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                else:
                    if format_spec in self._format_selection_exts['audio']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                    elif format_spec in self._format_selection_exts['video']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
                    elif format_spec in self._format_selection_exts['storyboards']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                    else:
                        filter_f = lambda f: f.get('format_id') == format_spec  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if not matches:
                        if format_fallback and ctx['incomplete_formats']:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        elif seperate_fallback and not ctx['has_merged_format']:
                            # for compatibility with youtube-dl when there is no pre-merged format
                            matches = list(filter(seperate_fallback, formats))
                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                    try:
                        yield matches[format_idx - 1]
                    except LazyList.IndexError:
                        return

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply the "[...]" filters on a copy so the caller's ctx is untouched
            ctx_copy = dict(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode())
    try:
        tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator:
        """Iterator over the token list that allows un-reading the last token."""

        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)

2322

2323

def _calc_headers(self, info_dict):
    """Compute the HTTP headers to use for `info_dict['url']`.

    Merges `self.params['http_headers']` with the info dict's own
    'http_headers', adds a 'Cookie' header when `_calc_cookies` returns one,
    and adds 'X-Forwarded-For' from '__x_forwarded_for_ip' unless that
    header is already present.
    """
    res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})

    cookies = self._calc_cookies(info_dict['url'])
    if cookies:
        res['Cookie'] = cookies

    if 'X-Forwarded-For' not in res:
        x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
        if x_forwarded_for_ip:
            res['X-Forwarded-For'] = x_forwarded_for_ip

    return res

def _calc_cookies(self, url):
    """Return the 'Cookie' header `self.cookiejar` adds to a request for `url`.

    The value is whatever `get_header('Cookie')` yields on the request
    object after `add_cookie_header` has been applied to it.
    """
    pr = sanitized_Request(url)
    self.cookiejar.add_cookie_header(pr)
    return pr.get_header('Cookie')

2341

2342

def _sort_thumbnails(self, thumbnails):

2343

thumbnails.sort(key=lambda t: (

2344

t.get('preference') if t.get('preference') is not None else -1,

2345

t.get('width') if t.get('width') is not None else -1,

2346

t.get('height') if t.get('height') is not None else -1,

2347

t.get('id') if t.get('id') is not None else '',

2348

t.get('url')))

2349

2350

def _sanitize_thumbnails(self, info_dict):

2351

thumbnails = info_dict.get('thumbnails')

2352

if thumbnails is None:

2353

thumbnail = info_dict.get('thumbnail')

2354

if thumbnail:

2355

info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]

if not thumbnails:

return

def check_thumbnails(thumbnails):

2360

for t in thumbnails:

2361

self.to_screen(f'[info] Testing thumbnail {t["id"]}')

2362

try:

2363

self.urlopen(HEADRequest(t['url']))

2364

except network_exceptions as err:

2365

self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')

continue

yield t

self._sort_thumbnails(thumbnails)

2370

for i, t in enumerate(thumbnails):

2371

if t.get('id') is None:

2372

t['id'] = '%d' % i

2373

if t.get('width') and t.get('height'):

2374

t['resolution'] = '%dx%d' % (t['width'], t['height'])

2375

t['url'] = sanitize_url(t['url'])

2376

2377

if self.params.get('check_formats') is True:

2378

info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)

2379

else:

2380

info_dict['thumbnails'] = thumbnails

2381

2382

def _fill_common_fields(self, info_dict, is_video=True):

2383

# TODO: move sanitization here

2384

if is_video:

2385

# playlists are allowed to lack "title"

2386

title = info_dict.get('title', NO_DEFAULT)

2387

if title is NO_DEFAULT:

2388

raise ExtractorError('Missing "title" field in extractor result',

2389

video_id=info_dict['id'], ie=info_dict['extractor'])

2390

info_dict['fulltitle'] = title

2391

if not title:

2392

if title == '':

2393

self.write_debug('Extractor gave empty title. Creating a generic title')

2394

else:

2395

self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')

2396

info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'

2397

2398

if info_dict.get('duration') is not None:

2399

info_dict['duration_string'] = formatSeconds(info_dict['duration'])

2400

2401

for ts_key, date_key in (

2402

('timestamp', 'upload_date'),

2403

('release_timestamp', 'release_date'),

2404

('modified_timestamp', 'modified_date'),

2405

):

2406

if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:

2407

# Working around out-of-range timestamp values (e.g. negative ones on Windows,

2408

# see http://bugs.python.org/issue1646728)

2409

with contextlib.suppress(ValueError, OverflowError, OSError):

2410

upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])

2411

info_dict[date_key] = upload_date.strftime('%Y%m%d')

2412

2413

live_keys = ('is_live', 'was_live')

2414

live_status = info_dict.get('live_status')

2415

if live_status is None:

2416

for key in live_keys:

2417

if info_dict.get(key) is False:

2418

continue

2419

if info_dict.get(key):

2420

live_status = key

2421

break

2422

if all(info_dict.get(key) is False for key in live_keys):

2423

live_status = 'not_live'

2424

if live_status:

2425

info_dict['live_status'] = live_status

2426

for key in live_keys:

2427

if info_dict.get(key) is None:

2428

info_dict[key] = (live_status == key)

2429

if live_status == 'post_live':

2430

info_dict['was_live'] = True

2431

2432

# Auto generate title fields corresponding to the *_number fields when missing

2433

# in order to always have clean titles. This is very common for TV series.

2434

for field in ('chapter', 'season', 'episode'):

2435

if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):

2436

info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

2437

2438

def _raise_pending_errors(self, info):

2439

err = info.pop('__pending_error', None)

2440

if err:

2441

self.report_error(err, tb=False)

2442

2443

def process_video_result(self, info_dict, download=True):

2444

assert info_dict.get('_type', 'video') == 'video'

2445

self._num_videos += 1

2446

2447

if 'id' not in info_dict:

2448

raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])

2449

elif not info_dict.get('id'):

2450

raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

2451

2452

def report_force_conversion(field, field_not, conversion):

2453

self.report_warning(

2454

'"%s" field is not %s - forcing %s conversion, there is an error in extractor'

2455

% (field, field_not, conversion))

2456

2457

def sanitize_string_field(info, string_field):

2458

field = info.get(string_field)

2459

if field is None or isinstance(field, str):

2460

return

2461

report_force_conversion(string_field, 'a string', 'string')

2462

info[string_field] = str(field)

2463

2464

def sanitize_numeric_fields(info):

2465

for numeric_field in self._NUMERIC_FIELDS:

2466

field = info.get(numeric_field)

2467

if field is None or isinstance(field, (int, float)):

2468

continue

2469

report_force_conversion(numeric_field, 'numeric', 'int')

2470

info[numeric_field] = int_or_none(field)

2471

2472

sanitize_string_field(info_dict, 'id')

2473

sanitize_numeric_fields(info_dict)

2474

if info_dict.get('section_end') and info_dict.get('section_start') is not None:

2475

info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)

2476

if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):

2477

self.report_warning('"duration" field is negative, there is an error in extractor')

2478

2479

chapters = info_dict.get('chapters') or []

2480

if chapters and chapters[0].get('start_time'):

2481

chapters.insert(0, {'start_time': 0})

2482

2483

dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}

2484

for idx, (prev, current, next_) in enumerate(zip(

2485

(dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):

2486

if current.get('start_time') is None:

2487

current['start_time'] = prev.get('end_time')

2488

if not current.get('end_time'):

2489

current['end_time'] = next_.get('start_time')

2490

if not current.get('title'):

2491

current['title'] = f'<Untitled Chapter {idx}>'

2492

2493

if 'playlist' not in info_dict:

2494

# It isn't part of a playlist

2495

info_dict['playlist'] = None

2496

info_dict['playlist_index'] = None

2497

2498

self._sanitize_thumbnails(info_dict)

2499

2500

thumbnail = info_dict.get('thumbnail')

2501

thumbnails = info_dict.get('thumbnails')

2502

if thumbnail:

2503

info_dict['thumbnail'] = sanitize_url(thumbnail)

2504

elif thumbnails:

2505

info_dict['thumbnail'] = thumbnails[-1]['url']

2506

2507

if info_dict.get('display_id') is None and 'id' in info_dict:

2508

info_dict['display_id'] = info_dict['id']

2509

2510

self._fill_common_fields(info_dict)

2511

2512

for cc_kind in ('subtitles', 'automatic_captions'):

2513

cc = info_dict.get(cc_kind)

2514

if cc:

2515

for _, subtitle in cc.items():

2516

for subtitle_format in subtitle:

2517

if subtitle_format.get('url'):

2518

subtitle_format['url'] = sanitize_url(subtitle_format['url'])

2519

if subtitle_format.get('ext') is None:

2520

subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

2521

2522

automatic_captions = info_dict.get('automatic_captions')

2523

subtitles = info_dict.get('subtitles')

2524

2525

info_dict['requested_subtitles'] = self.process_subtitles(

2526

info_dict['id'], subtitles, automatic_captions)

2527

2528

if info_dict.get('formats') is None:

2529

# There's only one format available

2530

formats = [info_dict]

2531

else:

2532

formats = info_dict['formats']

2533

2534

# or None ensures --clean-infojson removes it

2535

info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None

2536

if not self.params.get('allow_unplayable_formats'):

2537

formats = [f for f in formats if not f.get('has_drm')]

2538

2539

if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):

2540

self.report_warning(

2541

f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'

2542

'only images are available for download. Use --list-formats to see them'.capitalize())

2543

2544

get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))

2545

if not get_from_start:

2546

info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')

2547

if info_dict.get('is_live') and formats:

2548

formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]

2549

if get_from_start and not formats:

2550

self.raise_no_formats(info_dict, msg=(

2551

'--live-from-start is passed, but there are no formats that can be downloaded from the start. '

2552

'If you want to download from the current time, use --no-live-from-start'))

2553

2554

def is_wellformed(f):

url = f.get('url')

if not url:

self.report_warning(

'"url" field is missing or empty - skipping format, '

2559

'there is an error in extractor')

2560

return False

2561

if isinstance(url, bytes):

2562

sanitize_string_field(f, 'url')

2563

return True

2564

2565

# Filter out malformed formats for better extraction robustness

2566

formats = list(filter(is_wellformed, formats or []))

2567

2568

if not formats:

2569

self.raise_no_formats(info_dict)

formats_dict = {}

# We check that all the formats have the format and format_id fields

2574

for i, format in enumerate(formats):

2575

sanitize_string_field(format, 'format_id')

2576

sanitize_numeric_fields(format)

2577

format['url'] = sanitize_url(format['url'])

2578

if not format.get('format_id'):

2579

format['format_id'] = str(i)

2580

else:

2581

# Sanitize format_id from characters used in format selector expression

2582

format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])

2583

format_id = format['format_id']

2584

if format_id not in formats_dict:

2585

formats_dict[format_id] = []

2586

formats_dict[format_id].append(format)

2587

2588

# Make sure all formats have unique format_id

2589

common_exts = set(itertools.chain(*self._format_selection_exts.values()))

2590

for format_id, ambiguous_formats in formats_dict.items():

2591

ambigious_id = len(ambiguous_formats) > 1

2592

for i, format in enumerate(ambiguous_formats):

2593

if ambigious_id:

2594

format['format_id'] = '%s-%d' % (format_id, i)

2595

if format.get('ext') is None:

2596

format['ext'] = determine_ext(format['url']).lower()

2597

# Ensure there is no conflict between id and ext in format selection

2598

# See https://github.com/yt-dlp/yt-dlp/issues/1282

2599

if format['format_id'] != format['ext'] and format['format_id'] in common_exts:

2600

format['format_id'] = 'f%s' % format['format_id']

2601

2602

for i, format in enumerate(formats):

2603

if format.get('format') is None:

2604

format['format'] = '{id} - {res}{note}'.format(

2605

id=format['format_id'],

2606

res=self.format_resolution(format),

2607

note=format_field(format, 'format_note', ' (%s)'),

2608

)

2609

if format.get('protocol') is None:

2610

format['protocol'] = determine_protocol(format)

2611

if format.get('resolution') is None:

2612

format['resolution'] = self.format_resolution(format, default=None)

2613

if format.get('dynamic_range') is None and format.get('vcodec') != 'none':

2614

format['dynamic_range'] = 'SDR'

2615

if (info_dict.get('duration') and format.get('tbr')

2616

and not format.get('filesize') and not format.get('filesize_approx')):

2617

format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))

2618

2619

# Add HTTP headers, so that external programs can use them from the

2620

# json output

2621

full_format_info = info_dict.copy()

2622

full_format_info.update(format)

2623

format['http_headers'] = self._calc_headers(full_format_info)

2624

# Remove private housekeeping stuff

2625

if '__x_forwarded_for_ip' in info_dict:

2626

del info_dict['__x_forwarded_for_ip']

2627

2628

if self.params.get('check_formats') is True:

2629

formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

2630

2631

if not formats or formats[0] is not info_dict:

2632

# only set the 'formats' fields if the original info_dict list them

2633

# otherwise we end up with a circular reference, the first (and unique)

2634

# element in the 'formats' field in info_dict is info_dict itself,

2635

# which can't be exported to json

2636

info_dict['formats'] = formats

2637

2638

info_dict, _ = self.pre_process(info_dict)

2639

2640

if self._match_entry(info_dict, incomplete=self._format_fields) is not None:

2641

return info_dict

2642

2643

self.post_extract(info_dict)

2644

info_dict, _ = self.pre_process(info_dict, 'after_filter')

2645

2646

# The pre-processors may have modified the formats

2647

formats = info_dict.get('formats', [info_dict])

2648

2649

list_only = self.params.get('simulate') is None and (

2650

self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))

2651

interactive_format_selection = not list_only and self.format_selector == '-'

2652

if self.params.get('list_thumbnails'):

2653

self.list_thumbnails(info_dict)

2654

if self.params.get('listsubtitles'):

2655

if 'automatic_captions' in info_dict:

2656

self.list_subtitles(

2657

info_dict['id'], automatic_captions, 'automatic captions')

2658

self.list_subtitles(info_dict['id'], subtitles, 'subtitles')

2659

if self.params.get('listformats') or interactive_format_selection:

2660

self.list_formats(info_dict)

2661

if list_only:

2662

# Without this printing, -F --print-json will not work

2663

self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)

2664

return info_dict

2665

2666

format_selector = self.format_selector

2667

if format_selector is None:

2668

req_format = self._default_format_spec(info_dict, download=download)

2669

self.write_debug('Default format spec: %s' % req_format)

2670

format_selector = self.build_format_selector(req_format)

2671

2672

while True:

2673

if interactive_format_selection:

2674

req_format = input(

2675

self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))

2676

try:

2677

format_selector = self.build_format_selector(req_format)

2678

except SyntaxError as err:

2679

self.report_error(err, tb=False, is_error=False)

2680

continue

2681

2682

formats_to_download = list(format_selector({

2683

'formats': formats,

2684

'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),

2685

'incomplete_formats': (

2686

# All formats are video-only or

2687

all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)

2688

# all formats are audio-only

2689

or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),

2690

}))

2691

if interactive_format_selection and not formats_to_download:

2692

self.report_error('Requested format is not available', tb=False, is_error=False)

continue

break

if not formats_to_download:

2697

if not self.params.get('ignore_no_formats_error'):

2698

raise ExtractorError(

2699

'Requested format is not available. Use --list-formats for a list of available formats',

2700

expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])

2701

self.report_warning('Requested format is not available')

2702

# Process what we can, even without any available formats.

2703

formats_to_download = [{}]

2704

2705

requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))

2706

best_format, downloaded_formats = formats_to_download[-1], []

2707

if download:

2708

if best_format and requested_ranges:

2709

def to_screen(*msg):

2710

self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')

2711

2712

to_screen(f'Downloading {len(formats_to_download)} format(s):',

2713

(f['format_id'] for f in formats_to_download))

2714

if requested_ranges != ({}, ):

2715

to_screen(f'Downloading {len(requested_ranges)} time ranges:',

2716

(f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))

2717

max_downloads_reached = False

2718

2719

for fmt, chapter in itertools.product(formats_to_download, requested_ranges):

2720

new_info = self._copy_infodict(info_dict)

2721

new_info.update(fmt)

2722

offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')

2723

end_time = offset + min(chapter.get('end_time', duration), duration)

2724

if chapter or offset:

2725

new_info.update({

2726

'section_start': offset + chapter.get('start_time', 0),

2727

'section_end': end_time if end_time < offset + duration else None,

2728

'section_title': chapter.get('title'),

2729

'section_number': chapter.get('index'),

2730

})

2731

downloaded_formats.append(new_info)

2732

try:

2733

self.process_info(new_info)

2734

except MaxDownloadsReached:

2735

max_downloads_reached = True

2736

self._raise_pending_errors(new_info)

2737

# Remove copied info

2738

for key, val in tuple(new_info.items()):

2739

if info_dict.get(key) == val:

2740

new_info.pop(key)

2741

if max_downloads_reached:

2742

break

2743

2744

write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}

2745

assert write_archive.issubset({True, False, 'ignore'})

2746

if True in write_archive and False not in write_archive:

2747

self.record_download_archive(info_dict)

2748

2749

info_dict['requested_downloads'] = downloaded_formats

2750

info_dict = self.run_all_pps('after_video', info_dict)

2751

if max_downloads_reached:

2752

raise MaxDownloadsReached()

2753

2754

# We update the info dict with the selected best quality format (backwards compatibility)

2755

info_dict.update(best_format)

2756

return info_dict

2757

2758

def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Pick the subtitle languages to fetch and one format for each of them.

    Returns a dict mapping language code to the chosen subtitle format dict,
    or None when no subtitles are available or none were requested.
    """
    want_normal = self.params.get('writesubtitles')
    want_auto = self.params.get('writeautomaticsub')

    candidates, manual_langs = {}, []
    if normal_subtitles and want_normal:
        candidates.update(normal_subtitles)
        manual_langs = tuple(normal_subtitles.keys())
    if automatic_captions and want_auto:
        # Manually provided subtitles take precedence over automatic captions
        for lang, captions in automatic_captions.items():
            candidates.setdefault(lang, captions)

    if not candidates or not (want_normal or want_auto):
        return None

    every_lang = tuple(candidates.keys())
    if self.params.get('allsubtitles', False):
        requested_langs = every_lang
    elif self.params.get('subtitleslangs', False):
        try:
            requested_langs = orderedSet_from_options(
                self.params.get('subtitleslangs'), {'all': every_lang}, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}')
    else:
        # Default: English when present, otherwise the first known language;
        # languages with manual subtitles are considered first
        pool = manual_langs if manual_langs else every_lang
        requested_langs = ['en'] if 'en' in pool else pool[:1]

    if requested_langs:
        self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')

    formats_query = self.params.get('subtitlesformat', 'best')
    ext_preference = formats_query.split('/') if formats_query else []

    def choose_format(lang, formats):
        # Walk the "/"-separated preference list; "best" means the last listed format
        for wanted_ext in ext_preference:
            if wanted_ext == 'best':
                return formats[-1]
            matching = [fmt for fmt in formats if fmt['ext'] == wanted_ext]
            if matching:
                return matching[-1]
        fallback = formats[-1]
        self.report_warning(
            'No subtitle format found matching "%s" for language %s, '
            'using %s' % (formats_query, lang, fallback['ext']))
        return fallback

    subs = {}
    for lang in requested_langs:
        formats = candidates.get(lang)
        if formats is None:
            self.report_warning(f'{lang} subtitles not available for {video_id}')
            continue
        subs[lang] = choose_format(lang, formats)
    return subs

def _forceprint(self, key, info_dict):
    """Emit the forced-print templates registered under `key`.

    Templates from params['forceprint'] go to stdout; those from
    params['print_to_file'] are appended to the file named by their
    output template. Does nothing when info_dict is None.
    """
    if info_dict is None:
        return

    video_id = info_dict.get('id')
    info_copy = info_dict.copy()
    info_copy['formats_table'] = self.render_formats_table(info_dict)
    info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
    info_copy['subtitles_table'] = self.render_subtitles_table(video_id, info_dict.get('subtitles'))
    info_copy['automatic_captions_table'] = self.render_subtitles_table(video_id, info_dict.get('automatic_captions'))

    def format_tmpl(tmpl):
        # Expand the shorthand forms ("field", "a,b", "field=", "{a,b}") into
        # output templates; anything else is passed through untouched
        mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
        if mobj is None:
            return tmpl

        if tmpl.startswith('{'):
            tmpl = f'.{tmpl}'
        if tmpl.endswith('='):
            tmpl, pattern = tmpl[:-1], '{0} = %({0})#j'
        else:
            pattern = '%({})s'
        fields = [tmpl] if mobj.group('dict') else tmpl.split(',')
        return '\n'.join(pattern.format(field) for field in fields)

    for stdout_tmpl in self.params['forceprint'].get(key, []):
        self.to_stdout(self.evaluate_outtmpl(format_tmpl(stdout_tmpl), info_copy))

    for raw_tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
        filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
        tmpl = format_tmpl(raw_tmpl)
        self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
        if not self._ensure_dir_exists(filename):
            continue
        with open(filename, 'a', encoding='utf-8') as f:
            f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')

2845

2846

def __forced_printings(self, info_dict, filename, incomplete):
    """Write the fields selected by the `force*` params to stdout.

    Works on a shallow copy of info_dict, so the caller's dict is not
    modified here. When `incomplete` is true, mandatory fields that are
    missing are skipped instead of raising KeyError.
    """
    info_dict = info_dict.copy()
    if filename is not None:
        info_dict['filename'] = filename

    requested_formats = info_dict.get('requested_formats')
    if requested_formats is not None:
        # For RTMP URLs, also include the playpath
        info_dict['urls'] = '\n'.join(
            fmt['url'] + fmt.get('play_path', '') for fmt in info_dict['requested_formats'])
    elif info_dict.get('url'):
        info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

    def print_mandatory(field, actual_field=None):
        source = field if actual_field is None else actual_field
        if not self.params.get('force%s' % field, False):
            return
        if incomplete and info_dict.get(source) is None:
            return
        self.to_stdout(info_dict[source])

    def print_optional(field):
        if not self.params.get('force%s' % field, False):
            return
        if info_dict.get(field) is None:
            return
        self.to_stdout(info_dict[field])

    needs_post_extract = (
        self.params.get('forcejson')
        or self.params['forceprint'].get('video')
        or self.params['print_to_file'].get('video'))
    if needs_post_extract:
        self.post_extract(info_dict)
    self._forceprint('video', info_dict)

    print_mandatory('title')
    print_mandatory('id')
    print_mandatory('url', 'urls')
    for optional_field in ('thumbnail', 'description', 'filename'):
        print_optional(optional_field)
    if self.params.get('forceduration') and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    print_mandatory('format')

    if self.params.get('forcejson'):
        self.to_stdout(json.dumps(self.sanitize_info(info_dict)))

2886

2887

def dl(self, name, info, subtitle=False, test=False):
    """Hand `info` to the suitable downloader and return its download() result.

    With test=True a fixed set of test parameters is used instead of
    self.params, and no progress hooks are attached.
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    if not test:
        params = self.params
    else:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_cls(self, params)
    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)

        def printable_url(url):
            # Do not dump the payload of data: URLs into the debug log
            if url.startswith('data:'):
                return url.split(',')[0] + ',<data>'
            return url

        targets = info.get('requested_formats', []) or [info]
        urls = '", "'.join(printable_url(fmt['url']) for fmt in targets)
        self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')

    # Note: Ideally info should be deep-copied so that hooks cannot modify it.
    # But it may contain objects that are not deep-copyable
    info_for_fd = self._copy_infodict(info)
    if info_for_fd.get('http_headers') is None:
        info_for_fd['http_headers'] = self._calc_headers(info_for_fd)
    return fd.download(name, info_for_fd, subtitle)

2921

2922

def existing_file(self, filepaths, *, default_overwrite=True):
    """Return the first of `filepaths` that exists, unless overwriting is on.

    When the 'overwrites' param (falling back to `default_overwrite`) is
    truthy, every existing candidate is reported, deleted, and None is
    returned. None is also returned when no candidate exists.
    """
    present = [path for path in orderedSet(filepaths) if os.path.exists(path)]
    if present and not self.params.get('overwrites', default_overwrite):
        return present[0]

    for stale in present:
        self.report_file_delete(stale)
        os.remove(stale)
    return None

def process_info(self, info_dict):
    """Process a single resolved IE result. (Modifies it in-place)

    Writes the requested side files (description, subtitles, thumbnails,
    info json, annotations, internet shortcuts), then either moves them
    (with 'skip_download') or downloads and post-processes the video.
    The outcome is recorded in info_dict['__write_download_archive'].

    Returns None in every case. Raises MaxDownloadsReached once the
    'max_downloads' limit is hit and UnavailableVideoError when the
    download fails with an OSError.
    """

    assert info_dict.get('_type', 'video') == 'video'
    original_infodict = info_dict

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    # This is mostly just for backward compatibility of process_info
    # As a side-effect, this allows for format-specific filters
    if self._match_entry(info_dict) is not None:
        info_dict['__write_download_archive'] = 'ignore'
        return

    # Does nothing under normal operation - for backward compatibility of process_info
    self.post_extract(info_dict)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    def check_max_downloads():
        # A missing or falsy 'max_downloads' means no limit
        if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
            raise MaxDownloadsReached()

    if self.params.get('simulate'):
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
        check_max_downloads()
        return

    if full_filename is None:
        return
    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self._write_description('video', info_dict,
                               self.prepare_filename(info_dict, 'description')) is None:
        return

    sub_files = self._write_subtitles(info_dict, temp_filename)
    if sub_files is None:
        return
    files_to_move.update(dict(sub_files))

    thumb_files = self._write_thumbnails(
        'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
    if thumb_files is None:
        return
    files_to_move.update(dict(thumb_files))

    infofn = self.prepare_filename(info_dict, 'infojson')
    _infojson_written = self._write_info_json('video', info_dict, infofn)
    if _infojson_written:
        info_dict['infojson_filename'] = infofn
        # For backward compatibility, even though it was a private field
        info_dict['__infojson_filename'] = infofn
    elif _infojson_written is None:
        return

    # Note: Annotations are deprecated
    annofn = None
    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
    if annofn:
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except OSError:
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    # Write internet shortcut files
    def _write_link_file(link_type):
        # Returns False only on a hard failure (which aborts process_info)
        url = try_get(info_dict['webpage_url'], iri_to_uri)
        if not url:
            self.report_warning(
                f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
            return True
        linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
        if not self._ensure_dir_exists(encodeFilename(linkfn)):
            return False
        # Keep an existing shortcut only when overwriting is disabled, the same
        # way the annotations check above treats the 'overwrites' param
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
            return True
        try:
            self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
            with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                      newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                template_vars = {'url': url}
                if link_type == 'desktop':
                    # Strip the ".desktop" suffix from the file name
                    template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
        except OSError:
            self.report_error(f'Cannot write internet shortcut {linkfn}')
            return False
        return True

    write_links = {
        'url': self.params.get('writeurllink'),
        'webloc': self.params.get('writewebloclink'),
        'desktop': self.params.get('writedesktoplink'),
    }
    if self.params.get('writelink'):
        # 'writelink' selects the shortcut type from the current platform
        link_type = ('webloc' if sys.platform == 'darwin'
                     else 'desktop' if sys.platform.startswith('linux')
                     else 'url')
        write_links[link_type] = True

    if any(should_write and not _write_link_file(link_type)
           for link_type, should_write in write_links.items()):
        return

    def replace_info_dict(new_info):
        # Update the caller's dict in place so that its identity is preserved
        nonlocal info_dict
        if new_info == info_dict:
            return
        info_dict.clear()
        info_dict.update(new_info)

    new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    replace_info_dict(new_info)

    if self.params.get('skip_download'):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_video_file(*filepaths):
                # For each path, its converted ('final_ext') variant is checked first
                ext = info_dict.get('ext')
                converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
                file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
                                          default_overwrite=False)
                if file:
                    info_dict['ext'] = os.path.splitext(file)[1][1:]
                return file

            fd, success = None, True
            if info_dict.get('protocol') or info_dict.get('url'):
                fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                if fd is not FFmpegFD and (
                        info_dict.get('section_start') or info_dict.get('section_end')):
                    msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
                           else 'You have requested downloading the video partially, but ffmpeg is not installed')
                    self.report_error(f'{msg}. Aborting')
                    return

            if info_dict.get('requested_formats') is not None:
                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if (info_dict['ext'] == 'webm'
                            and info_dict.get('thumbnails')
                            # check with type instead of pp_key, __name__, or isinstance
                            # since we dont want any custom PPs to trigger this
                            and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return f'{filename_wo_ext}.{ext}'

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_video_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                merger = FFmpegMergerPP(self)
                downloaded = []
                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif fd:
                    # A single downloader fetches all requested formats at once
                    for f in requested_formats if fd != FFmpegFD else []:
                        f['filepath'] = fname = prepend_extension(
                            correct_ext(temp_filename, info_dict['ext']),
                            'f%s' % f['format_id'], info_dict['ext'])
                        downloaded.append(fname)
                    info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
                        if not self.params.get('ignoreerrors'):
                            self.report_error(f'{msg}. Aborting due to --abort-on-error')
                            return
                        self.report_warning(f'{msg}. The formats won\'t be merged')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                        fname = temp_filename
                    # Download every requested format separately
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            f['filepath'] = fname
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success

                if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
                    info_dict['__postprocessors'].append(merger)
                    info_dict['__files_to_merge'] = downloaded
                    # Even if there were no downloads, it is being merged only now
                    info_dict['__real_download'] = True
                else:
                    for file in downloaded:
                        files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_video_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except OSError as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
            return

        self._raise_pending_errors(info_dict)
        if success and full_filename != '-':

            def fixup():
                # do_fixup is True (fix), 'warn' (only warn) or False (skip)
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = 'warn'
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    # Queue postprocessor `cls` when condition `cndn` holds
                    if not (do_fixup and cndn):
                        return
                    elif do_fixup == 'warn':
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(stretched_ratio not in (1, None),
                             f'Non-uniform pixel ratio {stretched_ratio}',
                             FFmpegFixupStretchedPP)

                downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                downloader = downloader.FD_NAME if downloader else None

                ext = info_dict.get('ext')
                postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
                    isinstance(pp, FFmpegVideoConvertorPP)
                    and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
                ) for pp in self._pps['post_process'])

                if not postprocessed_by_ffmpeg:
                    ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
                                 'writing DASH m4a. Only some players support this container',
                                 FFmpegFixupM4aPP)
                    ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
                                 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
                                 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                 FFmpegFixupM3u8PP)
                    ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
                                 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

                ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            info_dict['__write_download_archive'] = True

    assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
    if self.params.get('force_write_download_archive'):
        info_dict['__write_download_archive'] = True
    check_max_downloads()

3288

3289

def __download_wrapper(self, func):
    """Wrap `func` with the download error handling shared by the entry points.

    The returned callable always returns None. UnavailableVideoError is
    reported; DownloadCancelled is re-raised unless 'break_per_url' is set,
    in which case the download counter is reset instead. On success the
    result is dumped as JSON when 'dump_single_json' is set.
    """
    @functools.wraps(func)
    def guarded(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except UnavailableVideoError as err:
            self.report_error(err)
            return
        except DownloadCancelled as err:
            self.to_screen(f'[info] {err}')
            if self.params.get('break_per_url'):
                self._num_downloads = 0
                return
            raise

        if not self.params.get('dump_single_json', False):
            return
        self.post_extract(result)
        self.to_stdout(json.dumps(self.sanitize_info(result)))

    return guarded

3306

3307

def download(self, url_list):
    """Download a given list of URLs.

    Raises ``SameFileError`` when several URLs are given but the default
    output template is neither ``-`` nor contains any ``%`` field, unless
    ``max_downloads`` is 1. Returns ``self._download_retcode``.
    """
    url_list = variadic(url_list)  # Passing a single URL is a common mistake
    outtmpl = self.params['outtmpl']['default']
    if (len(url_list) > 1
            and outtmpl != '-'
            and '%' not in outtmpl
            and self.params.get('max_downloads') != 1):
        raise SameFileError(outtmpl)

    for url in url_list:
        self.__download_wrapper(self.extract_info)(
            url, force_generic_extractor=self.params.get('force_generic_extractor', False))

    return self._download_retcode

3322

3323

def download_with_info_file(self, info_filename):
    """Download using the metadata stored in the JSON file ``info_filename``.

    The file is read as UTF-8, parsed and passed through ``sanitize_info``
    (honouring the ``clean_infojson`` param) before being processed. If
    processing fails with ``DownloadError``, ``EntryNotInPlaylist`` or
    ``ReExtractInfo`` and the info has a ``webpage_url``, the download is
    retried from that URL; without one the error is re-raised.

    Returns ``self._download_retcode`` (or the result of ``download`` on retry).
    """
    with contextlib.closing(fileinput.FileInput(
            [info_filename], mode='r',
            openhook=fileinput.hook_encoded('utf-8'))) as f:
        # FileInput doesn't have a read method, we can't call json.load
        info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
    try:
        self.__download_wrapper(self.process_ie_result)(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
        if not isinstance(e, EntryNotInPlaylist):
            self.to_stderr('\r')
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            # Nothing to fall back to
            raise
        self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
        return self.download([webpage_url])
    return self._download_retcode

3341

3342

@staticmethod

3343

def sanitize_info(info_dict, remove_private_keys=False):
    """Sanitize the infodict for converting to json.

    ``info_dict`` itself receives default ``epoch``, ``_type`` and
    ``_version`` entries (existing values are kept). The return value is a
    new structure in which dicts are filtered recursively, list-like
    containers become lists, JSON scalars are kept as-is and anything else
    is replaced by its ``repr``.

    With ``remove_private_keys`` set, entries whose value is None, whose key
    starts with ``__`` or whose key is one of the internal bookkeeping keys
    listed below are dropped. ``None`` input is returned unchanged.
    """
    if info_dict is None:
        return info_dict
    info_dict.setdefault('epoch', int(time.time()))
    info_dict.setdefault('_type', 'video')
    info_dict.setdefault('_version', {
        'version': __version__,
        'current_git_head': current_git_head(),
        'release_git_head': RELEASE_GIT_HEAD,
        'repository': REPOSITORY,
    })

    private_keys = {
        'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
        'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
    }

    def reject(k, v):
        if not remove_private_keys:
            return False
        return v is None or k.startswith('__') or k in private_keys

    def filter_fn(obj):
        if isinstance(obj, dict):
            return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
        elif isinstance(obj, (list, tuple, set, LazyList)):
            return list(map(filter_fn, obj))
        elif obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        else:
            return repr(obj)

    return filter_fn(info_dict)

3375

3376

@staticmethod

3377

def filter_requested_info(info_dict, actually_filter=True):
    """Alias of sanitize_info for backward compatibility.

    ``actually_filter`` is forwarded as ``remove_private_keys``.
    """
    return YoutubeDL.sanitize_info(info_dict, actually_filter)

3380

3381

def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):

3382

for filename in set(filter(None, files_to_delete)):

3383

if msg:

3384

self.to_screen(msg % filename)

try:

os.remove(filename)

except OSError:

self.report_warning(f'Unable to delete file {filename}')

3389

if filename in info.get('__files_to_move', []): # NB: Delete even if None

3390

del info['__files_to_move'][filename]

3391

3392

@staticmethod

3393

def post_extract(info_dict):
    """Run and remove the deferred ``__post_extractor`` callbacks in place.

    For ``playlist``/``multi_video`` results the entries are processed
    recursively and the container itself is left untouched. For any other
    result the ``__post_extractor`` callable (if any) is popped and its
    returned mapping is merged into the dict. ``None`` is accepted.
    """
    def actual_post_extract(info_dict):
        if info_dict.get('_type') in ('playlist', 'multi_video'):
            for video_dict in info_dict.get('entries', {}):
                actual_post_extract(video_dict or {})
            return

        post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
        info_dict.update(post_extractor())

    actual_post_extract(info_dict or {})

3404

3405

def run_pp(self, pp, infodict):
    """Run a single postprocessor and handle the files it wants removed.

    ``pp.run(infodict)`` must return ``(files_to_delete, infodict)``. A
    ``PostProcessingError`` is reported and swallowed only when the
    ``ignoreerrors`` param is exactly True; otherwise it propagates.

    With ``keepvideo`` set, the files are registered in
    ``infodict['__files_to_move']`` (with an empty destination) instead of
    being deleted. Returns the resulting infodict.
    """
    files_to_delete = []
    infodict.setdefault('__files_to_move', {})
    try:
        files_to_delete, infodict = pp.run(infodict)
    except PostProcessingError as e:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is True:
            self.report_error(e)
            return infodict
        raise

    if not files_to_delete:
        return infodict
    if self.params.get('keepvideo', False):
        for f in files_to_delete:
            infodict['__files_to_move'].setdefault(f, '')
    else:
        self._delete_downloaded_files(
            *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
    return infodict

3427

3428

def run_all_pps(self, key, info, *, additional_pps=None):
    """Run every postprocessor registered under ``key`` and return the info.

    ``self._forceprint(key, info)`` is called first. ``additional_pps``, if
    given, run before the registered ``self._pps[key]``.
    """
    self._forceprint(key, info)
    for pp in (additional_pps or []) + self._pps[key]:
        info = self.run_pp(pp, info)
    return info

3433

3434

def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered under ``key`` on a copy of ``ie_info``.

    A ``PostProcessingError`` does not propagate: its message is stored
    under ``'__pending_error'`` (unless one is already set) and reported
    with ``is_error=False``.

    Returns ``(info, files_to_move)``; the ``'__files_to_move'`` entry is
    removed from the returned info.
    """
    info = dict(ie_info)
    info['__files_to_move'] = files_to_move or {}
    try:
        info = self.run_all_pps(key, info)
    except PostProcessingError as err:
        msg = f'Preprocessing: {err}'
        info.setdefault('__pending_error', msg)
        self.report_error(msg, is_error=False)
    return info, info.pop('__files_to_move', None)

3444

3445

def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Order: the ``'post_process'`` postprocessors (preceded by any listed in
    ``info['__postprocessors']``), then ``MoveFilesAfterDownloadPP``, then
    the ``'after_move'`` postprocessors. ``info['filepath']`` is set to
    ``filename`` up front and ``'__files_to_move'`` is removed once the
    move step has run. Returns the final info dict.
    """
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move or {}
    info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    del info['__files_to_move']
    return self.run_all_pps('after_move', info)

3453

3454

def _make_archive_id(self, info_dict):

3455

video_id = info_dict.get('id')

3456

if not video_id:

3457

return

3458

# Future-proof against any change in case

3459

# and backwards compatibility with prior versions

3460

extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist

3461

if extractor is None:

3462

url = str_or_none(info_dict.get('url'))

3463

if not url:

3464

return

3465

# Try to find matching extractor for the URL and take its ie_key

3466

for ie_key, ie in self._ies.items():

if ie.suitable(url):

extractor = ie_key

break

else:

return

return make_archive_id(extractor, video_id)

3473

3474

def in_download_archive(self, info_dict):
    """Return whether ``info_dict`` is already recorded in ``self.archive``.

    Both the current archive id and any ids listed under
    ``'_old_archive_ids'`` are checked. An empty archive always yields False.
    """
    if not self.archive:
        return False

    vid_ids = [self._make_archive_id(info_dict)]
    vid_ids.extend(info_dict.get('_old_archive_ids') or [])
    return any(id_ in self.archive for id_ in vid_ids)

3481

3482

def record_download_archive(self, info_dict):
    """Add ``info_dict`` to the download archive.

    Does nothing when the ``download_archive`` param is None. The id is
    always added to the in-memory ``self.archive``; it is also appended to
    the archive file when the param is path-like.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return
    vid_id = self._make_archive_id(info_dict)
    assert vid_id

    self.write_debug(f'Adding to archive: {vid_id}')
    if is_path_like(fn):
        with locked_file(fn, 'a', encoding='utf-8') as archive_file:
            archive_file.write(vid_id + '\n')
    self.archive.add(vid_id)

3494

3495

@staticmethod

3496

def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict.

    Precedence: ``'audio only'`` (no video codec but an audio codec), an
    explicit ``resolution`` field, ``WxH``, ``Hp``, ``Wx?``, and finally
    ``default``.
    """
    if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('width') and format.get('height'):
        return '%dx%d' % (format['width'], format['height'])
    elif format.get('height'):
        return '%sp' % format['height']
    elif format.get('width'):
        return '%dx?' % format['width']
    return default

3508

3509

def _list_format_headers(self, *headers):

3510

if self.params.get('listformats_table', True) is not False:

3511

return [self._format_out(header, self.Styles.HEADERS) for header in headers]

3512

return headers

3513

3514

def _format_note(self, fdict):

3515

res = ''

3516

if fdict.get('ext') in ['f4f', 'f4m']:

3517

res += '(unsupported)'

3518

if fdict.get('language'):

3519

if res:

3520

res += ' '

3521

res += '[%s]' % fdict['language']

3522

if fdict.get('format_note') is not None:

3523

if res:

3524

res += ' '

3525

res += fdict['format_note']

3526

if fdict.get('tbr') is not None:

3527

if res:

3528

res += ', '

3529

res += '%4dk' % fdict['tbr']

3530

if fdict.get('container') is not None:

3531

if res:

3532

res += ', '

3533

res += '%s container' % fdict['container']

3534

if (fdict.get('vcodec') is not None

3535

and fdict.get('vcodec') != 'none'):

3536

if res:

3537

res += ', '

3538

res += fdict['vcodec']

3539

if fdict.get('vbr') is not None:

3540

res += '@'

3541

elif fdict.get('vbr') is not None and fdict.get('abr') is not None:

3542

res += 'video@'

3543

if fdict.get('vbr') is not None:

3544

res += '%4dk' % fdict['vbr']

3545

if fdict.get('fps') is not None:

3546

if res:

3547

res += ', '

3548

res += '%sfps' % fdict['fps']

3549

if fdict.get('acodec') is not None:

3550

if res:

3551

res += ', '

3552

if fdict['acodec'] == 'none':

3553

res += 'video only'

3554

else:

3555

res += '%-5s' % fdict['acodec']

3556

elif fdict.get('abr') is not None:

if res:

res += ', '

res += 'audio'

if fdict.get('abr') is not None:

3561

res += '@%3dk' % fdict['abr']

3562

if fdict.get('asr') is not None:

3563

res += ' (%5dHz)' % fdict['asr']

3564

if fdict.get('filesize') is not None:

3565

if res:

3566

res += ', '

3567

res += format_bytes(fdict['filesize'])

3568

elif fdict.get('filesize_approx') is not None:

3569

if res:

3570

res += ', '

3571

res += '~' + format_bytes(fdict['filesize_approx'])

3572

return res

3573

3574

def render_formats_table(self, info_dict):
    """Render the list of available formats as a text table.

    Returns None when the info has neither ``formats`` nor a ``url``. When
    the ``listformats_table`` param is exactly False, the plain four-column
    layout is produced; otherwise the detailed layout is used. Formats
    whose ``preference`` is below -1000 are omitted from both.
    """
    if not info_dict.get('formats') and not info_dict.get('url'):
        return None

    formats = info_dict.get('formats', [info_dict])
    listed = [f for f in formats if f.get('preference') is None or f['preference'] >= -1000]

    if self.params.get('listformats_table', True) is False:
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f),
            ] for f in listed]
        return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)

    def simplified_codec(f, field):
        """Return a short codec label, or a placeholder when there is no codec."""
        assert field in ('acodec', 'vcodec')
        codec = f.get(field, 'unknown')
        if not codec:
            return 'unknown'
        elif codec != 'none':
            # Keep at most the first four dot-separated components
            return '.'.join(codec.split('.')[:4])

        if field == 'vcodec' and f.get('acodec') == 'none':
            return 'images'
        elif field == 'acodec' and f.get('vcodec') == 'none':
            return ''
        return self._format_out('audio only' if field == 'vcodec' else 'video only',
                                self.Styles.SUPPRESS)

    delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
    table = [
        [
            self._format_out(format_field(f, 'format_id'), self.Styles.ID),
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d', func=round),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            format_field(f, 'audio_channels', '\t%s'),
            delim,
            format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
            format_field(f, 'tbr', '\t%dk', func=round),
            shorten_protocol_name(f.get('protocol', '')),
            delim,
            simplified_codec(f, 'vcodec'),
            format_field(f, 'vbr', '\t%dk', func=round),
            simplified_codec(f, 'acodec'),
            format_field(f, 'abr', '\t%dk', func=round),
            format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
            join_nonempty(
                self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
                format_field(f, 'language', '[%s]'),
                join_nonempty(format_field(f, 'format_note'),
                              format_field(f, 'container', ignore=(None, f.get('ext'))),
                              delim=', '),
                delim=' '),
        ] for f in listed]
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
        delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    return render_table(
        header_line, table, hide_empty=True,
        delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))

3638

3639

def render_thumbnails_table(self, info_dict):
    """Render the available thumbnails as a table, or None if there are none.

    Missing ``width``/``height`` values are shown as ``'unknown'``; every
    thumbnail must have a ``url``.
    """
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        return None
    return render_table(
        self._list_format_headers('ID', 'Width', 'Height', 'URL'),
        [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])

3646

3647

def render_subtitles_table(self, video_id, subtitles):
    """Render the available subtitles as a table, or None if there are none.

    ``subtitles`` maps a language code to a list of format dicts, each with
    an ``ext`` and optionally a ``name``. Formats are listed in reverse
    order. When all formats of a language share one name it is shown once
    (or not at all if that name is unknown).
    """
    def _row(lang, formats):
        if not formats:
            # zip(*...) cannot be unpacked into two names for an empty list
            return [lang, '', '']
        exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
        if len(set(names)) == 1:
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    if not subtitles:
        return None
    return render_table(
        self._list_format_headers('Language', 'Name', 'Formats'),
        [_row(lang, formats) for lang, formats in subtitles.items()],
        hide_empty=True)

3660

3661

def __list_table(self, video_id, name, func, *args):

3662

table = func(*args)

3663

if not table:

3664

self.to_screen(f'{video_id} has no {name}')

3665

return

3666

self.to_screen(f'[info] Available {name} for {video_id}:')

3667

self.to_stdout(table)

3668

3669

def list_formats(self, info_dict):
    """Print the formats table for ``info_dict`` (identified by its ``id``)."""
    self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)

3671

3672

def list_thumbnails(self, info_dict):
    """Print the thumbnails table for ``info_dict`` (identified by its ``id``)."""
    self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)

3674

3675

def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the subtitles table for ``video_id``; ``name`` labels the listing."""
    self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)

3677

3678

def urlopen(self, req):
    """Start an HTTP download.

    ``req`` may be a URL string (converted with ``sanitized_Request``) or a
    ready-made request object. The request is opened through
    ``self._opener`` with ``self._socket_timeout``.
    """
    if isinstance(req, str):
        req = sanitized_Request(req)
    return self._opener.open(req, timeout=self._socket_timeout)

3683

3684

def print_debug_header(self):
    """Write the verbose-mode diagnostic header.

    Does nothing unless the ``verbose`` param is set. Otherwise reports, in
    order: stream encodings, program version/variant, lazy-extractor
    status, loaded plugins, compat options, git HEAD, system identifier,
    external executable versions, optional libraries and the proxy map.
    Output goes to the ``logger`` param if one is configured.
    """
    if not self.params.get('verbose'):
        return

    from . import _IN_CLI  # Must be delayed import

    # These imports can be slow. So import them only as needed
    from .extractor.extractors import _LAZY_LOADER
    from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors

    def get_encoding(stream):
        ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
        if not supports_terminal_sequences(stream):
            from .utils import WINDOWS_VT_MODE  # Must be imported locally
            ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
        return ret

    encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        self.get_encoding(),
        ', '.join(
            f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
            if stream is not None and key != 'console')
    )

    logger = self.params.get('logger')
    if logger:
        write_debug = lambda msg: logger.debug(f'[debug] {msg}')
        write_debug(encoding_str)
    else:
        # The encoding line is written with encoding=None, unlike later lines
        write_string(f'[debug] {encoding_str}\n', encoding=None)
        write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    if VARIANT not in (None, 'pip'):
        source += '*'
    write_debug(join_nonempty(
        f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
        __version__,
        f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        '' if _IN_CLI else 'API',
        delim=' '))
    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if plugin_extractors or plugin_postprocessors:
        write_debug('Plugins: %s' % [
            '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
    if self.params['compat_opts']:
        write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

    git_head = current_git_head()  # looked up once and reused
    if git_head:
        write_debug(f'Git HEAD: {git_head}')
    write_debug(system_identifier())

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
    if ffmpeg_features:
        exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .compat.compat_utils import get_package_info
    from .dependencies import available_dependencies

    write_debug('Optional libraries: %s' % (', '.join(sorted({
        join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
    })) or 'none'))

    self._setup_opener()
    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_debug(f'Proxy map: {proxy_map}')

    # Not implemented
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
        write_debug('Public IP address: %s' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode()
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)

3781

3782

def _setup_opener(self):

3783

if hasattr(self, '_opener'):

3784

return

3785

timeout_val = self.params.get('socket_timeout')

3786

self._socket_timeout = 20 if timeout_val is None else float(timeout_val)

3787

3788

opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')

3789

opts_cookiefile = self.params.get('cookiefile')

3790

opts_proxy = self.params.get('proxy')

3791

3792

self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)

3793

3794

cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

3795

if opts_proxy is not None:

if opts_proxy == '':

proxies = {}

else:

proxies = {'http': opts_proxy, 'https': opts_proxy}

3800

else:

3801

proxies = urllib.request.getproxies()

3802

# Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)

3803

if 'http' in proxies and 'https' not in proxies:

3804

proxies['https'] = proxies['http']

3805

proxy_handler = PerRequestProxyHandler(proxies)

3806

3807

debuglevel = 1 if self.params.get('debug_printtraffic') else 0

3808

https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)

3809

ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)

3810

redirect_handler = YoutubeDLRedirectHandler()

3811

data_handler = urllib.request.DataHandler()

3812

3813

# When passing our own FileHandler instance, build_opener won't add the

3814

# default FileHandler and allows us to disable the file protocol, which

3815

# can be used for malicious purposes (see

3816

# https://github.com/ytdl-org/youtube-dl/issues/8227)

3817

file_handler = urllib.request.FileHandler()

3818

3819

def file_open(*args, **kwargs):

3820

raise urllib.error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')

3821

file_handler.file_open = file_open

3822

3823

opener = urllib.request.build_opener(

3824

proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

3825

3826

# Delete the default user-agent header, which would otherwise apply in

3827

# cases where our custom HTTP handler doesn't come into play

3828

# (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)

3829

opener.addheaders = []

3830

self._opener = opener

3831

3832

def encode(self, s):
    """Encode ``s`` with the configured encoding; bytes pass through unchanged.

    A ``UnicodeEncodeError`` is re-raised with a hint about the
    ``--encoding`` option appended to its reason.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
        raise

3841

3842

def get_encoding(self):
    """Return the ``encoding`` param, falling back to ``preferredencoding()``."""
    encoding = self.params.get('encoding')
    if encoding is None:
        encoding = preferredencoding()
    return encoding

3847

3848

def _write_info_json(self, label, ie_result, infofn, overwrite=None):

3849

''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''

3850

if overwrite is None:

3851

overwrite = self.params.get('overwrites', True)

3852

if not self.params.get('writeinfojson'):

3853

return False

3854

elif not infofn:

3855

self.write_debug(f'Skipping writing {label} infojson')

3856

return False

3857

elif not self._ensure_dir_exists(infofn):

3858

return None

3859

elif not overwrite and os.path.exists(infofn):

3860

self.to_screen(f'[info] {label.title()} metadata is already present')

3861

return 'exists'

3862

3863

self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')

3864

try:

3865

write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)

3866

return True

3867

except OSError:

3868

self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')

3869

return None

3870

3871

def _write_description(self, label, ie_result, descfn):

3872

''' Write description and returns True = written, False = skip, None = error '''

3873

if not self.params.get('writedescription'):

3874

return False

3875

elif not descfn:

3876

self.write_debug(f'Skipping writing {label} description')

3877

return False

3878

elif not self._ensure_dir_exists(descfn):

3879

return None

3880

elif not self.params.get('overwrites', True) and os.path.exists(descfn):

3881

self.to_screen(f'[info] {label.title()} description is already present')

3882

elif ie_result.get('description') is None:

3883

self.report_warning(f'There\'s no {label} description to write')

return False

else:

try:

self.to_screen(f'[info] Writing {label} description to: {descfn}')

3888

with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:

3889

descfile.write(ie_result['description'])

3890

except OSError:

3891

self.report_error(f'Cannot write {label} description file {descfn}')

return None

return True

def _write_subtitles(self, info_dict, filename):

3896

''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''

3897

ret = []

3898

subtitles = info_dict.get('requested_subtitles')

3899

if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):

3900

# subtitles download errors are already managed as troubles in relevant IE

3901

# that way it will silently go on when used with unsupporting IE

3902

return ret

3903

3904

sub_filename_base = self.prepare_filename(info_dict, 'subtitle')

3905

if not sub_filename_base:

3906

self.to_screen('[info] Skipping writing video subtitles')

3907

return ret

3908

for sub_lang, sub_info in subtitles.items():

3909

sub_format = sub_info['ext']

3910

sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))

3911

sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))

3912

existing_sub = self.existing_file((sub_filename_final, sub_filename))

3913

if existing_sub:

3914

self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')

3915

sub_info['filepath'] = existing_sub

3916

ret.append((existing_sub, sub_filename_final))

3917

continue

3918

3919

self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')

3920

if sub_info.get('data') is not None:

3921

try:

3922

# Use newline='' to prevent conversion of newline characters

3923

# See https://github.com/ytdl-org/youtube-dl/issues/10268

3924

with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:

3925

subfile.write(sub_info['data'])

3926

sub_info['filepath'] = sub_filename

3927

ret.append((sub_filename, sub_filename_final))

3928

continue

3929

except OSError:

3930

self.report_error(f'Cannot write video subtitles file {sub_filename}')

return None

try:

sub_copy = sub_info.copy()

3935

sub_copy.setdefault('http_headers', info_dict.get('http_headers'))

3936

self.dl(sub_filename, sub_copy, subtitle=True)

3937

sub_info['filepath'] = sub_filename

3938

ret.append((sub_filename, sub_filename_final))

3939

except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:

3940

msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'

3941

if self.params.get('ignoreerrors') is not True: # False or 'only_download'

3942

if not self.params.get('ignoreerrors'):

3943

self.report_error(msg)

3944

raise DownloadError(msg)

3945

self.report_warning(msg)

3946

return ret

3947

3948

def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):

3949

''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''

3950

write_all = self.params.get('write_all_thumbnails', False)

3951

thumbnails, ret = [], []

3952

if write_all or self.params.get('writethumbnail', False):

3953

thumbnails = info_dict.get('thumbnails') or []

3954

multiple = write_all and len(thumbnails) > 1

3955

3956

if thumb_filename_base is None:

3957

thumb_filename_base = filename

3958

if thumbnails and not thumb_filename_base:

3959

self.write_debug(f'Skipping writing {label} thumbnail')

3960

return ret

3961

3962

for idx, t in list(enumerate(thumbnails))[::-1]:

3963

thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')

3964

thumb_display_id = f'{label} thumbnail {t["id"]}'

3965

thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))

3966

thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

3967

3968

existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))

3969

if existing_thumb:

3970

self.to_screen('[info] %s is already present' % (

3971

thumb_display_id if multiple else f'{label} thumbnail').capitalize())

3972

t['filepath'] = existing_thumb

3973

ret.append((existing_thumb, thumb_filename_final))

3974

else:

3975

self.to_screen(f'[info] Downloading {thumb_display_id} ...')

3976

try:

3977

uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))

3978

self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')

3979

with open(encodeFilename(thumb_filename), 'wb') as thumbf:

3980

shutil.copyfileobj(uf, thumbf)

3981

ret.append((thumb_filename, thumb_filename_final))

3982

t['filepath'] = thumb_filename

3983

except network_exceptions as err:

3984

thumbnails.pop(idx)

3985

self.report_warning(f'Unable to download {thumb_display_id}: {err}')

3986

if ret and not write_all:

3987

break

3988

return ret