jfr.im git - yt-dlp.git/blame_incremental

Commit	Line	Data
	1	import collections
	2	import contextlib
	3	import datetime
	4	import errno
	5	import fileinput
	6	import functools
	7	import io
	8	import itertools
	9	import json
	10	import locale
	11	import operator
	12	import os
	13	import random
	14	import re
	15	import shutil
	16	import subprocess
	17	import sys
	18	import tempfile
	19	import time
	20	import tokenize
	21	import traceback
	22	import unicodedata
	23	import urllib.request
	24	from string import ascii_letters
	25
	26	from .cache import Cache
	27	from .compat import compat_os_name, compat_shlex_quote
	28	from .cookies import load_cookies
	29	from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
	30	from .downloader.rtmp import rtmpdump_version
	31	from .extractor import gen_extractor_classes, get_info_extractor
	32	from .extractor.openload import PhantomJSwrapper
	33	from .minicurses import format_text
	34	from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
	35	from .postprocessor import (
	36	EmbedThumbnailPP,
	37	FFmpegFixupDuplicateMoovPP,
	38	FFmpegFixupDurationPP,
	39	FFmpegFixupM3u8PP,
	40	FFmpegFixupM4aPP,
	41	FFmpegFixupStretchedPP,
	42	FFmpegFixupTimestampPP,
	43	FFmpegMergerPP,
	44	FFmpegPostProcessor,
	45	FFmpegVideoConvertorPP,
	46	MoveFilesAfterDownloadPP,
	47	get_postprocessor,
	48	)
	49	from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
	50	from .update import detect_variant
	51	from .utils import (
	52	DEFAULT_OUTTMPL,
	53	IDENTITY,
	54	LINK_TEMPLATES,
	55	MEDIA_EXTENSIONS,
	56	NO_DEFAULT,
	57	NUMBER_RE,
	58	OUTTMPL_TYPES,
	59	POSTPROCESS_WHEN,
	60	STR_FORMAT_RE_TMPL,
	61	STR_FORMAT_TYPES,
	62	ContentTooShortError,
	63	DateRange,
	64	DownloadCancelled,
	65	DownloadError,
	66	EntryNotInPlaylist,
	67	ExistingVideoReached,
	68	ExtractorError,
	69	GeoRestrictedError,
	70	HEADRequest,
	71	ISO3166Utils,
	72	LazyList,
	73	MaxDownloadsReached,
	74	Namespace,
	75	PagedList,
	76	PerRequestProxyHandler,
	77	PlaylistEntries,
	78	Popen,
	79	PostProcessingError,
	80	ReExtractInfo,
	81	RejectedVideoReached,
	82	SameFileError,
	83	UnavailableVideoError,
	84	UserNotLive,
	85	YoutubeDLCookieProcessor,
	86	YoutubeDLHandler,
	87	YoutubeDLRedirectHandler,
	88	age_restricted,
	89	args_to_str,
	90	bug_reports_message,
	91	date_from_str,
	92	determine_ext,
	93	determine_protocol,
	94	encode_compat_str,
	95	encodeFilename,
	96	error_to_compat_str,
	97	escapeHTML,
	98	expand_path,
	99	filter_dict,
	100	float_or_none,
	101	format_bytes,
	102	format_decimal_suffix,
	103	format_field,
	104	formatSeconds,
	105	get_compatible_ext,
	106	get_domain,
	107	int_or_none,
	108	iri_to_uri,
	109	join_nonempty,
	110	locked_file,
	111	make_archive_id,
	112	make_dir,
	113	make_HTTPS_handler,
	114	merge_headers,
	115	network_exceptions,
	116	number_of_digits,
	117	orderedSet,
	118	parse_filesize,
	119	preferredencoding,
	120	prepend_extension,
	121	register_socks_protocols,
	122	remove_terminal_sequences,
	123	render_table,
	124	replace_extension,
	125	sanitize_filename,
	126	sanitize_path,
	127	sanitize_url,
	128	sanitized_Request,
	129	std_headers,
	130	str_or_none,
	131	strftime_or_none,
	132	subtitles_filename,
	133	supports_terminal_sequences,
	134	system_identifier,
	135	timetuple_from_msec,
	136	to_high_limit_path,
	137	traverse_obj,
	138	try_call,
	139	try_get,
	140	url_basename,
	141	variadic,
	142	version_tuple,
	143	windows_enable_vt_mode,
	144	write_json_file,
	145	write_string,
	146	)
	147	from .version import RELEASE_GIT_HEAD, VARIANT, __version__
	148
	149	if compat_os_name == 'nt':
	150	import ctypes
	151
	152
	153	class YoutubeDL:
	154	"""YoutubeDL class.
	155
	156	YoutubeDL objects are the ones responsible for downloading the
	157	actual video file and writing it to disk if the user has requested
	158	it, among some other tasks. In most cases there should be one per
	159	program. As, given a video URL, the downloader doesn't know how to
	160	extract all the needed information, task that InfoExtractors do, it
	161	has to pass the URL to one of them.
	162
	163	For this, YoutubeDL objects have a method that allows
	164	InfoExtractors to be registered in a given order. When it is passed
	165	a URL, the YoutubeDL object hands it to the first InfoExtractor it
	166	finds that reports being able to handle it. The InfoExtractor extracts
	167	all the information about the video or videos the URL refers to, and
	168	YoutubeDL processes the extracted information, possibly using a File
	169	Downloader to download the video.
	170
	171	YoutubeDL objects accept a lot of parameters. In order not to saturate
	172	the object constructor with arguments, it receives a dictionary of
	173	options instead. These options are available through the params
	174	attribute for the InfoExtractors to use. The YoutubeDL also
	175	registers itself as the downloader in charge for the InfoExtractors
	176	that are added to it, so this is a "mutual registration".
	177
	178	Available options:
	179
	180	username: Username for authentication purposes.
	181	password: Password for authentication purposes.
	182	videopassword: Password for accessing a video.
	183	ap_mso: Adobe Pass multiple-system operator identifier.
	184	ap_username: Multiple-system operator account username.
	185	ap_password: Multiple-system operator account password.
	186	usenetrc: Use netrc for authentication instead.
	187	verbose: Print additional info to stdout.
	188	quiet: Do not print messages to stdout.
	189	no_warnings: Do not print out anything for warnings.
	190	forceprint: A dict with keys WHEN mapped to a list of templates to
	191	print to stdout. The allowed keys are video or any of the
	192	items in utils.POSTPROCESS_WHEN.
	193	For compatibility, a single list is also accepted
	194	print_to_file: A dict with keys WHEN (same as forceprint) mapped to
	195	a list of tuples with (template, filename)
	196	forcejson: Force printing info_dict as JSON.
	197	dump_single_json: Force printing the info_dict of the whole playlist
	198	(or video) as a single JSON line.
	199	force_write_download_archive: Force writing download archive regardless
	200	of 'skip_download' or 'simulate'.
	201	simulate: Do not download the video files. If unset (or None),
	202	simulate only if listsubtitles, listformats or list_thumbnails is used
	203	format: Video format code. see "FORMAT SELECTION" for more details.
	204	You can also pass a function. The function takes 'ctx' as
	205	argument and returns the formats to download.
	206	See "build_format_selector" for an implementation
	207	allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
	208	ignore_no_formats_error: Ignore "No video formats" error. Useful for
	209	extracting metadata even if the video is not actually
	210	available for download (experimental)
	211	format_sort: A list of fields by which to sort the video formats.
	212	See "Sorting Formats" for more details.
	213	format_sort_force: Force the given format_sort. see "Sorting Formats"
	214	for more details.
	215	prefer_free_formats: Whether to prefer video formats with free containers
	216	over non-free ones of same quality.
	217	allow_multiple_video_streams: Allow multiple video streams to be merged
	218	into a single file
	219	allow_multiple_audio_streams: Allow multiple audio streams to be merged
	220	into a single file
	221	check_formats: Whether to test if the formats are downloadable.
	222	Can be True (check all), False (check none),
	223	'selected' (check selected formats),
	224	or None (check only if requested by extractor)
	225	paths: Dictionary of output paths. The allowed keys are 'home'
	226	'temp' and the keys of OUTTMPL_TYPES (in utils.py)
	227	outtmpl: Dictionary of templates for output names. Allowed keys
	228	are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
	229	For compatibility with youtube-dl, a single string can also be used
	230	outtmpl_na_placeholder: Placeholder for unavailable meta fields.
	231	restrictfilenames: Do not allow "&" and spaces in file names
	232	trim_file_name: Limit length of filename (extension excluded)
	233	windowsfilenames: Force the filenames to be windows compatible
	234	ignoreerrors: Do not stop on download/postprocessing errors.
	235	Can be 'only_download' to ignore only download errors.
	236	Default is 'only_download' for CLI, but False for API
	237	skip_playlist_after_errors: Number of allowed failures until the rest of
	238	the playlist is skipped
	239	force_generic_extractor: Force downloader to use the generic extractor
	240	overwrites: Overwrite all video and metadata files if True,
	241	overwrite only non-video files if None
	242	and don't overwrite any file if False
	243	For compatibility with youtube-dl,
	244	"nooverwrites" may also be used instead
	245	playlist_items: Specific indices of playlist to download.
	246	playlistrandom: Download playlist items in random order.
	247	lazy_playlist: Process playlist entries as they are received.
	248	matchtitle: Download only matching titles.
	249	rejecttitle: Reject downloads for matching titles.
	250	logger: Log messages to a logging.Logger instance.
	251	logtostderr: Log messages to stderr instead of stdout.
	252	consoletitle: Display progress in console window's titlebar.
	253	writedescription: Write the video description to a .description file
	254	writeinfojson: Write the video metadata to a .info.json file
	255	clean_infojson: Remove private fields from the infojson
	256	getcomments: Extract video comments. This will not be written to disk
	257	unless writeinfojson is also given
	258	writeannotations: Write the video annotations to a .annotations.xml file
	259	writethumbnail: Write the thumbnail image to a file
	260	allow_playlist_files: Whether to write playlists' description, infojson etc
	261	also to disk when using the 'write*' options
	262	write_all_thumbnails: Write all thumbnail formats to files
	263	writelink: Write an internet shortcut file, depending on the
	264	current platform (.url/.webloc/.desktop)
	265	writeurllink: Write a Windows internet shortcut file (.url)
	266	writewebloclink: Write a macOS internet shortcut file (.webloc)
	267	writedesktoplink: Write a Linux internet shortcut file (.desktop)
	268	writesubtitles: Write the video subtitles to a file
	269	writeautomaticsub: Write the automatically generated subtitles to a file
	270	listsubtitles: Lists all available subtitles for the video
	271	subtitlesformat: The format code for subtitles
	272	subtitleslangs: List of languages of the subtitles to download (can be regex).
	273	The list may contain "all" to refer to all the available
	274	subtitles. The language can be prefixed with a "-" to
	275	exclude it from the requested languages, e.g. ['all', '-live_chat']
	276	keepvideo: Keep the video file after post-processing
	277	daterange: A DateRange object, download only if the upload_date is in the range.
	278	skip_download: Skip the actual download of the video file
	279	cachedir: Location of the cache files in the filesystem.
	280	False to disable filesystem cache.
	281	noplaylist: Download single video instead of a playlist if in doubt.
	282	age_limit: An integer representing the user's age in years.
	283	Unsuitable videos for the given age are skipped.
	284	min_views: An integer representing the minimum view count the video
	285	must have in order to not be skipped.
	286	Videos without view count information are always
	287	downloaded. None for no limit.
	288	max_views: An integer representing the maximum view count.
	289	Videos that are more popular than that are not
	290	downloaded.
	291	Videos without view count information are always
	292	downloaded. None for no limit.
	293	download_archive: File name of a file where all downloads are recorded.
	294	Videos already present in the file are not downloaded
	295	again.
	296	break_on_existing: Stop the download process after attempting to download a
	297	file that is in the archive.
	298	break_on_reject: Stop the download process when encountering a video that
	299	has been filtered out.
	300	break_per_url: Whether break_on_reject and break_on_existing
	301	should act on each input URL as opposed to for the entire queue
	302	cookiefile: File name or text stream from where cookies should be read and dumped to
	303	cookiesfrombrowser: A tuple containing the name of the browser, the profile
	304	name/path from where cookies are loaded, and the name of the
	305	keyring, e.g. ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
	306	legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
	307	support RFC 5746 secure renegotiation
	308	nocheckcertificate: Do not verify SSL certificates
	309	client_certificate: Path to client certificate file in PEM format. May include the private key
	310	client_certificate_key: Path to private key file for client certificate
	311	client_certificate_password: Password for client certificate private key, if encrypted.
	312	If not provided and the key is encrypted, yt-dlp will ask interactively
	313	prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
	314	(Only supported by some extractors)
	315	http_headers: A dictionary of custom headers to be used for all requests
	316	proxy: URL of the proxy server to use
	317	geo_verification_proxy: URL of the proxy to use for IP address verification
	318	on geo-restricted sites.
	319	socket_timeout: Time to wait for unresponsive hosts, in seconds
	320	bidi_workaround: Work around buggy terminals without bidirectional text
	321	support, using fribidi
	322	debug_printtraffic: Print out sent and received HTTP traffic
	323	default_search: Prepend this string if an input url is not valid.
	324	'auto' for elaborate guessing
	325	encoding: Use this encoding instead of the system-specified.
	326	extract_flat: Whether to resolve and process url_results further
	327	* False: Always process (default)
	328	* True: Never process
	329	* 'in_playlist': Do not process inside playlist/multi_video
	330	* 'discard': Always process, but don't return the result
	331	from inside playlist/multi_video
	332	* 'discard_in_playlist': Same as "discard", but only for
	333	playlists (not multi_video)
	334	wait_for_video: If given, wait for scheduled streams to become available.
	335	The value should be a tuple containing the range
	336	(min_secs, max_secs) to wait between retries
	337	postprocessors: A list of dictionaries, each with an entry
	338	* key: The name of the postprocessor. See
	339	yt_dlp/postprocessor/__init__.py for a list.
	340	* when: When to run the postprocessor. Allowed values are
	341	the entries of utils.POSTPROCESS_WHEN
	342	Assumed to be 'post_process' if not given
	343	progress_hooks: A list of functions that get called on download
	344	progress, with a dictionary with the entries
	345	* status: One of "downloading", "error", or "finished".
	346	Check this first and ignore unknown values.
	347	* info_dict: The extracted info_dict
	348
	349	If status is one of "downloading", or "finished", the
	350	following properties may also be present:
	351	* filename: The final filename (always present)
	352	* tmpfilename: The filename we're currently writing to
	353	* downloaded_bytes: Bytes on disk
	354	* total_bytes: Size of the whole file, None if unknown
	355	* total_bytes_estimate: Guess of the eventual file size,
	356	None if unavailable.
	357	* elapsed: The number of seconds since download started.
	358	* eta: The estimated time in seconds, None if unknown
	359	* speed: The download speed in bytes/second, None if
	360	unknown
	361	* fragment_index: The counter of the currently
	362	downloaded video fragment.
	363	* fragment_count: The number of fragments (= individual
	364	files that will be merged)
	365
	366	Progress hooks are guaranteed to be called at least once
	367	(with status "finished") if the download is successful.
	368	postprocessor_hooks: A list of functions that get called on postprocessing
	369	progress, with a dictionary with the entries
	370	* status: One of "started", "processing", or "finished".
	371	Check this first and ignore unknown values.
	372	* postprocessor: Name of the postprocessor
	373	* info_dict: The extracted info_dict
	374
	375	Postprocessor hooks are guaranteed to be called at least twice
	376	(with status "started" and "finished") if the processing is successful.
	377	merge_output_format: "/" separated list of extensions to use when merging formats.
	378	final_ext: Expected final extension; used to detect when the file was
	379	already downloaded and converted
	380	fixup: Automatically correct known faults of the file.
	381	One of:
	382	- "never": do nothing
	383	- "warn": only emit a warning
	384	- "detect_or_warn": check whether we can do anything
	385	about it, warn otherwise (default)
	386	source_address: Client-side IP address to bind to.
	387	sleep_interval_requests: Number of seconds to sleep between requests
	388	during extraction
	389	sleep_interval: Number of seconds to sleep before each download when
	390	used alone or a lower bound of a range for randomized
	391	sleep before each download (minimum possible number
	392	of seconds to sleep) when used along with
	393	max_sleep_interval.
	394	max_sleep_interval: Upper bound of a range for randomized sleep before each
	395	download (maximum possible number of seconds to sleep).
	396	Must only be used along with sleep_interval.
	397	Actual sleep time will be a random float from range
	398	[sleep_interval; max_sleep_interval].
	399	sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
	400	listformats: Print an overview of available video formats and exit.
	401	list_thumbnails: Print a table of all thumbnails and exit.
	402	match_filter: A function that gets called for every video with the signature
	403	(info_dict, *, incomplete: bool) -> Optional[str]
	404	For backward compatibility with youtube-dl, the signature
	405	(info_dict) -> Optional[str] is also allowed.
	406	- If it returns a message, the video is ignored.
	407	- If it returns None, the video is downloaded.
	408	- If it returns utils.NO_DEFAULT, the user is interactively
	409	asked whether to download the video.
	410	match_filter_func in utils.py is one example for this.
	411	no_color: Do not emit color codes in output.
	412	geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
	413	HTTP header
	414	geo_bypass_country:
	415	Two-letter ISO 3166-1 alpha-2 country code that will be used for
	416	explicit geographic restriction bypassing via faking
	417	X-Forwarded-For HTTP header
	418	geo_bypass_ip_block:
	419	IP range in CIDR notation that will be used similarly to
	420	geo_bypass_country
	421	external_downloader: A dictionary of protocol keys and the executable of the
	422	external downloader to use for it. The allowed protocols
	423	are default\|http\|ftp\|m3u8\|dash\|rtsp\|rtmp\|mms.
	424	Set the value to 'native' to use the native downloader
	425	compat_opts: Compatibility options. See "Differences in default behavior".
	426	The following options do not work when used through the API:
	427	filename, abort-on-error, multistreams, no-live-chat, format-sort,
	428	no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
	429	Refer __init__.py for their implementation
	430	progress_template: Dictionary of templates for progress outputs.
	431	Allowed keys are 'download', 'postprocess',
	432	'download-title' (console title) and 'postprocess-title'.
	433	The template is mapped on a dictionary with keys 'progress' and 'info'
	434	retry_sleep_functions: Dictionary of functions that takes the number of attempts
	435	as argument and returns the time to sleep in seconds.
	436	Allowed keys are 'http', 'fragment', 'file_access'
	437	download_ranges: A callback function that gets called for every video with
	438	the signature (info_dict, ydl) -> Iterable[Section].
	439	Only the returned sections will be downloaded.
	440	Each Section is a dict with the following keys:
	441	* start_time: Start time of the section in seconds
	442	* end_time: End time of the section in seconds
	443	* title: Section title (Optional)
	444	* index: Section number (Optional)
	445	force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
	446	noprogress: Do not print the progress bar
	447	live_from_start: Whether to download livestream videos from the start
	448
	449	The following parameters are not used by YoutubeDL itself, they are used by
	450	the downloader (see yt_dlp/downloader/common.py):
	451	nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
	452	max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
	453	continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
	454	external_downloader_args, concurrent_fragment_downloads.
	455
	456	The following options are used by the post processors:
	457	ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
	458	to the binary or its containing directory.
	459	postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
	460	and a list of additional command-line arguments for the
	461	postprocessor/executable. The dict can also have "PP+EXE" keys
	462	which are used when the given exe is used by the given PP.
	463	Use 'default' as the name for arguments to be passed to all PP
	464	For compatibility with youtube-dl, a single list of args
	465	can also be used
	466
	467	The following options are used by the extractors:
	468	extractor_retries: Number of times to retry for known errors
	469	dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
	470	hls_split_discontinuity: Split HLS playlists to different formats at
	471	discontinuities such as ad breaks (default: False)
	472	extractor_args: A dictionary of arguments to be passed to the extractors.
	473	See "EXTRACTOR ARGUMENTS" for details.
	474	E.g. {'youtube': {'skip': ['dash', 'hls']}}
	475	mark_watched: Mark videos watched (even with --simulate). Only for YouTube
	476
	477	The following options are deprecated and may be removed in the future:
	478
	479	playliststart: - Use playlist_items
	480	Playlist item to start at.
	481	playlistend: - Use playlist_items
	482	Playlist item to end at.
	483	playlistreverse: - Use playlist_items
	484	Download playlist items in reverse order.
	485	forceurl: - Use forceprint
	486	Force printing final URL.
	487	forcetitle: - Use forceprint
	488	Force printing title.
	489	forceid: - Use forceprint
	490	Force printing ID.
	491	forcethumbnail: - Use forceprint
	492	Force printing thumbnail URL.
	493	forcedescription: - Use forceprint
	494	Force printing description.
	495	forcefilename: - Use forceprint
	496	Force printing final filename.
	497	forceduration: - Use forceprint
	498	Force printing duration.
	499	allsubtitles: - Use subtitleslangs = ['all']
	500	Downloads all the subtitles of the video

1

import collections

import contextlib

import datetime

import errno

import fileinput

import functools

import io

import itertools

import json

import locale

import operator

import os

import random

import re

import shutil

import subprocess

import sys

import tempfile

import time

import tokenize

import traceback

import unicodedata

import urllib.request

24

from string import ascii_letters

25

26

from .cache import Cache

27

from .compat import compat_os_name, compat_shlex_quote

28

from .cookies import load_cookies

29

from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name

30

from .downloader.rtmp import rtmpdump_version

31

from .extractor import gen_extractor_classes, get_info_extractor

32

from .extractor.openload import PhantomJSwrapper

33

from .minicurses import format_text

34

from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors

35

from .postprocessor import (

36

EmbedThumbnailPP,

37

FFmpegFixupDuplicateMoovPP,

38

FFmpegFixupDurationPP,

39

FFmpegFixupM3u8PP,

40

FFmpegFixupM4aPP,

41

FFmpegFixupStretchedPP,

42

FFmpegFixupTimestampPP,

43

FFmpegMergerPP,

44

FFmpegPostProcessor,

45

FFmpegVideoConvertorPP,

46

MoveFilesAfterDownloadPP,

47

get_postprocessor,

48

)

49

from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping

50

from .update import detect_variant

from .utils import (

DEFAULT_OUTTMPL,

IDENTITY,

LINK_TEMPLATES,

MEDIA_EXTENSIONS,

NO_DEFAULT,

NUMBER_RE,

OUTTMPL_TYPES,

POSTPROCESS_WHEN,

STR_FORMAT_RE_TMPL,

STR_FORMAT_TYPES,

ContentTooShortError,

DateRange,

DownloadCancelled,

DownloadError,

EntryNotInPlaylist,

ExistingVideoReached,

ExtractorError,

GeoRestrictedError,

HEADRequest,

ISO3166Utils,

LazyList,

MaxDownloadsReached,

Namespace,

PagedList,

PerRequestProxyHandler,

PlaylistEntries,

Popen,

PostProcessingError,

ReExtractInfo,

RejectedVideoReached,

82

SameFileError,

83

UnavailableVideoError,

84

UserNotLive,

85

YoutubeDLCookieProcessor,

86

YoutubeDLHandler,

87

YoutubeDLRedirectHandler,

age_restricted,

args_to_str,

bug_reports_message,

date_from_str,

determine_ext,

determine_protocol,

encode_compat_str,

encodeFilename,

error_to_compat_str,

escapeHTML,

expand_path,

filter_dict,

float_or_none,

format_bytes,

format_decimal_suffix,

format_field,

formatSeconds,

get_compatible_ext,

get_domain,

int_or_none,

iri_to_uri,

join_nonempty,

locked_file,

make_archive_id,

make_dir,

make_HTTPS_handler,

merge_headers,

network_exceptions,

number_of_digits,

orderedSet,

parse_filesize,

preferredencoding,

prepend_extension,

register_socks_protocols,

122

remove_terminal_sequences,

render_table,

replace_extension,

sanitize_filename,

sanitize_path,

sanitize_url,

sanitized_Request,

std_headers,

str_or_none,

strftime_or_none,

subtitles_filename,

supports_terminal_sequences,

system_identifier,

timetuple_from_msec,

to_high_limit_path,

traverse_obj,

try_call,

try_get,

url_basename,

variadic,

version_tuple,

windows_enable_vt_mode,

write_json_file,

write_string,

)

from .version import RELEASE_GIT_HEAD, VARIANT, __version__

148

149

if compat_os_name == 'nt':

import ctypes

class YoutubeDL:

"""YoutubeDL class.

YoutubeDL objects are the ones responsible for downloading the

157

actual video file and writing it to disk if the user has requested

158

it, among some other tasks. In most cases there should be one per

159

program. As, given a video URL, the downloader doesn't know how to

160

extract all the needed information, task that InfoExtractors do, it

161

has to pass the URL to one of them.

162

163

For this, YoutubeDL objects have a method that allows

164

InfoExtractors to be registered in a given order. When it is passed

165

a URL, the YoutubeDL object hands it to the first InfoExtractor it

166

finds that reports being able to handle it. The InfoExtractor extracts

167

all the information about the video or videos the URL refers to, and

168

YoutubeDL processes the extracted information, possibly using a File

169

Downloader to download the video.

170

171

YoutubeDL objects accept a lot of parameters. In order not to saturate

172

the object constructor with arguments, it receives a dictionary of

173

options instead. These options are available through the params

174

attribute for the InfoExtractors to use. The YoutubeDL also

175

registers itself as the downloader in charge for the InfoExtractors

176

that are added to it, so this is a "mutual registration".

Available options:

username: Username for authentication purposes.

181

password: Password for authentication purposes.

182

videopassword: Password for accessing a video.

183

ap_mso: Adobe Pass multiple-system operator identifier.

184

ap_username: Multiple-system operator account username.

185

ap_password: Multiple-system operator account password.

186

usenetrc: Use netrc for authentication instead.

187

verbose: Print additional info to stdout.

188

quiet: Do not print messages to stdout.

189

no_warnings: Do not print out anything for warnings.

190

forceprint: A dict with keys WHEN mapped to a list of templates to

191

print to stdout. The allowed keys are video or any of the

192

items in utils.POSTPROCESS_WHEN.

193

For compatibility, a single list is also accepted

194

print_to_file: A dict with keys WHEN (same as forceprint) mapped to

195

a list of tuples with (template, filename)

196

forcejson: Force printing info_dict as JSON.

197

dump_single_json: Force printing the info_dict of the whole playlist

198

(or video) as a single JSON line.

199

force_write_download_archive: Force writing download archive regardless

200

of 'skip_download' or 'simulate'.

201

simulate: Do not download the video files. If unset (or None),

202

simulate only if listsubtitles, listformats or list_thumbnails is used

203

format: Video format code. see "FORMAT SELECTION" for more details.

204

You can also pass a function. The function takes 'ctx' as

205

argument and returns the formats to download.

206

See "build_format_selector" for an implementation

207

allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.

208

ignore_no_formats_error: Ignore "No video formats" error. Useful for

209

extracting metadata even if the video is not actually

210

available for download (experimental)

211

format_sort: A list of fields by which to sort the video formats.

212

See "Sorting Formats" for more details.

213

format_sort_force: Force the given format_sort. see "Sorting Formats"

214

for more details.

215

prefer_free_formats: Whether to prefer video formats with free containers

216

over non-free ones of same quality.

217

allow_multiple_video_streams: Allow multiple video streams to be merged

218

into a single file

219

allow_multiple_audio_streams: Allow multiple audio streams to be merged

220

into a single file

221

check_formats: Whether to test if the formats are downloadable.

222

Can be True (check all), False (check none),

223

'selected' (check selected formats),

224

or None (check only if requested by extractor)

225

paths: Dictionary of output paths. The allowed keys are 'home'

226

'temp' and the keys of OUTTMPL_TYPES (in utils.py)

227

outtmpl: Dictionary of templates for output names. Allowed keys

228

are 'default' and the keys of OUTTMPL_TYPES (in utils.py).

229

For compatibility with youtube-dl, a single string can also be used

230

outtmpl_na_placeholder: Placeholder for unavailable meta fields.

231

restrictfilenames: Do not allow "&" and spaces in file names

232

trim_file_name: Limit length of filename (extension excluded)

233

windowsfilenames: Force the filenames to be windows compatible

234

ignoreerrors: Do not stop on download/postprocessing errors.

235

Can be 'only_download' to ignore only download errors.

236

Default is 'only_download' for CLI, but False for API

237

skip_playlist_after_errors: Number of allowed failures until the rest of

238

the playlist is skipped

239

force_generic_extractor: Force downloader to use the generic extractor

240

overwrites: Overwrite all video and metadata files if True,

241

overwrite only non-video files if None

242

and don't overwrite any file if False

243

For compatibility with youtube-dl,

244

"nooverwrites" may also be used instead

245

playlist_items: Specific indices of playlist to download.

246

playlistrandom: Download playlist items in random order.

247

lazy_playlist: Process playlist entries as they are received.

248

matchtitle: Download only matching titles.

249

rejecttitle: Reject downloads for matching titles.

250

logger: Log messages to a logging.Logger instance.

251

logtostderr: Log messages to stderr instead of stdout.

252

consoletitle: Display progress in console window's titlebar.

253

writedescription: Write the video description to a .description file

254

writeinfojson: Write the video description to a .info.json file

255

clean_infojson: Remove private fields from the infojson

256

getcomments: Extract video comments. This will not be written to disk

257

unless writeinfojson is also given

258

writeannotations: Write the video annotations to a .annotations.xml file

259

writethumbnail: Write the thumbnail image to a file

260

allow_playlist_files: Whether to write playlists' description, infojson etc

261

also to disk when using the 'write*' options

262

write_all_thumbnails: Write all thumbnail formats to files

263

writelink: Write an internet shortcut file, depending on the

264

current platform (.url/.webloc/.desktop)

265

writeurllink: Write a Windows internet shortcut file (.url)

266

writewebloclink: Write a macOS internet shortcut file (.webloc)

267

writedesktoplink: Write a Linux internet shortcut file (.desktop)

268

writesubtitles: Write the video subtitles to a file

269

writeautomaticsub: Write the automatically generated subtitles to a file

270

listsubtitles: Lists all available subtitles for the video

271

subtitlesformat: The format code for subtitles

272

subtitleslangs: List of languages of the subtitles to download (can be regex).

273

The list may contain "all" to refer to all the available

274

subtitles. The language can be prefixed with a "-" to

275

exclude it from the requested languages, e.g. ['all', '-live_chat']

276

keepvideo: Keep the video file after post-processing

277

daterange: A DateRange object, download only if the upload_date is in the range.

278

skip_download: Skip the actual download of the video file

279

cachedir: Location of the cache files in the filesystem.

280

False to disable filesystem cache.

281

noplaylist: Download single video instead of a playlist if in doubt.

282

age_limit: An integer representing the user's age in years.

283

Unsuitable videos for the given age are skipped.

284

min_views: An integer representing the minimum view count the video

285

must have in order to not be skipped.

286

Videos without view count information are always

287

downloaded. None for no limit.

288

max_views: An integer representing the maximum view count.

289

Videos that are more popular than that are not

290

downloaded.

291

Videos without view count information are always

292

downloaded. None for no limit.

293

download_archive: File name of a file where all downloads are recorded.

294

Videos already present in the file are not downloaded

295

again.

296

break_on_existing: Stop the download process after attempting to download a

297

file that is in the archive.

298

break_on_reject: Stop the download process when encountering a video that

299

has been filtered out.

300

break_per_url: Whether break_on_reject and break_on_existing

301

should act on each input URL as opposed to for the entire queue

302

cookiefile: File name or text stream from where cookies should be read and dumped to

303

cookiesfrombrowser: A tuple containing the name of the browser, the profile

304

name/path from where cookies are loaded, and the name of the

305

keyring, e.g. ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')

306

legacyserverconnect: Explicitly allow HTTPS connection to servers that do not

307

support RFC 5746 secure renegotiation

308

nocheckcertificate: Do not verify SSL certificates

309

client_certificate: Path to client certificate file in PEM format. May include the private key

310

client_certificate_key: Path to private key file for client certificate

311

client_certificate_password: Password for client certificate private key, if encrypted.

312

If not provided and the key is encrypted, yt-dlp will ask interactively

313

prefer_insecure: Use HTTP instead of HTTPS to retrieve information.

314

(Only supported by some extractors)

315

http_headers: A dictionary of custom headers to be used for all requests

316

proxy: URL of the proxy server to use

317

geo_verification_proxy: URL of the proxy to use for IP address verification

318

on geo-restricted sites.

319

socket_timeout: Time to wait for unresponsive hosts, in seconds

320

bidi_workaround: Work around buggy terminals without bidirectional text

321

support, using fribidi

322

debug_printtraffic:Print out sent and received HTTP traffic

323

default_search: Prepend this string if an input url is not valid.

324

'auto' for elaborate guessing

325

encoding: Use this encoding instead of the system-specified.

326

extract_flat: Whether to resolve and process url_results further

327

* False: Always process (default)

328

* True: Never process

329

* 'in_playlist': Do not process inside playlist/multi_video

330

* 'discard': Always process, but don't return the result

331

from inside playlist/multi_video

332

* 'discard_in_playlist': Same as "discard", but only for

333

playlists (not multi_video)

334

wait_for_video: If given, wait for scheduled streams to become available.

335

The value should be a tuple containing the range

336

(min_secs, max_secs) to wait between retries

337

postprocessors: A list of dictionaries, each with an entry

338

* key: The name of the postprocessor. See

339

yt_dlp/postprocessor/__init__.py for a list.

340

* when: When to run the postprocessor. Allowed values are

341

the entries of utils.POSTPROCESS_WHEN

342

Assumed to be 'post_process' if not given

343

progress_hooks: A list of functions that get called on download

344

progress, with a dictionary with the entries

345

* status: One of "downloading", "error", or "finished".

346

Check this first and ignore unknown values.

347

* info_dict: The extracted info_dict

348

349

If status is one of "downloading", or "finished", the

350

following properties may also be present:

351

* filename: The final filename (always present)

352

* tmpfilename: The filename we're currently writing to

353

* downloaded_bytes: Bytes on disk

354

* total_bytes: Size of the whole file, None if unknown

355

* total_bytes_estimate: Guess of the eventual file size,

356

None if unavailable.

357

* elapsed: The number of seconds since download started.

358

* eta: The estimated time in seconds, None if unknown

359

* speed: The download speed in bytes/second, None if

360

unknown

361

* fragment_index: The counter of the currently

362

downloaded video fragment.

363

* fragment_count: The number of fragments (= individual

364

files that will be merged)

365

366

Progress hooks are guaranteed to be called at least once

367

(with status "finished") if the download is successful.

368

postprocessor_hooks: A list of functions that get called on postprocessing

369

progress, with a dictionary with the entries

370

* status: One of "started", "processing", or "finished".

371

Check this first and ignore unknown values.

372

* postprocessor: Name of the postprocessor

373

* info_dict: The extracted info_dict

374

375

Progress hooks are guaranteed to be called at least twice

376

(with status "started" and "finished") if the processing is successful.

377

merge_output_format: "/" separated list of extensions to use when merging formats.

378

final_ext: Expected final extension; used to detect when the file was

379

already downloaded and converted

380

fixup: Automatically correct known faults of the file.

381

One of:

382

- "never": do nothing

383

- "warn": only emit a warning

384

- "detect_or_warn": check whether we can do anything

385

about it, warn otherwise (default)

386

source_address: Client-side IP address to bind to.

387

sleep_interval_requests: Number of seconds to sleep between requests

388

during extraction

389

sleep_interval: Number of seconds to sleep before each download when

390

used alone or a lower bound of a range for randomized

391

sleep before each download (minimum possible number

392

of seconds to sleep) when used along with

393

max_sleep_interval.

394

max_sleep_interval:Upper bound of a range for randomized sleep before each

395

download (maximum possible number of seconds to sleep).

396

Must only be used along with sleep_interval.

397

Actual sleep time will be a random float from range

398

[sleep_interval; max_sleep_interval].

399

sleep_interval_subtitles: Number of seconds to sleep before each subtitle download

400

listformats: Print an overview of available video formats and exit.

401

list_thumbnails: Print a table of all thumbnails and exit.

402

match_filter: A function that gets called for every video with the signature

403

(info_dict, *, incomplete: bool) -> Optional[str]

404

For backward compatibility with youtube-dl, the signature

405

(info_dict) -> Optional[str] is also allowed.

406

- If it returns a message, the video is ignored.

407

- If it returns None, the video is downloaded.

408

- If it returns utils.NO_DEFAULT, the user is interactively

409

asked whether to download the video.

410

match_filter_func in utils.py is one example for this.

411

no_color: Do not emit color codes in output.

412

geo_bypass: Bypass geographic restriction via faking X-Forwarded-For

413

HTTP header

414

geo_bypass_country:

415

Two-letter ISO 3166-1 alpha-2 country code that will be used for

416

explicit geographic restriction bypassing via faking

417

X-Forwarded-For HTTP header

418

geo_bypass_ip_block:

419

IP range in CIDR notation that will be used similarly to

420

geo_bypass_country

421

external_downloader: A dictionary of protocol keys and the executable of the

422

external downloader to use for it. The allowed protocols
are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.

423

424

Set the value to 'native' to use the native downloader

425

compat_opts: Compatibility options. See "Differences in default behavior".

426

The following options do not work when used through the API:

427

filename, abort-on-error, multistreams, no-live-chat, format-sort

428

no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.

429

Refer __init__.py for their implementation

430

progress_template: Dictionary of templates for progress outputs.

431

Allowed keys are 'download', 'postprocess',

432

'download-title' (console title) and 'postprocess-title'.

433

The template is mapped on a dictionary with keys 'progress' and 'info'

434

retry_sleep_functions: Dictionary of functions that takes the number of attempts

435

as argument and returns the time to sleep in seconds.

436

Allowed keys are 'http', 'fragment', 'file_access'

437

download_ranges: A callback function that gets called for every video with

438

the signature (info_dict, ydl) -> Iterable[Section].

439

Only the returned sections will be downloaded.

440

Each Section is a dict with the following keys:

441

* start_time: Start time of the section in seconds

442

* end_time: End time of the section in seconds

443

* title: Section title (Optional)

444

* index: Section number (Optional)

445

force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts

446

noprogress: Do not print the progress bar

447

live_from_start: Whether to download livestream videos from the start

448

449

The following parameters are not used by YoutubeDL itself, they are used by

450

the downloader (see yt_dlp/downloader/common.py):

451

nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,

452

max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,

453

continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,

454

external_downloader_args, concurrent_fragment_downloads.

455

456

The following options are used by the post processors:

457

ffmpeg_location: Location of the ffmpeg/avconv binary; either the path

458

to the binary or its containing directory.

459

postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)

460

and a list of additional command-line arguments for the

461

postprocessor/executable. The dict can also have "PP+EXE" keys

462

which are used when the given exe is used by the given PP.

463

Use 'default' as the name for arguments to be passed to all PPs.

464

For compatibility with youtube-dl, a single list of args

465

can also be used

466

467

The following options are used by the extractors:

468

extractor_retries: Number of times to retry for known errors

469

dynamic_mpd: Whether to process dynamic DASH manifests (default: True)

470

hls_split_discontinuity: Split HLS playlists to different formats at

471

discontinuities such as ad breaks (default: False)

472

extractor_args: A dictionary of arguments to be passed to the extractors.

473

See "EXTRACTOR ARGUMENTS" for details.

474

E.g. {'youtube': {'skip': ['dash', 'hls']}}

475

mark_watched: Mark videos watched (even with --simulate). Only for YouTube

476

477

The following options are deprecated and may be removed in the future:

478

479

playliststart: - Use playlist_items

480

Playlist item to start at.

481

playlistend: - Use playlist_items

482

Playlist item to end at.

483

playlistreverse: - Use playlist_items

484

Download playlist items in reverse order.

485

forceurl: - Use forceprint

486

Force printing final URL.

487

forcetitle: - Use forceprint

488

Force printing title.

489

forceid: - Use forceprint

490

Force printing ID.

491

forcethumbnail: - Use forceprint

492

Force printing thumbnail URL.

493

forcedescription: - Use forceprint

494

Force printing description.

495

forcefilename: - Use forceprint

496

Force printing final filename.

497

forceduration: - Use forceprint

498

Force printing duration.

499

allsubtitles: - Use subtitleslangs = ['all']

500

Downloads all the subtitles of the video

501

(requires writesubtitles or writeautomaticsub)

502

include_ads: - Doesn't work

503

Download ads as well

504

call_home: - Not implemented

505

Boolean, true iff we are allowed to contact the

506

yt-dlp servers for debugging.

507

post_hooks: - Register a custom postprocessor

508

A list of functions that get called as the final step

509

for each video file, after all postprocessors have been

510

called. The filename will be passed as the only argument.

511

hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.

512

Use the native HLS downloader instead of ffmpeg/avconv

513

if True, otherwise use ffmpeg/avconv if False, otherwise

514

use downloader suggested by extractor if None.

515

prefer_ffmpeg: - avconv support is deprecated

516

If False, use avconv instead of ffmpeg if both are available,

517

otherwise prefer ffmpeg.

518

youtube_include_dash_manifest: - Use extractor_args

519

If True (default), DASH manifests and related

520

data will be downloaded and processed by extractor.

521

You can reduce network I/O by disabling it if you don't

522

care about DASH. (only for youtube)

523

youtube_include_hls_manifest: - Use extractor_args

524

If True (default), HLS manifests and related

525

data will be downloaded and processed by extractor.

526

You can reduce network I/O by disabling it if you don't

527

care about HLS. (only for youtube)

"""

_NUMERIC_FIELDS = {

'width', 'height', 'asr', 'audio_channels', 'fps',

532

'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',

533

'timestamp', 'release_timestamp',

534

'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',

535

'average_rating', 'comment_count', 'age_limit',

536

'start_time', 'end_time',

537

'chapter_number', 'season_number', 'episode_number',

538

'track_number', 'disc_number', 'release_year',

}

_format_fields = {

# NB: Keep in sync with the docstring of extractor/common.py

543

'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',

544

'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',

545

'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',

546

'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',

547

'preference', 'language', 'language_preference', 'quality', 'source_preference',

548

'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',

549

'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'

550

}

551

# Extension groups keyed by media kind; the video group is the common video
# extensions plus '3gp'.
_format_selection_exts = {
    'audio': {*MEDIA_EXTENSIONS.common_audio},
    'video': {*MEDIA_EXTENSIONS.common_video, '3gp'},
    'storyboards': {*MEDIA_EXTENSIONS.storyboards},
}

556

557

def __init__(self, params=None, auto_init=True):
    """Create a downloader object with the given options.

    @param params       Dictionary of options. It is stored by reference as
                        self.params and is modified in place below
                        (compatibility keys and defaults are filled in).
                        None is treated as an empty dictionary.
    @param auto_init    Whether to load the default extractors and print header (if verbose).
                        Set to 'no_verbose_header' to not print the header
    """
    if params is None:
        params = {}
    self.params = params
    # Registered extractors by ie_key: _ies holds classes or instances,
    # _ies_instances only instances (see add_info_extractor)
    self._ies = {}
    self._ies_instances = {}
    # One postprocessor list per allowed "when" stage
    self._pps = {k: [] for k in POSTPROCESS_WHEN}
    # Messages already emitted with only_once=True (see _write_string)
    self._printed_messages = set()
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._postprocessor_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._num_videos = 0
    self._playlist_level = 0
    self._playlist_urls = set()
    self.cache = Cache(self)

    windows_enable_vt_mode()
    # 'logtostderr' redirects regular output to stderr; 'quiet' redirects
    # screen messages to stderr as well
    stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
    self._out_files = Namespace(
        out=stdout,
        error=sys.stderr,
        screen=sys.stderr if self.params.get('quiet') else stdout,
        # First of stderr/stdout accepted by supports_terminal_sequences; never set on Windows
        console=None if compat_os_name == 'nt' else next(
            filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
    )
    # Per-stream colour switch (the console entry is excluded)
    self._allow_colors = Namespace(**{
        type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
        for type_, stream in self._out_files.items_ if type_ != 'console'
    })

    # The code is left like this to be reused for future deprecations
    MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
    current_version = sys.version_info[:2]
    if current_version < MIN_RECOMMENDED:
        msg = ('Support for Python version %d.%d has been deprecated. '
               'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
               '\n You will no longer receive updates on this version')
        if current_version < MIN_SUPPORTED:
            msg = 'Python version %d.%d is no longer supported'
        # msg carries the first two %d placeholders (current version); the
        # f-string adds two more for the recommended version
        self.deprecation_warning(
            f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))

    if self.params.get('allow_unplayable_formats'):
        self.report_warning(
            f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
            'This is a developer option intended for debugging. \n'
            ' If you experience any issues while using this option, '
            f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

    def check_deprecated(param, option, suggestion):
        # Warn when the deprecated `param` is set; returns whether it was set
        if self.params.get(param) is not None:
            self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
            return True
        return False

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the old value over unless the new option was given explicitly
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    # Replay warnings that were queued in the params by the caller
    for msg in self.params.get('_warnings', []):
        self.report_warning(msg)
    for msg in self.params.get('_deprecation_warnings', []):
        self.deprecation_warning(msg)

    self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
    if 'list-formats' in self.params['compat_opts']:
        self.params['listformats_table'] = False

    if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
        # nooverwrites was unnecessarily changed to overwrites
        # in 0c3d0f51778b153f65c21906031c2e091fcfb641
        # This ensures compatibility with both keys
        self.params['overwrites'] = not self.params['nooverwrites']
    elif self.params.get('overwrites') is None:
        # An explicit None is dropped so that the key is simply absent
        self.params.pop('overwrites', None)
    else:
        self.params['nooverwrites'] = not self.params['overwrites']

    self.params.setdefault('forceprint', {})
    self.params.setdefault('print_to_file', {})

    # Compatibility with older syntax
    if not isinstance(params['forceprint'], dict):
        self.params['forceprint'] = {'video': params['forceprint']}

    if self.params.get('bidi_workaround', False):
        try:
            import pty
            # The helper process writes to the slave end; its output is read
            # back from the master end (see _bidi_workaround)
            master, slave = pty.openpty()
            width = shutil.get_terminal_size().columns
            width_args = [] if width is None else ['-w', str(width)]
            sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
            try:
                self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
            except OSError:
                # bidiv could not be started; fall back to fribidi
                self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning(
                    'Could not find fribidi executable, ignoring --bidi-workaround. '
                    'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if auto_init:
        if auto_init != 'no_verbose_header':
            self.print_debug_header()
        self.add_default_info_extractors()

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self._parse_outtmpl()

    # Creating format selector here allows us to catch syntax errors before the extraction
    # None and '-' are stored unchanged, a callable is used as the selector
    # itself, anything else is compiled with build_format_selector
    self.format_selector = (
        self.params.get('format') if self.params.get('format') in (None, '-')
        else self.params['format'] if callable(self.params['format'])
        else self.build_format_selector(self.params['format']))

    # Set http_headers defaults according to std_headers
    self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

    # Register every hook supplied through params with its matching add_* method
    hooks = {
        'post_hooks': self.add_post_hook,
        'progress_hooks': self.add_progress_hook,
        'postprocessor_hooks': self.add_postprocessor_hook,
    }
    for opt, fn in hooks.items():
        for ph in self.params.get(opt, []):
            fn(ph)

    for pp_def_raw in self.params.get('postprocessors', []):
        # Work on a copy so that popping 'when'/'key' leaves the caller's dict intact
        pp_def = dict(pp_def_raw)
        when = pp_def.pop('when', 'post_process')
        self.add_post_processor(
            get_postprocessor(pp_def.pop('key'))(self, **pp_def),
            when=when)

    self._setup_opener()
    register_socks_protocols()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified"""
        if fn is None:
            return False
        self.write_debug(f'Loading archive file {fn!r}')
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except OSError as ioe:
            # A missing archive file is not an error; anything else is re-raised
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    # Must exist before preloading, which adds entries to it
    self.archive = set()
    preload_download_archive(self.params.get('download_archive'))

735

736

def warn_if_short_id(self, argv):
    """Warn about arguments that look like a dash-prefixed video ID.

    An argument consisting of "-" followed by exactly ten characters from
    [0-9A-Za-z_-] may be a short YouTube ID that would be parsed as an
    option. When any is found, a warning suggests a command line with
    those arguments moved behind a "--" separator.

    @param argv  Sequence of command-line argument strings
    """
    short_id_re = r'^-[0-9A-Za-z_-]{10}$'
    suspects, regular = [], []
    for arg in argv:
        bucket = suspects if re.match(short_id_re, arg) else regular
        bucket.append(arg)
    if not suspects:
        return

    correct_argv = ['yt-dlp', *regular, '--', *suspects]
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s' %
        args_to_str(correct_argv))

751

752

def add_info_extractor(self, ie):
    """Register an InfoExtractor (class or instance) under its ie_key.

    Classes are only recorded in self._ies. Instances are additionally
    recorded in self._ies_instances and bound to this downloader.
    """
    key = ie.ie_key()
    self._ies[key] = ie
    if isinstance(ie, type):
        return
    self._ies_instances[key] = ie
    ie.set_downloader(self)

759

760

def _get_info_extractor_class(self, ie_key):

761

ie = self._ies.get(ie_key)

762

if ie is None:

763

ie = get_info_extractor(ie_key)

764

self.add_info_extractor(ie)

765

return ie

766

767

def get_info_extractor(self, ie_key):
    """
    Return an IE instance for ie_key. An instance already present in
    self._ies_instances is reused; otherwise a new one is created,
    registered with add_info_extractor() and returned.
    """
    cached = self._ies_instances.get(ie_key)
    if cached is not None:
        return cached
    # The name below resolves to the module-level factory, not this method
    instance = get_info_extractor(ie_key)()
    self.add_info_extractor(instance)
    return instance

778

779

def add_default_info_extractors(self):
    """
    Register every extractor class yielded by gen_extractor_classes(),
    in the order they are produced.
    """
    extractor_classes = gen_extractor_classes()
    for ie_class in extractor_classes:
        self.add_info_extractor(ie_class)

785

786

def add_post_processor(self, pp, when='post_process'):
    """Append a PostProcessor to the chain of the given stage.

    @param pp    The postprocessor; it is bound to this downloader
    @param when  Stage name; must be one of POSTPROCESS_WHEN
    """
    assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
    stage_chain = self._pps[when]
    stage_chain.append(pp)
    pp.set_downloader(self)

791

792

def add_post_hook(self, ph):
    """Register ph at the end of the post-hook list"""
    registered = self._post_hooks
    registered.append(ph)

795

796

def add_progress_hook(self, ph):
    """Register ph at the end of the download progress hook list"""
    registered = self._progress_hooks
    registered.append(ph)

799

800

def add_postprocessor_hook(self, ph):
    """Register a postprocessing progress hook.

    The hook is remembered on the downloader and also attached to every
    postprocessor that is already registered, whatever its stage.
    """
    self._postprocessor_hooks.append(ph)
    for pp in itertools.chain.from_iterable(self._pps.values()):
        pp.add_progress_hook(ph)

806

807

def _bidi_workaround(self, message):

808

if not hasattr(self, '_output_channel'):

809

return message

810

811

assert hasattr(self, '_output_process')

812

assert isinstance(message, str)

813

line_count = message.count('\n') + 1

814

self._output_process.stdin.write((message + '\n').encode())

815

self._output_process.stdin.flush()

816

res = ''.join(self._output_channel.readline().decode()

817

for _ in range(line_count))

818

return res[:-len('\n')]

819

820

def _write_string(self, message, out=None, only_once=False):

821

if only_once:

822

if message in self._printed_messages:

823

return

824

self._printed_messages.add(message)

825

write_string(message, out=out, encoding=self.params.get('encoding'))

826

827

def to_stdout(self, message, skip_eol=False, quiet=None):
    """Print message, followed by a newline, to the `out` stream.

    `skip_eol` and `quiet` are no longer honoured; passing either only
    triggers a deprecation warning.
    """
    misused = (
        ('quiet', quiet is not None),
        ('skip_eol', skip_eol is not False),
    )
    for arg_name, was_passed in misused:
        if was_passed:
            self.deprecation_warning(
                f'"YoutubeDL.to_stdout" no longer accepts the argument {arg_name}. '
                'Use "YoutubeDL.to_screen" instead')
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.out)

834

835

def to_screen(self, message, skip_eol=False, quiet=None):
    """Print message to the screen stream unless output is quiet.

    A configured 'logger' receives the message at debug level instead.
    `quiet`, when given, overrides the 'quiet' param; 'verbose' defeats
    quietness either way. `skip_eol` suppresses the trailing newline.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return

    suppress = self.params.get('quiet') if quiet is None else quiet
    if suppress and not self.params.get('verbose'):
        return

    converted = self._bidi_workaround(message)
    terminator = '' if skip_eol else '\n'
    self._write_string('%s%s' % (converted, terminator), self._out_files.screen)

845

846

def to_stderr(self, message, only_once=False):
    """Print message, followed by a newline, to the error stream.

    A configured 'logger' receives the message at error level instead.
    """
    assert isinstance(message, str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.error, only_once=only_once)

853

854

def _send_console_code(self, code):
    """Write a raw control sequence to the console stream.

    Nothing is sent on Windows ('nt') or when no console stream is set.
    """
    if compat_os_name != 'nt' and self._out_files.console:
        self._write_string(code, self._out_files.console)

858

859

def to_console_title(self, message):
    """Set the console window title when 'consoletitle' is enabled.

    Terminal sequences are stripped from the message first. On Windows
    the title is set via kernel32 (only if a console window exists);
    elsewhere an OSC title sequence is sent to the console stream.
    """
    if not self.params.get('consoletitle', False):
        return
    title = remove_terminal_sequences(message)
    if compat_os_name != 'nt':
        self._send_console_code(f'\033]0;{title}\007')
        return
    kernel32 = ctypes.windll.kernel32
    if kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `title` is
        # already of type unicode()
        kernel32.SetConsoleTitleW(ctypes.c_wchar_p(title))

870

871

def save_console_title(self):
    """Push the current console title on the terminal's title stack.

    Only done when 'consoletitle' is enabled and 'simulate' is not.
    """
    wanted = self.params.get('consoletitle') and not self.params.get('simulate')
    if wanted:
        self._send_console_code('\033[22;0t')  # Save the title on stack

875

876

def restore_console_title(self):
    """Pop the console title back from the terminal's title stack.

    Only done when 'consoletitle' is enabled and 'simulate' is not.
    """
    wanted = self.params.get('consoletitle') and not self.params.get('simulate')
    if wanted:
        self._send_console_code('\033[23;0t')  # Restore the title from stack

880

881

def __enter__(self):

882

self.save_console_title()

883

return self

884

885

def __exit__(self, *args):

886

self.restore_console_title()

887

888

if self.params.get('cookiefile') is not None:

889

self.cookiejar.save(ignore_discard=True, ignore_expires=True)

890

891

def trouble(self, message=None, tb=None, is_error=True):
    """Decide what to do when a download problem appears.

    The message (if any) is printed to stderr; in verbose mode a
    traceback follows. Afterwards, depending on the 'ignoreerrors'
    param, either DownloadError is raised or the return code is set to 1.

    @param message   Text to print, or None to print nothing
    @param tb        If given, is additional traceback information
    @param is_error  Whether to raise error according to ignoreerrors;
                     when False the method returns after printing
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            exc_type, exc_value = sys.exc_info()[:2]
            if exc_type:  # .trouble has been called from an except block
                pieces = []
                # Exceptions that carry an inner exc_info get that one printed first
                if hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
                    pieces.extend(traceback.format_exception(*exc_value.exc_info))
                pieces.append(encode_compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if not is_error:
        return

    if self.params.get('ignoreerrors'):
        self._download_retcode = 1
        return

    exc_type, exc_value = sys.exc_info()[:2]
    if exc_type and hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
        exc_info = exc_value.exc_info
    else:
        exc_info = sys.exc_info()
    raise DownloadError(message, exc_info)

# Symbolic style names mapped to the colour passed on to the text formatter.
Styles = Namespace(**{
    'HEADERS': 'yellow',
    'EMPHASIS': 'light blue',
    'FILENAME': 'green',
    'ID': 'green',
    'DELIM': 'blue',
    'ERROR': 'red',
    'WARNING': 'yellow',
    'SUPPRESS': 'light black',
})

935

936

def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):

text = str(text)

if test_encoding:

original_text = text

# handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711

941

encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'

942

text = text.encode(encoding, 'ignore').decode(encoding)

943

if fallback is not None and text != original_text:

944

text = fallback

945

return format_text(text, f) if allow_colors else text if fallback is None else fallback

946

947

def _format_out(self, *args, **kwargs):

948

return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

949

950

def _format_screen(self, *args, **kwargs):

951

return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

952

953

def _format_err(self, *args, **kwargs):

954

return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

955

956

def report_warning(self, message, only_once=False):
    """
    Emit a warning.

    If a 'logger' param is set, the message is handed to its warning()
    method unchanged. Otherwise, unless 'no_warnings' is set, it is written
    to stderr prefixed with 'WARNING:' (styled with Styles.WARNING).
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    prefix = self._format_err("WARNING:", self.Styles.WARNING)
    self.to_stderr(f'{prefix} {message}', only_once)

967

968

def deprecation_warning(self, message):
    """
    Emit a deprecation notice prefixed with 'DeprecationWarning:'.

    Goes to the 'logger' param's warning() when one is set; otherwise it is
    written to stderr with the only-once flag set.
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(f'DeprecationWarning: {message}')
        return
    prefix = self._format_err("DeprecationWarning:", self.Styles.ERROR)
    self.to_stderr(f'{prefix} {message}', True)

973

974

def report_error(self, message, *args, **kwargs):
    """
    Do the same as trouble, but prefix the message with 'ERROR:'
    (styled with Styles.ERROR). Extra arguments are forwarded to trouble.
    """
    prefix = self._format_err("ERROR:", self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', *args, **kwargs)

980

981

def write_debug(self, message, only_once=False):
    """Log a '[debug] '-prefixed message via the logger, or print it to stderr.

    Does nothing unless the 'verbose' param is set.
    """
    if not self.params.get('verbose', False):
        return
    debug_line = f'[debug] {message}'
    if not self.params.get('logger'):
        self.to_stderr(debug_line, only_once)
        return
    self.params['logger'].debug(debug_line)

990

991

def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        # The file name could not be printed; fall back to a generic message
        self.to_screen('[download] The file has already been downloaded')

997

998

def report_file_delete(self, file_name):
    """Report that existing file will be deleted."""
    try:
        notice = 'Deleting existing file %s' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        # The file name could not be printed; fall back to a generic message
        self.to_screen('Deleting existing file')

1004

1005

def raise_no_formats(self, info, forced=False, *, msg=None):
    """Raise ExtractorError, or only warn, about a video without formats.

    @param info    Info dict; '_has_drm' is read, and 'id'/'extractor' are
                   used when raising
    @param forced  Raise even if the 'ignore_no_formats_error' param is set
    @param msg     Custom message. When given, the raised error is marked as expected
    """
    has_drm = info.get('_has_drm')
    ignored = self.params.get('ignore_no_formats_error')
    expected = bool(msg)
    if not msg:
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
    if not forced and ignored:
        self.report_warning(msg)
        return
    raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                         expected=has_drm or ignored or expected)

1014

1015

def parse_outtmpl(self):
    """Deprecated wrapper: run _parse_outtmpl and return the 'outtmpl' param."""
    self.deprecation_warning(
        '"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
    self._parse_outtmpl()
    parsed = self.params['outtmpl']
    return parsed

1019

1020

def _parse_outtmpl(self):
    """Normalize the 'outtmpl' param into a dict and fill in missing defaults.

    A non-dict 'outtmpl' becomes {'default': outtmpl}. Every key of
    DEFAULT_OUTTMPL whose value is missing or None is then set to its default.
    """
    if self.params.get('restrictfilenames'):
        # Remove spaces in the default template
        def sanitize(tmpl):
            return tmpl.replace(' - ', ' ').replace(' ', '-')
    else:
        sanitize = IDENTITY

    outtmpl = self.params.setdefault('outtmpl', {})
    if not isinstance(outtmpl, dict):
        outtmpl = {'default': outtmpl}
        self.params['outtmpl'] = outtmpl

    missing = {}
    for tmpl_type, default_tmpl in DEFAULT_OUTTMPL.items():
        if outtmpl.get(tmpl_type) is None:
            missing[tmpl_type] = sanitize(default_tmpl)
    outtmpl.update(missing)

1029

1030

def get_output_path(self, dir_type='', filename=None):
    """Join the 'home' path, the path for `dir_type` and `filename`.

    The directories come from the 'paths' param (a dict). The joined path is
    passed through sanitize_path, forced when 'windowsfilenames' is set.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    joined = os.path.join(home_dir, type_dir, filename or '')
    return sanitize_path(joined, force=self.params.get('windowsfilenames'))

1038

1039

@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Apply expand_path to an output template while keeping '%%' and '$$' intact."""
    # expand_path translates '%%' into '%' and '$$' into '$'
    # correspondingly that is not what we want since we need to keep
    # '%%' intact for template dict substitution step. Working around
    # with boundary-alike separator hack.
    sep = ''.join(random.choice(ascii_letters) for _ in range(32))
    protected = outtmpl.replace('%%', f'%{sep}%')
    protected = protected.replace('$$', f'${sep}$')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    expanded = expand_path(protected)
    return expanded.replace(sep, '')

1053

1054

@staticmethod
def escape_outtmpl(outtmpl):
    """ Escape any remaining strings like %s, %abc% etc. """
    pattern = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')

    def _escape(mobj):
        # Matches that carry a key are kept as-is; the rest get an extra '%'
        prefix = '' if mobj.group('has_key') else '%'
        return prefix + mobj.group(0)

    return re.sub(pattern, _escape, outtmpl)

@classmethod
def validate_outtmpl(cls, outtmpl):
    """ @return None or Exception object """
    custom_types_re = STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]')

    def _as_str_conversion(mobj):
        # Swap the custom conversion character for a plain 's'
        return f'{mobj.group(0)[:-1]}s'

    outtmpl = re.sub(custom_types_re, _as_str_conversion, cls._outtmpl_expandpath(outtmpl))
    try:
        # Trial substitution; only the ValueError it may raise is of interest
        cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None

@staticmethod

def _copy_infodict(info_dict):

1077

info_dict = dict(info_dict)

1078

info_dict.pop('__postprocessors', None)

1079

info_dict.pop('__pending_error', None)

1080

return info_dict

1081

1082

def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
    @param outtmpl      Output template string
    @param info_dict    Info dict. 'epoch' is set on it if missing; all other
                        changes are made on a copy
    @param sanitize     Whether to sanitize the output as a filename.
                        For backward compatibility, a function can also be passed
    @return             (rewritten template, dict of values keyed by the rewritten keys)
    """

    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    info_dict = self._copy_infodict(info_dict)
    if info_dict.get('duration', None) is not None:
        # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        info_dict['duration_string'] = formatSeconds(info_dict['duration'], '-' if sanitize else ':')
    else:
        info_dict['duration_string'] = None
    info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
    info_dict['video_autonumber'] = self._num_videos
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
        'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
    MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
        (?P<negate>-)?
        (?P<fields>{FIELD_RE})
        (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
        (?:>(?P<strf_format>.+?))?
        (?P<remaining>
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
        )$''')

    def _traverse_infodict(field_path):
        # Look up a dotted path in the (copied) info dict
        keys = field_path.split('.')
        if keys[0] == '':
            keys.pop(0)
        return traverse_obj(info_dict, keys, is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            # The maths string alternates operator, operand, operator, ...;
            # `math_op` holds the pending operator while its operand is read
            math_op = None
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if math_op else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if math_op is None:
                    math_op = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                offset = float_or_none(item)
                if offset is None:
                    # Not a number literal, so treat the operand as a field
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = math_op(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                math_op = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
        if sanitize and value == '':
            value = None
        return value

    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
        if 'filename-sanitization' in self.params['compat_opts']:
            is_id = bool(re.search(r'(^|[_.])id(\.|$)', key))
        else:
            is_id = NO_DEFAULT
        return sanitize_filename(str(value), restricted=restricted, is_id=is_id)

    sanitizer = sanitize if callable(sanitize) else filename_sanitizer
    sanitize = bool(sanitize)

    def _dumpjson_default(obj):
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        return repr(obj)

    def create_key(outer_mobj):
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        initial_field = mobj.group('fields') if mobj else ''
        value, replacement, default = None, None, na
        # Walk the comma-separated alternates until one yields a value
        while mobj:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else default
            value = get_value(mobj)
            replacement = mobj['replacement']
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
            else:
                break

        fmt = outer_mobj.group('format')
        if fmt == 's' and value is not None and key in field_size_compat_map:
            fmt = f'0{field_size_compat_map[key]:d}d'

        if value is None:
            value = default
        elif replacement is not None:
            value = replacement

        flags = outer_mobj.group('conversion') or ''
        str_fmt = f'{fmt[:-1]}s'
        conversion = fmt[-1]
        if conversion == 'l':  # list
            delim = '\n' if '#' in flags else ', '
            value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
        elif conversion == 'j':  # json
            value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
        elif conversion == 'h':  # html
            value, fmt = escapeHTML(value), str_fmt
        elif conversion == 'q':  # quoted
            value = map(str, variadic(value) if '#' in flags else [value])
            value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
        elif conversion == 'B':  # bytes
            value = f'%{str_fmt}'.encode() % str(value).encode()
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif conversion == 'U':  # unicode normalized
            value, fmt = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                value), str_fmt
        elif conversion == 'D':  # decimal suffix
            num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
            value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                          factor=1024 if '#' in flags else 1000)
        elif conversion == 'S':  # filename sanitization
            value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
        elif conversion == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif conversion not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), str_fmt
            if fmt[-1] in 'csr':
                value = sanitizer(initial_field, value)

        key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

1263

1264

def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Substitute `info_dict` into `outtmpl` and return the resulting string.

    Extra arguments are forwarded to prepare_outtmpl.
    """
    prepared_tmpl, tmpl_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    escaped_tmpl = self.escape_outtmpl(prepared_tmpl)
    return escaped_tmpl % tmpl_dict

1267

1268

def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):

1269

assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'

1270

if outtmpl is None:

1271

outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])

1272

try:

1273

outtmpl = self._outtmpl_expandpath(outtmpl)

1274

filename = self.evaluate_outtmpl(outtmpl, info_dict, True)

if not filename:

return None

if tmpl_type in ('', 'temp'):

1279

final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')

1280

if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):

1281

filename = replace_extension(filename, ext, final_ext)

1282

elif tmpl_type:

1283

force_ext = OUTTMPL_TYPES[tmpl_type]

1284

if force_ext:

1285

filename = replace_extension(filename, force_ext, info_dict.get('ext'))

1286

1287

# https://github.com/blackjack4494/youtube-dlc/issues/85

1288

trim_file_name = self.params.get('trim_file_name', False)

1289

if trim_file_name:

1290

no_ext, *ext = filename.rsplit('.', 2)

1291

filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

1292

1293

return filename

1294

except ValueError as err:

1295

self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')

1296

return None

1297

1298

def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
    """Generate the output filename

    @param dir_type  Output type; selects the template and the directory.
                     Must be empty when `outtmpl` is given
    @param outtmpl   Explicit template to use instead of the configured one
    @param warn      Whether to warn when the 'paths' param ends up unused
    """
    if outtmpl:
        assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
        dir_type = None
    filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
    if not filename and dir_type not in ('', 'temp'):
        return ''

    if warn and self.params.get('paths'):
        if filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        elif os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
    if filename == '-' or not filename:
        # stdout or no filename at all: no directory is prepended
        return filename

    return self.get_output_path(dir_type, filename)

1318

1319

def _match_entry(self, info_dict, incomplete=False, silent=False):
    """ Returns None if the file should be downloaded

    Otherwise returns the reason for skipping it. Unless `silent`, the reason
    is also printed. If the matching 'break_on_existing'/'break_on_reject'
    param is set, ExistingVideoReached/RejectedVideoReached is raised instead.
    """

    video_title = info_dict.get('title', info_dict.get('id', 'entry'))

    def check_filter():
        # Title filters
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

        # Upload date filter
        date = info_dict.get('upload_date')
        if date is not None:
            date_range = self.params.get('daterange', DateRange())
            if date not in date_range:
                return f'{date_from_str(date).isoformat()} upload date is not in range {date_range}'

        # View count filters
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)

        # Age restriction
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        # User-supplied filter
        match_filter = self.params.get('match_filter')
        if match_filter is None:
            return None
        try:
            ret = match_filter(info_dict, incomplete=incomplete)
        except TypeError:
            # For backward compatibility
            ret = None if incomplete else match_filter(info_dict)
        if ret is NO_DEFAULT:
            # The filter asked for an interactive decision; keep prompting
            # until the reply is recognised
            while True:
                filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
                reply = input(self._format_screen(
                    f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
                if reply in {'y', ''}:
                    return None
                elif reply == 'n':
                    return f'Skipping {video_title}'
        elif ret is not None:
            return ret
        return None

    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return reason
    if not silent:
        self.to_screen('[download] ' + reason)
    if self.params.get(break_opt, False):
        raise break_err()
    return reason

@staticmethod

def add_extra_info(info_dict, extra_info):

1387

'''Set the keys from extra_info in info dict if they are missing'''

1388

for key, value in extra_info.items():

1389

info_dict.setdefault(key, value)

1390

1391

def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract the information for a URL using the first suitable extractor.

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
    process -- whether to resolve all unresolved references (URLs, playlist items),
               must be True for download to work.
    force_generic_extractor -- force using the generic extractor

    Returns whatever the extraction of the first suitable extractor returns.
    Returns None if the URL's temporary id is already recorded in the
    download archive, or if no extractor is suitable (reported as an error).
    """

    if extra_info is None:
        extra_info = {}

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    # An explicit key restricts the search to that single extractor
    ies = {ie_key: self._get_info_extractor_class(ie_key)} if ie_key else self._ies

    for ie_key, ie in ies.items():
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = ie.get_temp_id(url)
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
            if self.params.get('break_on_existing', False):
                raise ExistingVideoReached()
            break
        return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
    else:
        # Reached only when the loop was not left through `break`
        self.report_error('no suitable InfoExtractor for URL %s' % url)

1436

1437

def _handle_extraction_exceptions(func):

1438

@functools.wraps(func)

1439

def wrapper(self, *args, **kwargs):

1440

while True:

1441

try:

1442

return func(self, *args, **kwargs)

1443

except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):

1444

raise

1445

except ReExtractInfo as e:

1446

if e.expected:

1447

self.to_screen(f'{e}; Re-extracting data')

1448

else:

1449

self.to_stderr('\r')

1450

self.report_warning(f'{e}; Re-extracting data')

1451

continue

1452

except GeoRestrictedError as e:

1453

msg = e.msg

1454

if e.countries:

1455

msg += '\nThis video is available in %s.' % ', '.join(

1456

map(ISO3166Utils.short2full, e.countries))

1457

msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'

1458

self.report_error(msg)

1459

except ExtractorError as e: # An error we somewhat expected

1460

self.report_error(str(e), e.format_traceback())

1461

except Exception as e:

1462

if self.params.get('ignoreerrors'):

1463

self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))

else:

raise

break

return wrapper

def _wait_for_video(self, ie_result={}):

1470

if (not self.params.get('wait_for_video')

1471

or ie_result.get('_type', 'video') != 'video'

1472

or ie_result.get('formats') or ie_result.get('url')):

1473

return

1474

1475

format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]

last_msg = ''

def progress(msg):

nonlocal last_msg

full_msg = f'{msg}\n'

1481

if not self.params.get('noprogress'):

1482

full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'

1483

elif last_msg:

1484

return

1485

self.to_screen(full_msg, skip_eol=True)

1486

last_msg = msg

1487

1488

min_wait, max_wait = self.params.get('wait_for_video')

1489

diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())

1490

if diff is None and ie_result.get('live_status') == 'is_upcoming':

1491

diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)

1492

self.report_warning('Release time of video is not known')

1493

elif ie_result and (diff or 0) <= 0:

1494

self.report_warning('Video should already be available according to extracted info')

1495

diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))

1496

self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

1497

1498

wait_till = time.time() + diff

1499

try:

1500

while True:

1501

diff = wait_till - time.time()

1502

if diff <= 0:

1503

progress('')

1504

raise ReExtractInfo('[wait] Wait period ended', expected=True)

1505

progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')

1506

time.sleep(1)

1507

except KeyboardInterrupt:

1508

progress('')

1509

raise ReExtractInfo('[wait] Interrupted by user', expected=True)

1510

except BaseException as e:

1511

if not isinstance(e, ReExtractInfo):

self.to_screen('')

raise

@_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run extractor `ie` on `url` and optionally process the result.

    Returns the processed result if `process`, else the raw extractor result
    with the default extra info added. Returns None when the extractor
    returned nothing.
    """
    try:
        ie_result = ie.extract(url)
    except UserNotLive as e:
        if process and self.params.get('wait_for_video'):
            self.report_warning(e)
            self._wait_for_video()
        raise

    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
        return

    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {
            '_type': 'compat_list',
            'entries': ie_result,
        }

    original_url = extra_info.get('original_url')
    if original_url:
        ie_result.setdefault('original_url', original_url)
    self.add_default_extra_info(ie_result, ie, url)

    if not process:
        return ie_result
    self._wait_for_video(ie_result)
    return self.process_ie_result(ie_result, download, extra_info)

def add_default_extra_info(self, ie_result, ie, url):
    """Fill in URL- and extractor-related fields that `ie_result` is missing.

    Existing values are never overwritten (see add_extra_info). `url` and
    `ie` may each be None, in which case their fields are not added.
    """
    if url is not None:
        url_fields = {
            'webpage_url': url,
            'original_url': url,
        }
        self.add_extra_info(ie_result, url_fields)

    webpage_url = ie_result.get('webpage_url')
    if webpage_url:
        derived_fields = {
            'webpage_url_basename': url_basename(webpage_url),
            'webpage_url_domain': get_domain(webpage_url),
        }
        self.add_extra_info(ie_result, derived_fields)

    if ie is not None:
        extractor_fields = {
            'extractor': ie.IE_NAME,
            'extractor_key': ie.ie_key(),
        }
        self.add_extra_info(ie_result, extractor_fields)

1560

1561

def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    @param ie_result   Info dict; its '_type' (default 'video') selects the handling
    @param download    Passed on to the video/playlist processing
    @param extra_info  Extra fields to add to the result(s) where missing
    @raises Exception  If '_type' is not a known result type
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(
            ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
        if ie_result.get('original_url'):
            extra_info.setdefault('original_url', ie_result['original_url'])

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: the URL is not resolved; only a copy of the
            # result is enriched for printing and archiving
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            if ie and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            info_copy, _ = self.pre_process(info_copy)
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            self._raise_pending_errors(info_copy)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        self._raise_pending_errors(ie_result)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        exempted_fields = {'_type', 'url', 'ie_key'}
        if not ie_result.get('section_end') and ie_result.get('section_start') is None:
            # For video clips, the id etc of the clip extractor should be used
            exempted_fields |= {'id', 'extractor', 'extractor_key'}

        new_result = info.copy()
        new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # The parentheses matter: `%` binds tighter than `or`, so without
            # them the id fallback would never be used
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._fill_common_fields(ie_result, False)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            if not self._playlist_level:
                # Back at the outermost playlist: forget the visited URLs
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)

1693

1694

def _ensure_dir_exists(self, path):
    """Call make_dir for `path` with report_error as its second argument
    and return its result."""
    error_callback = self.report_error
    return make_dir(path, error_callback)

1696

1697

@staticmethod

1698

def _playlist_infodict(ie_result, strict=False, **kwargs):

1699

info = {

1700

'playlist_count': ie_result.get('playlist_count'),

1701

'playlist': ie_result.get('title') or ie_result.get('id'),

1702

'playlist_id': ie_result.get('id'),

1703

'playlist_title': ie_result.get('title'),

1704

'playlist_uploader': ie_result.get('uploader'),

1705

'playlist_uploader_id': ie_result.get('uploader_id'),

**kwargs,

}

if strict:

return info

return {

**info,

'playlist_index': 0,

'__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),

1714

'extractor': ie_result['extractor'],

1715

'webpage_url': ie_result['webpage_url'],

1716

'webpage_url_basename': url_basename(ie_result['webpage_url']),

1717

'webpage_url_domain': get_domain(ie_result['webpage_url']),

1718

'extractor_key': ie_result['extractor_key'],

1719

}

1720

1721

def __process_playlist(self, ie_result, download):
    """Process each entry in the playlist

    @param ie_result  Info dict whose ``_type`` is 'playlist' or 'multi_video'
    @param download   Passed through to the processing of every entry
    @returns          The playlist info dict after the 'playlist' postprocessors
                      have run, or None if the playlist was rejected by
                      ``_match_entry`` or a playlist file could not be written
    """
    assert ie_result['_type'] in ('playlist', 'multi_video')

    # Fields shared by all entries; also used to filter the playlist as a whole
    common_info = self._playlist_infodict(ie_result, strict=True)
    title = common_info.get('playlist') or '<Untitled>'
    if self._match_entry(common_info, incomplete=True) is not None:
        return
    self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

    all_entries = PlaylistEntries(self, ie_result)
    entries = orderedSet(all_entries.get_requested_items(), lazy=True)

    lazy = self.params.get('lazy_playlist')
    if lazy:
        # Entries are resolved one by one inside the loop below
        resolved_entries, n_entries = [], 'N/A'
        ie_result['requested_entries'], ie_result['entries'] = None, None
    else:
        entries = resolved_entries = list(entries)
        n_entries = len(resolved_entries)
        # Split the (index, entry) pairs into two parallel sequences
        ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
    if not ie_result.get('playlist_count'):
        # Better to do this after potentially exhausting entries
        ie_result['playlist_count'] = all_entries.get_full_count()

    extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
    # Lookups fall back to ``extra`` for keys missing from ``ie_result``
    ie_copy = collections.ChainMap(ie_result, extra)

    _infojson_written = False
    write_playlist_files = self.params.get('allow_playlist_files', True)
    if write_playlist_files and self.params.get('list_thumbnails'):
        self.list_thumbnails(ie_result)
    if write_playlist_files and not self.params.get('simulate'):
        _infojson_written = self._write_info_json(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
        # A None result from the writers aborts the whole playlist
        if _infojson_written is None:
            return
        if self._write_description('playlist', ie_result,
                                   self.prepare_filename(ie_copy, 'pl_description')) is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if lazy:
        if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
            self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
    elif self.params.get('playlistreverse'):
        entries.reverse()
    elif self.params.get('playlistrandom'):
        random.shuffle(entries)

    self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
                   f'{format_field(ie_result, "playlist_count", " of %s")}')

    keep_resolved_entries = self.params.get('extract_flat') != 'discard'
    if self.params.get('extract_flat') == 'discard_in_playlist':
        keep_resolved_entries = ie_result['_type'] != 'playlist'
    if keep_resolved_entries:
        self.write_debug('The information of all playlist entries will be held in memory')

    failures = 0
    # A falsy setting means "never give up"
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, (playlist_index, entry) in enumerate(entries):
        if lazy:
            resolved_entries.append((playlist_index, entry))
        if not entry:
            continue

        entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
        if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
            playlist_index = ie_result['requested_entries'][i]

        # View of the entry with playlist fields as fallback, used only for filtering
        entry_copy = collections.ChainMap(entry, {
            **common_info,
            'n_entries': int_or_none(n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': i + 1,
        })

        if self._match_entry(entry_copy, incomplete=True) is not None:
            # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
            # NO_DEFAULT marks the slot so it is dropped from the final lists below
            resolved_entries[i] = (playlist_index, NO_DEFAULT)
            continue

        self.to_screen('[download] Downloading video %s of %s' % (
            self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))

        extra.update({
            'playlist_index': playlist_index,
            'playlist_autonumber': i + 1,
        })
        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
            break
        if keep_resolved_entries:
            resolved_entries[i] = (playlist_index, entry_result)

    # Update with processed data
    ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
    ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]

    # Write the updated info to json
    if _infojson_written is True and self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
        return

    ie_result = self.run_all_pps('playlist', ie_result)
    self.to_screen(f'[download] Finished downloading playlist: {title}')
    return ie_result

1835

1836

@_handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Run ``process_ie_result`` on a single playlist entry.

    The call is wrapped by ``_handle_extraction_exceptions``; the playlist
    loop treats a falsy return value as a failed entry.
    """
    return self.process_ie_result(
        entry, download=download, extra_info=extra_info)

1840

1841

def _build_format_filter(self, filter_spec):

1842

" Returns a function to filter the formats according to the filter_spec "

OPERATORS = {

'<': operator.lt,

'<=': operator.le,

'>': operator.gt,

'>=': operator.ge,

'=': operator.eq,

'!=': operator.ne,

}

operator_rex = re.compile(r'''(?x)\s*

1853

(?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*

1854

(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*

1855

(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*

1856

''' % '|'.join(map(re.escape, OPERATORS.keys())))

1857

m = operator_rex.fullmatch(filter_spec)

1858

if m:

1859

try:

1860

comparison_value = int(m.group('value'))

1861

except ValueError:

1862

comparison_value = parse_filesize(m.group('value'))

1863

if comparison_value is None:

1864

comparison_value = parse_filesize(m.group('value') + 'B')

1865

if comparison_value is None:

1866

raise ValueError(

1867

'Invalid value %r in format specification %r' % (

1868

m.group('value'), filter_spec))

1869

op = OPERATORS[m.group('op')]

if not m:

STR_OPERATORS = {

'=': operator.eq,

'^=': lambda attr, value: attr.startswith(value),

1875

'$=': lambda attr, value: attr.endswith(value),

1876

'*=': lambda attr, value: value in attr,

1877

'~=': lambda attr, value: value.search(attr) is not None

1878

}

1879

str_operator_rex = re.compile(r'''(?x)\s*

1880

(?P<key>[a-zA-Z0-9._-]+)\s*

1881

(?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?

1882

(?P<quote>["'])?

1883

(?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))

1884

(?(quote)(?P=quote))\s*

1885

''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))

1886

m = str_operator_rex.fullmatch(filter_spec)

1887

if m:

1888

if m.group('op') == '~=':

1889

comparison_value = re.compile(m.group('value'))

1890

else:

1891

comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))

1892

str_op = STR_OPERATORS[m.group('op')]

1893

if m.group('negation'):

1894

op = lambda attr, value: not str_op(attr, value)

else:

op = str_op

if not m:

raise SyntaxError('Invalid filter specification %r' % filter_spec)

1900

1901

def _filter(f):

1902

actual_value = f.get(m.group('key'))

1903

if actual_value is None:

1904

return m.group('none_inclusive')

1905

return op(actual_value, comparison_value)

1906

return _filter

1907

1908

def _check_formats(self, formats):
    """Yield only those formats for which a test download succeeds.

    Each format is test-downloaded (``self.dl(..., test=True)``) into a
    temporary file under the 'temp' output path; the file is removed
    afterwards. Formats are skipped when the temporary directory cannot be
    ensured or when the download fails.
    """
    for fmt in formats:
        self.to_screen('[info] Testing format %s' % fmt['format_id'])
        temp_dir = self.get_output_path('temp')
        if not self._ensure_dir_exists(f'{temp_dir}/'):
            continue

        # Only the name is needed; the file is closed right away
        probe = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=temp_dir or None)
        probe.close()
        probe_path = probe.name
        try:
            success, _ = self.dl(probe_path, fmt, test=True)
        except (DownloadError, OSError, ValueError) + network_exceptions:
            success = False
        finally:
            if os.path.exists(probe_path):
                try:
                    os.remove(probe_path)
                except OSError:
                    self.report_warning('Unable to delete temporary file "%s"' % probe_path)

        if not success:
            self.to_screen('[info] Unable to download format %s. Skipping...' % fmt['format_id'])
            continue
        yield fmt

1930

1931

def _default_format_spec(self, info_dict, download=True):

1932

1933

def can_merge():

1934

merger = FFmpegMergerPP(self)

1935

return merger.available and merger.can_merge()

1936

1937

prefer_best = (

1938

not self.params.get('simulate')

and download

and (

not can_merge()

or info_dict.get('is_live') and not self.params.get('live_from_start')

1943

or self.params['outtmpl']['default'] == '-'))

1944

compat = (

1945

prefer_best

1946

or self.params.get('allow_multiple_audio_streams', False)

1947

or 'format-spec' in self.params['compat_opts'])

1948

1949

return (

1950

'best/bestvideo+bestaudio' if prefer_best

1951

else 'bestvideo*+bestaudio/best' if not compat

1952

else 'bestvideo+bestaudio/best')

1953

1954

def build_format_selector(self, format_spec):
    """Compile a format specification string into a selector function.

    The specification is tokenized with the ``tokenize`` module, parsed into
    a tree of ``FormatSelector`` tuples and turned into nested closures.

    @param format_spec  The format specification, e.g. 'bestvideo+bestaudio/best'
    @returns            A function that takes a context dict (with at least a
                        'formats' list) and returns an iterable of the
                        selected format dicts
    @raises SyntaxError If the specification cannot be parsed
    """
    def syntax_error(note, start):
        # ``start`` is a (row, column) pair; the caret is placed under the column
        message = (
            'Invalid format specification: '
            '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    check_formats = self.params.get('check_formats') == 'selected'

    def _parse_filter(tokens):
        # Collect the raw text up to the closing bracket
        filter_parts = []
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings.
        # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
                        break
            elif type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive-descent parser; the ``inside_*`` flags tell a nested call
        # which operators belong to its caller and must be handed back
        selectors = []
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error(f'Operator not recognized: "{string}"', start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        # Combine two (possibly already merged) formats into one merged format dict
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            get_no_more = {'video': False, 'audio': False}
            # Build a new list instead of popping from the list being iterated;
            # popping in place makes the loop skip the element that follows
            # each removal, so surplus streams could survive unchecked
            kept_formats = []
            for fmt_info in formats_info:
                if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                    continue
                surplus = False
                for aud_vid in ['audio', 'video']:
                    if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                        if get_no_more[aud_vid]:
                            surplus = True
                            break
                        get_no_more[aud_vid] = True
                if not surplus:
                    kept_formats.append(fmt_info)
            formats_info = kept_formats

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = get_compatible_ext(
            vcodecs=[f.get('vcodec') for f in video_fmts],
            acodecs=[f.get('acodec') for f in audio_fmts],
            vexts=[f['ext'] for f in video_fmts],
            aexts=[f['ext'] for f in audio_fmts],
            preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
                         or self.params.get('prefer_free_formats') and ('webm', 'mkv')))

        filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(filtered('format')),
            'format_id': '+'.join(filtered('format_id')),
            'ext': output_ext,
            'protocol': '+'.join(map(determine_protocol, formats_info)),
            'language': '+'.join(orderedSet(filtered('language'))) or None,
            'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
            'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
            'tbr': sum(filtered('tbr', 'vbr', 'abr')),
        }

        # Per-stream fields are only meaningful when there is exactly one such stream
        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'dynamic_range': the_only_video.get('dynamic_range'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
                'asr': the_only_audio.get('asr'),
                'audio_channels': the_only_audio.get('audio_channels')
            })

        return new_dict

    def _check_formats(formats):
        # Only test-download formats when check_formats == 'selected'
        if not check_formats:
            yield from formats
            return
        yield from self._check_formats(formats)

    def _build_selector_function(selector):
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    yield from f(ctx)
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                    yield _merge(pair)

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    yield from _check_formats(ctx['formats'][::-1])
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(_check_formats(
                        f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
                # best/worst (or b/w), optional video/audio (or v/a), optional '*', optional '.N'
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    _filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else lambda f: True)  # b*, w*
                    filter_f = lambda f: _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                else:
                    if format_spec in self._format_selection_exts['audio']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                    elif format_spec in self._format_selection_exts['video']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
                    elif format_spec in self._format_selection_exts['storyboards']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                    else:
                        filter_f = lambda f: f.get('format_id') == format_spec  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if not matches:
                        if format_fallback and ctx['incomplete_formats']:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        elif seperate_fallback and not ctx['has_merged_format']:
                            # for compatibility with youtube-dl when there is no pre-merged format
                            matches = list(filter(seperate_fallback, formats))
                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                    try:
                        yield matches[format_idx - 1]
                    except LazyList.IndexError:
                        return

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply the bracket filters on a copy so the caller's ctx is untouched
            ctx_copy = dict(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode())
    try:
        tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator:
        # Iterator over the token list that can step back by one token
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)

2284

2285

def _calc_headers(self, info_dict):
    """Compute the HTTP headers to use for ``info_dict['url']``.

    Starts from the merge of the global ``http_headers`` param and the
    format's own ``http_headers``, then adds a ``Cookie`` header from the
    cookie jar and, unless already present, ``X-Forwarded-For`` from
    ``__x_forwarded_for_ip``.
    """
    headers = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})

    cookie_header = self._calc_cookies(info_dict['url'])
    if cookie_header:
        headers['Cookie'] = cookie_header

    # An explicitly supplied X-Forwarded-For header is never overridden
    forwarded_ip = None if 'X-Forwarded-For' in headers else info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip:
        headers['X-Forwarded-For'] = forwarded_ip

    return headers

def _calc_cookies(self, url):
    """Return the ``Cookie`` header value the cookie jar sets on a request for ``url``."""
    probe_request = sanitized_Request(url)
    # The jar writes its matching cookies onto the request object
    self.cookiejar.add_cookie_header(probe_request)
    return probe_request.get_header('Cookie')

2303

2304

def _sort_thumbnails(self, thumbnails):

2305

thumbnails.sort(key=lambda t: (

2306

t.get('preference') if t.get('preference') is not None else -1,

2307

t.get('width') if t.get('width') is not None else -1,

2308

t.get('height') if t.get('height') is not None else -1,

2309

t.get('id') if t.get('id') is not None else '',

2310

t.get('url')))

2311

2312

def _sanitize_thumbnails(self, info_dict):

2313

thumbnails = info_dict.get('thumbnails')

2314

if thumbnails is None:

2315

thumbnail = info_dict.get('thumbnail')

2316

if thumbnail:

2317

info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]

if not thumbnails:

return

def check_thumbnails(thumbnails):

2322

for t in thumbnails:

2323

self.to_screen(f'[info] Testing thumbnail {t["id"]}')

2324

try:

2325

self.urlopen(HEADRequest(t['url']))

2326

except network_exceptions as err:

2327

self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')

continue

yield t

self._sort_thumbnails(thumbnails)

2332

for i, t in enumerate(thumbnails):

2333

if t.get('id') is None:

2334

t['id'] = '%d' % i

2335

if t.get('width') and t.get('height'):

2336

t['resolution'] = '%dx%d' % (t['width'], t['height'])

2337

t['url'] = sanitize_url(t['url'])

2338

2339

if self.params.get('check_formats') is True:

2340

info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)

2341

else:

2342

info_dict['thumbnails'] = thumbnails

2343

2344

def _fill_common_fields(self, info_dict, is_video=True):

2345

# TODO: move sanitization here

2346

if is_video:

2347

# playlists are allowed to lack "title"

2348

title = info_dict.get('title', NO_DEFAULT)

2349

if title is NO_DEFAULT:

2350

raise ExtractorError('Missing "title" field in extractor result',

2351

video_id=info_dict['id'], ie=info_dict['extractor'])

2352

info_dict['fulltitle'] = title

2353

if not title:

2354

if title == '':

2355

self.write_debug('Extractor gave empty title. Creating a generic title')

2356

else:

2357

self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')

2358

info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'

2359

2360

if info_dict.get('duration') is not None:

2361

info_dict['duration_string'] = formatSeconds(info_dict['duration'])

2362

2363

for ts_key, date_key in (

2364

('timestamp', 'upload_date'),

2365

('release_timestamp', 'release_date'),

2366

('modified_timestamp', 'modified_date'),

2367

):

2368

if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:

2369

# Working around out-of-range timestamp values (e.g. negative ones on Windows,

2370

# see http://bugs.python.org/issue1646728)

2371

with contextlib.suppress(ValueError, OverflowError, OSError):

2372

upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])

2373

info_dict[date_key] = upload_date.strftime('%Y%m%d')

2374

2375

live_keys = ('is_live', 'was_live')

2376

live_status = info_dict.get('live_status')

2377

if live_status is None:

2378

for key in live_keys:

2379

if info_dict.get(key) is False:

2380

continue

2381

if info_dict.get(key):

2382

live_status = key

2383

break

2384

if all(info_dict.get(key) is False for key in live_keys):

2385

live_status = 'not_live'

2386

if live_status:

2387

info_dict['live_status'] = live_status

2388

for key in live_keys:

2389

if info_dict.get(key) is None:

2390

info_dict[key] = (live_status == key)

2391

2392

# Auto generate title fields corresponding to the *_number fields when missing

2393

# in order to always have clean titles. This is very common for TV series.

2394

for field in ('chapter', 'season', 'episode'):

2395

if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):

2396

info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

2397

2398

def _raise_pending_errors(self, info):

2399

err = info.pop('__pending_error', None)

2400

if err:

2401

self.report_error(err, tb=False)

2402

2403

def process_video_result(self, info_dict, download=True):

2404

assert info_dict.get('_type', 'video') == 'video'

2405

self._num_videos += 1

2406

2407

if 'id' not in info_dict:

2408

raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])

2409

elif not info_dict.get('id'):

2410

raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

2411

2412

def report_force_conversion(field, field_not, conversion):

2413

self.report_warning(

2414

'"%s" field is not %s - forcing %s conversion, there is an error in extractor'

2415

% (field, field_not, conversion))

2416

2417

def sanitize_string_field(info, string_field):

2418

field = info.get(string_field)

2419

if field is None or isinstance(field, str):

2420

return

2421

report_force_conversion(string_field, 'a string', 'string')

2422

info[string_field] = str(field)

2423

2424

def sanitize_numeric_fields(info):

2425

for numeric_field in self._NUMERIC_FIELDS:

2426

field = info.get(numeric_field)

2427

if field is None or isinstance(field, (int, float)):

2428

continue

2429

report_force_conversion(numeric_field, 'numeric', 'int')

2430

info[numeric_field] = int_or_none(field)

2431

2432

sanitize_string_field(info_dict, 'id')

2433

sanitize_numeric_fields(info_dict)

2434

if info_dict.get('section_end') and info_dict.get('section_start') is not None:

2435

info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)

2436

if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):

2437

self.report_warning('"duration" field is negative, there is an error in extractor')

2438

2439

chapters = info_dict.get('chapters') or []

2440

if chapters and chapters[0].get('start_time'):

2441

chapters.insert(0, {'start_time': 0})

2442

2443

dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}

2444

for idx, (prev, current, next_) in enumerate(zip(

2445

(dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):

2446

if current.get('start_time') is None:

2447

current['start_time'] = prev.get('end_time')

2448

if not current.get('end_time'):

2449

current['end_time'] = next_.get('start_time')

2450

if not current.get('title'):

2451

current['title'] = f'<Untitled Chapter {idx}>'

2452

2453

if 'playlist' not in info_dict:

2454

# It isn't part of a playlist

2455

info_dict['playlist'] = None

2456

info_dict['playlist_index'] = None

2457

2458

self._sanitize_thumbnails(info_dict)

2459

2460

thumbnail = info_dict.get('thumbnail')

2461

thumbnails = info_dict.get('thumbnails')

2462

if thumbnail:

2463

info_dict['thumbnail'] = sanitize_url(thumbnail)

2464

elif thumbnails:

2465

info_dict['thumbnail'] = thumbnails[-1]['url']

2466

2467

if info_dict.get('display_id') is None and 'id' in info_dict:

2468

info_dict['display_id'] = info_dict['id']

2469

2470

self._fill_common_fields(info_dict)

2471

2472

for cc_kind in ('subtitles', 'automatic_captions'):

2473

cc = info_dict.get(cc_kind)

2474

if cc:

2475

for _, subtitle in cc.items():

2476

for subtitle_format in subtitle:

2477

if subtitle_format.get('url'):

2478

subtitle_format['url'] = sanitize_url(subtitle_format['url'])

2479

if subtitle_format.get('ext') is None:

2480

subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

2481

2482

automatic_captions = info_dict.get('automatic_captions')

2483

subtitles = info_dict.get('subtitles')

2484

2485

info_dict['requested_subtitles'] = self.process_subtitles(

2486

info_dict['id'], subtitles, automatic_captions)

2487

2488

if info_dict.get('formats') is None:

2489

# There's only one format available

2490

formats = [info_dict]

2491

else:

2492

formats = info_dict['formats']

2493

2494

# or None ensures --clean-infojson removes it

2495

info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None

2496

if not self.params.get('allow_unplayable_formats'):

2497

formats = [f for f in formats if not f.get('has_drm')]

2498

if info_dict['_has_drm'] and formats and all(

2499

f.get('acodec') == f.get('vcodec') == 'none' for f in formats):

2500

self.report_warning(

2501

'This video is DRM protected and only images are available for download. '

2502

'Use --list-formats to see them')

2503

2504

get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))

2505

if not get_from_start:

2506

info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')

2507

if info_dict.get('is_live') and formats:

2508

formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]

2509

if get_from_start and not formats:

2510

self.raise_no_formats(info_dict, msg=(

2511

'--live-from-start is passed, but there are no formats that can be downloaded from the start. '

2512

'If you want to download from the current time, use --no-live-from-start'))

2513

2514

if not formats:

2515

self.raise_no_formats(info_dict)

2516

2517

def is_wellformed(f):

url = f.get('url')

if not url:

self.report_warning(

'"url" field is missing or empty - skipping format, '

2522

'there is an error in extractor')

2523

return False

2524

if isinstance(url, bytes):

2525

sanitize_string_field(f, 'url')

2526

return True

2527

2528

# Filter out malformed formats for better extraction robustness

2529

formats = list(filter(is_wellformed, formats))

formats_dict = {}

# We check that all the formats have the format and format_id fields

2534

for i, format in enumerate(formats):

2535

sanitize_string_field(format, 'format_id')

2536

sanitize_numeric_fields(format)

2537

format['url'] = sanitize_url(format['url'])

2538

if not format.get('format_id'):

2539

format['format_id'] = str(i)

2540

else:

2541

# Sanitize format_id from characters used in format selector expression

2542

format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])

2543

format_id = format['format_id']

2544

if format_id not in formats_dict:

2545

formats_dict[format_id] = []

2546

formats_dict[format_id].append(format)

2547

2548

# Make sure all formats have unique format_id

2549

common_exts = set(itertools.chain(*self._format_selection_exts.values()))

2550

for format_id, ambiguous_formats in formats_dict.items():

2551

ambigious_id = len(ambiguous_formats) > 1

2552

for i, format in enumerate(ambiguous_formats):

2553

if ambigious_id:

2554

format['format_id'] = '%s-%d' % (format_id, i)

2555

if format.get('ext') is None:

2556

format['ext'] = determine_ext(format['url']).lower()

2557

# Ensure there is no conflict between id and ext in format selection

2558

# See https://github.com/yt-dlp/yt-dlp/issues/1282

2559

if format['format_id'] != format['ext'] and format['format_id'] in common_exts:

2560

format['format_id'] = 'f%s' % format['format_id']

2561

2562

for i, format in enumerate(formats):

2563

if format.get('format') is None:

2564

format['format'] = '{id} - {res}{note}'.format(

2565

id=format['format_id'],

2566

res=self.format_resolution(format),

2567

note=format_field(format, 'format_note', ' (%s)'),

2568

)

2569

if format.get('protocol') is None:

2570

format['protocol'] = determine_protocol(format)

2571

if format.get('resolution') is None:

2572

format['resolution'] = self.format_resolution(format, default=None)

2573

if format.get('dynamic_range') is None and format.get('vcodec') != 'none':

2574

format['dynamic_range'] = 'SDR'

2575

if (info_dict.get('duration') and format.get('tbr')

2576

and not format.get('filesize') and not format.get('filesize_approx')):

2577

format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))

2578

2579

# Add HTTP headers, so that external programs can use them from the

2580

# json output

2581

full_format_info = info_dict.copy()

2582

full_format_info.update(format)

2583

format['http_headers'] = self._calc_headers(full_format_info)

2584

# Remove private housekeeping stuff

2585

if '__x_forwarded_for_ip' in info_dict:

2586

del info_dict['__x_forwarded_for_ip']

2587

2588

if self.params.get('check_formats') is True:

2589

formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

2590

2591

if not formats or formats[0] is not info_dict:

2592

# only set the 'formats' fields if the original info_dict list them

2593

# otherwise we end up with a circular reference, the first (and unique)

2594

# element in the 'formats' field in info_dict is info_dict itself,

2595

# which can't be exported to json

2596

info_dict['formats'] = formats

2597

2598

info_dict, _ = self.pre_process(info_dict)

2599

2600

if self._match_entry(info_dict, incomplete=self._format_fields) is not None:

2601

return info_dict

2602

2603

self.post_extract(info_dict)

2604

info_dict, _ = self.pre_process(info_dict, 'after_filter')

2605

2606

# The pre-processors may have modified the formats

2607

formats = info_dict.get('formats', [info_dict])

2608

2609

list_only = self.params.get('simulate') is None and (

2610

self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))

2611

interactive_format_selection = not list_only and self.format_selector == '-'

2612

if self.params.get('list_thumbnails'):

2613

self.list_thumbnails(info_dict)

2614

if self.params.get('listsubtitles'):

2615

if 'automatic_captions' in info_dict:

2616

self.list_subtitles(

2617

info_dict['id'], automatic_captions, 'automatic captions')

2618

self.list_subtitles(info_dict['id'], subtitles, 'subtitles')

2619

if self.params.get('listformats') or interactive_format_selection:

2620

self.list_formats(info_dict)

2621

if list_only:

2622

# Without this printing, -F --print-json will not work

2623

self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)

2624

return info_dict

2625

2626

format_selector = self.format_selector

2627

if format_selector is None:

2628

req_format = self._default_format_spec(info_dict, download=download)

2629

self.write_debug('Default format spec: %s' % req_format)

2630

format_selector = self.build_format_selector(req_format)

2631

2632

while True:

2633

if interactive_format_selection:

2634

req_format = input(

2635

self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))

2636

try:

2637

format_selector = self.build_format_selector(req_format)

2638

except SyntaxError as err:

2639

self.report_error(err, tb=False, is_error=False)

2640

continue

2641

2642

formats_to_download = list(format_selector({

2643

'formats': formats,

2644

'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),

2645

'incomplete_formats': (

2646

# All formats are video-only or

2647

all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)

2648

# all formats are audio-only

2649

or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),

2650

}))

2651

if interactive_format_selection and not formats_to_download:

2652

self.report_error('Requested format is not available', tb=False, is_error=False)

continue

break

if not formats_to_download:

2657

if not self.params.get('ignore_no_formats_error'):

2658

raise ExtractorError(

2659

'Requested format is not available. Use --list-formats for a list of available formats',

2660

expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])

2661

self.report_warning('Requested format is not available')

2662

# Process what we can, even without any available formats.

2663

formats_to_download = [{}]

2664

2665

requested_ranges = self.params.get('download_ranges')

2666

if requested_ranges:

2667

requested_ranges = tuple(requested_ranges(info_dict, self))

2668

2669

best_format, downloaded_formats = formats_to_download[-1], []

if download:

if best_format:

def to_screen(*msg):

self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')

2674

2675

to_screen(f'Downloading {len(formats_to_download)} format(s):',

2676

(f['format_id'] for f in formats_to_download))

2677

if requested_ranges:

2678

to_screen(f'Downloading {len(requested_ranges)} time ranges:',

2679

(f'{int(c["start_time"])}-{int(c["end_time"])}' for c in requested_ranges))

2680

max_downloads_reached = False

2681

2682

for fmt, chapter in itertools.product(formats_to_download, requested_ranges or [{}]):

2683

new_info = self._copy_infodict(info_dict)

2684

new_info.update(fmt)

2685

offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')

2686

if chapter or offset:

2687

new_info.update({

2688

'section_start': offset + chapter.get('start_time', 0),

2689

'section_end': offset + min(chapter.get('end_time', duration), duration),

2690

'section_title': chapter.get('title'),

2691

'section_number': chapter.get('index'),

2692

})

2693

downloaded_formats.append(new_info)

2694

try:

2695

self.process_info(new_info)

2696

except MaxDownloadsReached:

2697

max_downloads_reached = True

2698

self._raise_pending_errors(new_info)

2699

# Remove copied info

2700

for key, val in tuple(new_info.items()):

2701

if info_dict.get(key) == val:

2702

new_info.pop(key)

2703

if max_downloads_reached:

2704

break

2705

2706

write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}

2707

assert write_archive.issubset({True, False, 'ignore'})

2708

if True in write_archive and False not in write_archive:

2709

self.record_download_archive(info_dict)

2710

2711

info_dict['requested_downloads'] = downloaded_formats

2712

info_dict = self.run_all_pps('after_video', info_dict)

2713

if max_downloads_reached:

2714

raise MaxDownloadsReached()

2715

2716

# We update the info dict with the selected best quality format (backwards compatibility)

2717

info_dict.update(best_format)

2718

return info_dict

2719

2720

def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format.

    @param video_id            ID used only in warning messages
    @param normal_subtitles    Mapping of language -> list of format dicts,
                               or a falsy value if there are none
    @param automatic_captions  Same shape as normal_subtitles
    @returns                   None when subtitle writing is not requested or
                               nothing is available; otherwise a dict mapping
                               each selected language to the chosen format dict

    The params read are 'writesubtitles', 'writeautomaticsub', 'allsubtitles',
    'subtitleslangs' and 'subtitlesformat'.
    """
    params = self.params
    available_subs, normal_sub_langs = {}, []
    if normal_subtitles and params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
        normal_sub_langs = tuple(normal_subtitles.keys())
    if automatic_captions and params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            # Normal subtitles take precedence over automatic captions
            available_subs.setdefault(lang, cap_info)

    neither_requested = (
        not params.get('writesubtitles') and not params.get('writeautomaticsub'))
    if neither_requested or not available_subs:
        return None

    all_sub_langs = tuple(available_subs.keys())
    if params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif params.get('subtitleslangs', False):
        # A list is used so that the order of languages will be the same as
        # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
        requested_langs = []
        for lang_re in params.get('subtitleslangs'):
            # startswith() instead of [0] so that an empty pattern
            # does not raise IndexError
            discard = lang_re.startswith('-')
            if discard:
                lang_re = lang_re[1:]
            if lang_re == 'all':
                if discard:
                    requested_langs = []
                else:
                    requested_langs.extend(all_sub_langs)
                continue
            current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
            if discard:
                for lang in current_langs:
                    while lang in requested_langs:
                        requested_langs.remove(lang)
            else:
                requested_langs.extend(current_langs)
        requested_langs = orderedSet(requested_langs)
    elif normal_sub_langs:
        requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
    else:
        requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
    if requested_langs:
        self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty list is treated like a missing language: there is nothing
        # to pick from and formats[-1] below would raise IndexError
        if not formats:
            self.report_warning(f'{lang} subtitles not available for {video_id}')
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            matches = [fmt for fmt in formats if fmt['ext'] == ext]
            if matches:
                chosen = matches[-1]
                break
        else:
            # No preference matched (or none was given): use the last format
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs

def _forceprint(self, key, info_dict):

2793

if info_dict is None:

2794

return

2795

info_copy = info_dict.copy()

2796

info_copy['formats_table'] = self.render_formats_table(info_dict)

2797

info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)

2798

info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))

2799

info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))

2800

2801

def format_tmpl(tmpl):

2802

mobj = re.match(r'\w+(=?)$', tmpl)

2803

if mobj and mobj.group(1):

2804

return f'{tmpl[:-1]} = %({tmpl[:-1]})r'

elif mobj:

return f'%({tmpl})s'

return tmpl

for tmpl in self.params['forceprint'].get(key, []):

2810

self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))

2811

2812

for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):

2813

filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)

2814

tmpl = format_tmpl(tmpl)

2815

self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')

2816

if self._ensure_dir_exists(filename):

2817

with open(filename, 'a', encoding='utf-8') as f:

2818

f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')

2819

2820

def __forced_printings(self, info_dict, filename, incomplete):

2821

def print_mandatory(field, actual_field=None):

2822

if actual_field is None:

2823

actual_field = field

2824

if (self.params.get('force%s' % field, False)

2825

and (not incomplete or info_dict.get(actual_field) is not None)):

2826

self.to_stdout(info_dict[actual_field])

2827

2828

def print_optional(field):

2829

if (self.params.get('force%s' % field, False)

2830

and info_dict.get(field) is not None):

2831

self.to_stdout(info_dict[field])

2832

2833

info_dict = info_dict.copy()

2834

if filename is not None:

2835

info_dict['filename'] = filename

2836

if info_dict.get('requested_formats') is not None:

2837

# For RTMP URLs, also include the playpath

2838

info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])

2839

elif info_dict.get('url'):

2840

info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

2841

2842

if (self.params.get('forcejson')

2843

or self.params['forceprint'].get('video')

2844

or self.params['print_to_file'].get('video')):

2845

self.post_extract(info_dict)

2846

self._forceprint('video', info_dict)

2847

2848

print_mandatory('title')

2849

print_mandatory('id')

2850

print_mandatory('url', 'urls')

2851

print_optional('thumbnail')

2852

print_optional('description')

2853

print_optional('filename')

2854

if self.params.get('forceduration') and info_dict.get('duration') is not None:

2855

self.to_stdout(formatSeconds(info_dict['duration']))

2856

print_mandatory('format')

2857

2858

if self.params.get('forcejson'):

2859

self.to_stdout(json.dumps(self.sanitize_info(info_dict)))

2860

2861

def dl(self, name, info, subtitle=False, test=False):
    """Download info to name with a suitable downloader and return its result.

    @param name      Output filename; '-' is passed on as "write to stdout"
    @param info      Info dict of the format to download
    @param subtitle  Forwarded unchanged to the downloader's download()
    @param test      If true, a fixed set of test params is used instead of
                     self.params and no progress hooks are attached
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    params = self.params
    if test:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_cls(self, params)

    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)

        def loggable(url):
            # Do not dump the payload of data: URLs into the debug log
            if url.startswith('data:'):
                return url.split(',')[0] + ',<data>'
            return url

        targets = info.get('requested_formats', []) or [info]
        urls = '", "'.join(loggable(fmt['url']) for fmt in targets)
        self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')

    # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
    # But it may contain objects that are not deep-copyable
    new_info = self._copy_infodict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)

2895

2896

def existing_file(self, filepaths, *, default_overwrite=True):
    """Return the first existing path, or delete all existing ones.

    If any of filepaths exists and the 'overwrites' param (falling back to
    default_overwrite) is falsy, the first existing path is returned.
    Otherwise every existing path is reported, removed, and None is returned.
    """
    found = [path for path in orderedSet(filepaths) if os.path.exists(path)]
    if found and not self.params.get('overwrites', default_overwrite):
        return found[0]

    for stale in found:
        self.report_file_delete(stale)
        os.remove(stale)
    return None

def process_info(self, info_dict):
    """Process a single resolved IE result. (Modifies it in-place)

    The steps, in order: apply the match filter, run the forced printings,
    write the side files (description, subtitles, thumbnails, info json,
    annotations, internet shortcuts), run the 'before_dl' pre-processors,
    download the requested format(s), queue ffmpeg fixups, run the
    post-processors and finally the post hooks.

    Returns None in every case; the outcome is communicated through
    info_dict (e.g. the '__write_download_archive' key). Raises
    MaxDownloadsReached once the 'max_downloads' param is reached and
    UnavailableVideoError when the download fails with an OSError.
    """

    assert info_dict.get('_type', 'video') == 'video'
    original_infodict = info_dict

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    # This is mostly just for backward compatibility of process_info
    # As a side-effect, this allows for format-specific filters
    if self._match_entry(info_dict) is not None:
        info_dict['__write_download_archive'] = 'ignore'
        return

    # Does nothing under normal operation - for backward compatibility of process_info
    self.post_extract(info_dict)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    def check_max_downloads():
        # A missing/zero 'max_downloads' means no limit
        if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
            raise MaxDownloadsReached()

    if self.params.get('simulate'):
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
        check_max_downloads()
        return

    if full_filename is None:
        return
    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self._write_description('video', info_dict,
                               self.prepare_filename(info_dict, 'description')) is None:
        return

    sub_files = self._write_subtitles(info_dict, temp_filename)
    if sub_files is None:
        return
    files_to_move.update(dict(sub_files))

    thumb_files = self._write_thumbnails(
        'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
    if thumb_files is None:
        return
    files_to_move.update(dict(thumb_files))

    infofn = self.prepare_filename(info_dict, 'infojson')
    _infojson_written = self._write_info_json('video', info_dict, infofn)
    if _infojson_written:
        info_dict['infojson_filename'] = infofn
        # For backward compatibility, even though it was a private field
        info_dict['__infojson_filename'] = infofn
    elif _infojson_written is None:
        return

    # Note: Annotations are deprecated
    annofn = None
    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
    if annofn:
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except OSError:
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    # Write internet shortcut files
    def _write_link_file(link_type):
        # Returns False only on a hard failure; True if written or skipped
        url = try_get(info_dict['webpage_url'], iri_to_uri)
        if not url:
            self.report_warning(
                f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
            return True
        linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
        if not self._ensure_dir_exists(encodeFilename(linkfn)):
            return False
        # Keep an existing shortcut only when overwriting is disabled.
        # The check used to be inverted (missing "not"), unlike the
        # equivalent check for annotations above
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
            return True
        try:
            self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
            with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                      newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                template_vars = {'url': url}
                if link_type == 'desktop':
                    # Filename without the ".desktop" suffix
                    template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
        except OSError:
            self.report_error(f'Cannot write internet shortcut {linkfn}')
            return False
        return True

    write_links = {
        'url': self.params.get('writeurllink'),
        'webloc': self.params.get('writewebloclink'),
        'desktop': self.params.get('writedesktoplink'),
    }
    if self.params.get('writelink'):
        # Pick the shortcut type from the running platform
        link_type = ('webloc' if sys.platform == 'darwin'
                     else 'desktop' if sys.platform.startswith('linux')
                     else 'url')
        write_links[link_type] = True

    if any(should_write and not _write_link_file(link_type)
           for link_type, should_write in write_links.items()):
        return

    def replace_info_dict(new_info):
        # Keep the identity of info_dict: update it in-place instead of rebinding
        nonlocal info_dict
        if new_info == info_dict:
            return
        info_dict.clear()
        info_dict.update(new_info)

    new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    replace_info_dict(new_info)

    if self.params.get('skip_download'):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_video_file(*filepaths):
                ext = info_dict.get('ext')
                converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
                # For each path, the converted name is checked before the original one
                file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
                                          default_overwrite=False)
                if file:
                    info_dict['ext'] = os.path.splitext(file)[1][1:]
                return file

            fd, success = None, True
            if info_dict.get('protocol') or info_dict.get('url'):
                fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                if fd is not FFmpegFD and (
                        info_dict.get('section_start') or info_dict.get('section_end')):
                    msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
                           else 'You have requested downloading the video partially, but ffmpeg is not installed')
                    self.report_error(f'{msg}. Aborting')
                    return

            if info_dict.get('requested_formats') is not None:
                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if (info_dict['ext'] == 'webm'
                            and info_dict.get('thumbnails')
                            # check with type instead of pp_key, __name__, or isinstance
                            # since we dont want any custom PPs to trigger this
                            and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return f'{filename_wo_ext}.{ext}'

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_video_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                merger = FFmpegMergerPP(self)
                downloaded = []
                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif fd:
                    # A single downloader handles all the formats at once
                    for f in requested_formats if fd != FFmpegFD else []:
                        f['filepath'] = fname = prepend_extension(
                            correct_ext(temp_filename, info_dict['ext']),
                            'f%s' % f['format_id'], info_dict['ext'])
                        downloaded.append(fname)
                    info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
                        if not self.params.get('ignoreerrors'):
                            self.report_error(f'{msg}. Aborting due to --abort-on-error')
                            return
                        self.report_warning(f'{msg}. The formats won\'t be merged')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                        fname = temp_filename
                    # Download every requested format separately
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            f['filepath'] = fname
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success

                if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
                    info_dict['__postprocessors'].append(merger)
                    info_dict['__files_to_merge'] = downloaded
                    # Even if there were no downloads, it is being merged only now
                    info_dict['__real_download'] = True
                else:
                    for file in downloaded:
                        files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_video_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except OSError as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
            return

        self._raise_pending_errors(info_dict)
        if success and full_filename != '-':

            def fixup():
                # do_fixup is True, False or 'warn' (warn only, never fix)
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = 'warn'
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    if not (do_fixup and cndn):
                        return
                    elif do_fixup == 'warn':
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(stretched_ratio not in (1, None),
                             f'Non-uniform pixel ratio {stretched_ratio}',
                             FFmpegFixupStretchedPP)

                downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                downloader = downloader.FD_NAME if downloader else None

                ext = info_dict.get('ext')
                postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
                    isinstance(pp, FFmpegVideoConvertorPP)
                    and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
                ) for pp in self._pps['post_process'])

                if not postprocessed_by_ffmpeg:
                    ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
                                 'writing DASH m4a. Only some players support this container',
                                 FFmpegFixupM4aPP)
                    ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
                                 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
                                 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                 FFmpegFixupM3u8PP)
                    ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
                                 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

                ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            info_dict['__write_download_archive'] = True

    assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
    if self.params.get('force_write_download_archive'):
        info_dict['__write_download_archive'] = True
    check_max_downloads()

3262

3263

def __download_wrapper(self, func):
    """Return `func` wrapped with the error handling used for top-level downloads.

    The wrapper always returns None. UnavailableVideoError is reported via
    report_error; DownloadCancelled is printed and re-raised unless the
    'break_per_url' param is set. On success, the result is dumped to stdout
    as JSON when the 'dump_single_json' param is set.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except UnavailableVideoError as err:
            self.report_error(err)
            return
        except DownloadCancelled as err:
            self.to_screen(f'[info] {err}')
            if not self.params.get('break_per_url'):
                raise
            return

        # Only reached when func completed without raising
        if self.params.get('dump_single_json', False):
            self.post_extract(result)
            self.to_stdout(json.dumps(self.sanitize_info(result)))
    return wrapper

3279

3280

def download(self, url_list):
    """Download every URL in `url_list` and return the accumulated return code.

    Raises SameFileError when several URLs would be written to one fixed
    output file (the default template is not '-', contains no '%' field,
    and 'max_downloads' is not 1).
    """
    url_list = variadic(url_list)  # Passing a single URL is a common mistake
    outtmpl = self.params['outtmpl']['default']
    if len(url_list) > 1 and outtmpl != '-' and '%' not in outtmpl and self.params.get('max_downloads') != 1:
        raise SameFileError(outtmpl)

    for url in url_list:
        guarded_extract = self.__download_wrapper(self.extract_info)
        guarded_extract(url, force_generic_extractor=self.params.get('force_generic_extractor', False))

    return self._download_retcode

3295

3296

def download_with_info_file(self, info_filename):
    """Download using a previously written info JSON file.

    The file is read as UTF-8, sanitized, and passed to process_ie_result.
    If that fails with DownloadError, EntryNotInPlaylist or ReExtractInfo and
    the info has a 'webpage_url', the download is retried from that URL;
    otherwise the error is re-raised.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as f:
        # FileInput doesn't have a read method, we can't call json.load
        info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))

    try:
        self.__download_wrapper(self.process_ie_result)(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
        if not isinstance(e, EntryNotInPlaylist):
            self.to_stderr('\r')
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
        return self.download([webpage_url])
    return self._download_retcode

3314

3315

@staticmethod

3316

def sanitize_info(info_dict, remove_private_keys=False):
    """Return a JSON-serializable copy of `info_dict`.

    'epoch' and '_type' are added to `info_dict` in-place when missing.
    With `remove_private_keys`, entries whose value is None, whose key starts
    with '__', or whose key is one of the internal bookkeeping keys are
    dropped at every nesting level. Values that are not str/int/float/bool/None
    or containers are replaced by their repr().
    """
    if info_dict is None:
        return info_dict
    info_dict.setdefault('epoch', int(time.time()))
    info_dict.setdefault('_type', 'video')

    def reject(key, value):
        if not remove_private_keys:
            return False
        return value is None or key.startswith('__') or key in {
            'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
            'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
        }

    def filter_fn(obj):
        if isinstance(obj, dict):
            return {key: filter_fn(value) for key, value in obj.items() if not reject(key, value)}
        if isinstance(obj, (list, tuple, set, LazyList)):
            return [filter_fn(item) for item in obj]
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        return repr(obj)

    return filter_fn(info_dict)

3342

3343

@staticmethod

3344

def filter_requested_info(info_dict, actually_filter=True):
    """Backward-compatible alias of sanitize_info; `actually_filter` maps to `remove_private_keys`."""
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)

3347

3348

def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
    """Remove the given files from disk and from info['__files_to_move'].

    Falsy filenames and duplicates are ignored. When `msg` is given it is
    %-formatted with each filename and shown before deletion. A failed
    deletion only produces a warning.
    """
    unique_files = {name for name in files_to_delete if name}
    for filename in unique_files:
        if msg:
            self.to_screen(msg % filename)
        try:
            os.remove(filename)
        except OSError:
            self.report_warning(f'Unable to delete file {filename}')
        pending_moves = info.get('__files_to_move', [])
        if filename in pending_moves:  # NB: Delete even if None
            del info['__files_to_move'][filename]

3358

3359

@staticmethod

3360

def post_extract(info_dict):
    """Run and merge any deferred '__post_extractor' callbacks, in-place.

    For 'playlist' and 'multi_video' results the entries are processed
    recursively; the container itself is left untouched.
    """
    def apply_post_extractor(info):
        if info.get('_type') in ('playlist', 'multi_video'):
            for entry in info.get('entries', {}):
                apply_post_extractor(entry or {})
            return

        callback = info.pop('__post_extractor', None)
        if not callback:
            callback = lambda: {}
        info.update(callback())

    apply_post_extractor(info_dict or {})

3371

3372

def run_pp(self, pp, infodict):
    """Run one postprocessor on `infodict` and handle the files it asks to delete.

    A PostProcessingError is reported and swallowed only when the
    'ignoreerrors' param is exactly True; otherwise it propagates. Files
    returned for deletion are either kept (registered in '__files_to_move'
    when 'keepvideo' is set) or deleted. Returns the resulting info dict.
    """
    infodict.setdefault('__files_to_move', {})
    try:
        files_to_delete, infodict = pp.run(infodict)
    except PostProcessingError as e:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is not True:
            raise
        self.report_error(e)
        return infodict

    if files_to_delete:
        if self.params.get('keepvideo', False):
            for f in files_to_delete:
                infodict['__files_to_move'].setdefault(f, '')
        else:
            self._delete_downloaded_files(
                *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
    return infodict

3394

3395

def run_all_pps(self, key, info, *, additional_pps=None):
    """Run `additional_pps` followed by the postprocessors registered under `key`.

    The forced-print hook for `key` is invoked first. Returns the info dict
    produced by the last postprocessor.
    """
    self._forceprint(key, info)
    pipeline = (additional_pps or []) + self._pps[key]
    for postprocessor in pipeline:
        info = self.run_pp(postprocessor, info)
    return info

3400

3401

def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered under `key` on a copy of `ie_info`.

    A PostProcessingError is recorded under '__pending_error' (if not already
    set) and reported as a non-error message instead of being raised.
    Returns a tuple (info, files_to_move).
    """
    info = dict(ie_info)
    info['__files_to_move'] = files_to_move or {}
    try:
        info = self.run_all_pps(key, info)
    except PostProcessingError as err:
        msg = f'Preprocessing: {err}'
        info.setdefault('__pending_error', msg)
        self.report_error(msg, is_error=False)
    pending_moves = info.pop('__files_to_move', None)
    return info, pending_moves

3411

3412

def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Order: the 'post_process' postprocessors (preceded by any listed in
    info['__postprocessors']), then MoveFilesAfterDownloadPP, then the
    'after_move' postprocessors. Returns the final info dict.
    """
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move or {}
    extra_pps = info.get('__postprocessors')
    info = self.run_all_pps('post_process', info, additional_pps=extra_pps)
    mover = MoveFilesAfterDownloadPP(self)
    info = self.run_pp(mover, info)
    del info['__files_to_move']
    return self.run_all_pps('after_move', info)

3420

3421

def _make_archive_id(self, info_dict):
    """Build the download-archive id for `info_dict`, or return None if impossible.

    The extractor key is taken from 'extractor_key' or 'ie_key'; failing
    that, the first registered extractor suitable for info_dict['url'] is used.
    """
    video_id = info_dict.get('id')
    if not video_id:
        return
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
    if extractor is not None:
        return make_archive_id(extractor, video_id)

    url = str_or_none(info_dict.get('url'))
    if not url:
        return
    # Try to find matching extractor for the URL and take its ie_key
    for ie_key, ie in self._ies.items():
        if ie.suitable(url):
            return make_archive_id(ie_key, video_id)
    return

3440

3441

def in_download_archive(self, info_dict):
    """Return True if the video (or one of its '_old_archive_ids') is in the archive.

    Always False when no 'download_archive' param is configured.
    """
    if self.params.get('download_archive') is None:
        return False

    candidates = [self._make_archive_id(info_dict)]
    candidates += info_dict.get('_old_archive_ids') or []
    return any(archive_id in self.archive for archive_id in candidates)

3449

3450

def record_download_archive(self, info_dict):
    """Append the video's archive id to the archive file and the in-memory archive.

    Does nothing when no 'download_archive' param is configured.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    vid_id = self._make_archive_id(info_dict)
    assert vid_id
    self.write_debug(f'Adding to archive: {vid_id}')
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
    self.archive.add(vid_id)

3460

3461

@staticmethod

3462

def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict.

    'audio only' for formats with no video but with audio; otherwise the
    explicit 'resolution' field, else a string derived from width/height,
    else `default`.
    """
    if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width, height = format.get('width'), format.get('height')
    if width and height:
        return '%dx%d' % (width, height)
    if height:
        return '%sp' % height
    if width:
        return '%dx?' % width
    return default

3474

3475

def _list_format_headers(self, *headers):
    """Return the table headers, styled unless 'listformats_table' is False.

    Returns a list of styled strings, or the unmodified `headers` tuple when
    styling is disabled.
    """
    if self.params.get('listformats_table', True) is False:
        return headers
    return [self._format_out(header, self.Styles.HEADERS) for header in headers]

3479

3480

def _format_note(self, fdict):
    """Build the free-text note describing a format, as used by the legacy format listing.

    Fields are appended in a fixed order (support marker, language, note,
    bitrate, container, video codec/bitrate, fps, audio codec/bitrate,
    sample rate, file size), each only when present in `fdict`.
    """
    res = ''

    def add(separator, text):
        # Append `text`, preceded by `separator` only if something was written before
        nonlocal res
        if res:
            res += separator
        res += text

    if fdict.get('ext') in ['f4f', 'f4m']:
        res += '(unsupported)'
    if fdict.get('language'):
        add(' ', '[%s]' % fdict['language'])
    if fdict.get('format_note') is not None:
        add(' ', fdict['format_note'])
    if fdict.get('tbr') is not None:
        add(', ', '%4dk' % fdict['tbr'])
    if fdict.get('container') is not None:
        add(', ', '%s container' % fdict['container'])

    has_vbr = fdict.get('vbr') is not None
    has_abr = fdict.get('abr') is not None
    if fdict.get('vcodec') is not None and fdict.get('vcodec') != 'none':
        add(', ', fdict['vcodec'])
        if has_vbr:
            res += '@'
    elif has_vbr and has_abr:
        res += 'video@'
    if has_vbr:
        res += '%4dk' % fdict['vbr']
    if fdict.get('fps') is not None:
        add(', ', '%sfps' % fdict['fps'])

    if fdict.get('acodec') is not None:
        add(', ', 'video only' if fdict['acodec'] == 'none' else '%-5s' % fdict['acodec'])
    elif has_abr:
        add(', ', 'audio')
    if has_abr:
        res += '@%3dk' % fdict['abr']
    if fdict.get('asr') is not None:
        res += ' (%5dHz)' % fdict['asr']

    if fdict.get('filesize') is not None:
        add(', ', format_bytes(fdict['filesize']))
    elif fdict.get('filesize_approx') is not None:
        add(', ', '~' + format_bytes(fdict['filesize_approx']))
    return res

3539

3540

def render_formats_table(self, info_dict):
    """Render the table of available formats, or return None if there are none.

    When the 'listformats_table' param is False, the legacy four-column
    layout is produced; otherwise the detailed, styled table. Formats whose
    'preference' is below -1000 are omitted from either table.
    """
    if not info_dict.get('formats') and not info_dict.get('url'):
        return None

    def is_listed(f):
        return f.get('preference') is None or f['preference'] >= -1000

    formats = info_dict.get('formats', [info_dict])
    if self.params.get('listformats_table', True) is False:
        legacy_rows = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)
            ] for f in formats if is_listed(f)]
        return render_table(['format code', 'extension', 'resolution', 'note'], legacy_rows, extra_gap=1)

    def simplified_codec(f, field):
        assert field in ('acodec', 'vcodec')
        codec = f.get(field, 'unknown')
        if not codec:
            return 'unknown'
        if codec != 'none':
            return '.'.join(codec.split('.')[:4])

        # codec == 'none': describe what kind of stream this is instead
        if field == 'vcodec' and f.get('acodec') == 'none':
            return 'images'
        if field == 'acodec' and f.get('vcodec') == 'none':
            return ''
        label = 'audio only' if field == 'vcodec' else 'video only'
        return self._format_out(label, self.Styles.SUPPRESS)

    delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
    table = [
        [
            self._format_out(format_field(f, 'format_id'), self.Styles.ID),
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d', func=round),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            format_field(f, 'audio_channels', '\t%s'),
            delim,
            format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
            format_field(f, 'tbr', '\t%dk', func=round),
            shorten_protocol_name(f.get('protocol', '')),
            delim,
            simplified_codec(f, 'vcodec'),
            format_field(f, 'vbr', '\t%dk', func=round),
            simplified_codec(f, 'acodec'),
            format_field(f, 'abr', '\t%dk', func=round),
            format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
            join_nonempty(
                self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
                format_field(f, 'language', '[%s]'),
                join_nonempty(format_field(f, 'format_note'),
                              format_field(f, 'container', ignore=(None, f.get('ext'))),
                              delim=', '),
                delim=' '),
        ] for f in formats if is_listed(f)]
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
        delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    row_separator = self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True)
    return render_table(header_line, table, hide_empty=True, delim=row_separator)

3604

3605

def render_thumbnails_table(self, info_dict):
    """Render the table of thumbnails in `info_dict`, or return None if there are none."""
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        return None
    header = self._list_format_headers('ID', 'Width', 'Height', 'URL')
    rows = [
        [t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']]
        for t in thumbnails]
    return render_table(header, rows)

3612

3613

def render_subtitles_table(self, video_id, subtitles):
    """Render a table with one row per subtitle language, or return None if `subtitles` is empty.

    `subtitles` maps a language code to a list of format dicts; `video_id`
    is accepted but not used here.
    """
    if not subtitles:
        return None

    def _row(lang, formats):
        pairs = ((f['ext'], f.get('name') or 'unknown') for f in reversed(formats))
        exts, names = zip(*pairs)
        # A single distinct name is shown once, and not at all if it is the placeholder
        if len(set(names)) == 1:
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    header = self._list_format_headers('Language', 'Name', 'Formats')
    rows = [_row(lang, formats) for lang, formats in subtitles.items()]
    return render_table(header, rows, hide_empty=True)

3626

3627

def __list_table(self, video_id, name, func, *args):
    """Print the table produced by func(*args), or a notice that `video_id` has no `name`."""
    table = func(*args)
    if table:
        self.to_screen(f'[info] Available {name} for {video_id}:')
        self.to_stdout(table)
    else:
        self.to_screen(f'{video_id} has no {name}')

3634

3635

def list_formats(self, info_dict):
    """Print the formats table for the video described by `info_dict`."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'formats', self.render_formats_table, info_dict)

3637

3638

def list_thumbnails(self, info_dict):
    """Print the thumbnails table for the video described by `info_dict`."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'thumbnails', self.render_thumbnails_table, info_dict)

3640

3641

def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the subtitles table for `video_id`; `name` is the label used in the messages."""
    renderer = self.render_subtitles_table
    self.__list_table(video_id, name, renderer, video_id, subtitles)

3643

3644

def urlopen(self, req):
    """Open `req` (a URL string or a request object) with the configured opener and socket timeout."""
    request = sanitized_Request(req) if isinstance(req, str) else req
    return self._opener.open(request, timeout=self._socket_timeout)

3649

3650

def print_debug_header(self):
    """Write version, environment and configuration details as debug output.

    Does nothing unless the 'verbose' param is set. Output goes to the
    'logger' param's debug() when one is configured, otherwise through
    write_string / self._write_string.
    """
    if not self.params.get('verbose'):
        return

    # These imports can be slow. So import them only as needed
    from .extractor.extractors import _LAZY_LOADER
    from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors

    def get_encoding(stream):
        description = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
        if supports_terminal_sequences(stream):
            return description
        from .utils import WINDOWS_VT_MODE  # Must be imported locally
        return description + (' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)')

    encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        self.get_encoding(),
        ', '.join(
            f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
            if stream is not None and key != 'console')
    )

    logger = self.params.get('logger')
    if logger:
        def write_debug(msg):
            return logger.debug(f'[debug] {msg}')
        write_debug(encoding_str)
    else:
        write_string(f'[debug] {encoding_str}\n', encoding=None)

        def write_debug(msg):
            return self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    if VARIANT not in (None, 'pip'):
        source += '*'
    write_debug(join_nonempty(
        'yt-dlp version', __version__,
        f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        delim=' '))
    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if plugin_extractors or plugin_postprocessors:
        write_debug('Plugins: %s' % [
            '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
    if self.params['compat_opts']:
        write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

    if source == 'source':
        # Best effort: any failure to query git is silently ignored
        try:
            stdout, _, _ = Popen.run(
                ['git', 'rev-parse', '--short', 'HEAD'],
                text=True, cwd=os.path.dirname(os.path.abspath(__file__)),
                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            git_head = stdout.strip()
            if re.fullmatch('[0-9a-f]+', git_head):
                write_debug(f'Git HEAD: {git_head}')
        except Exception:
            with contextlib.suppress(Exception):
                sys.exc_clear()

    write_debug(system_identifier())

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
    if ffmpeg_features:
        exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .compat.compat_utils import get_package_info
    from .dependencies import available_dependencies

    write_debug('Optional libraries: %s' % (', '.join(sorted({
        join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
    })) or 'none'))

    self._setup_opener()
    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_debug(f'Proxy map: {proxy_map}')

    # Not implemented
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
        write_debug('Public IP address: %s' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode()
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)

3753

3754

def _setup_opener(self):
    """Create self._opener (plus self.cookiejar and self._socket_timeout) once.

    Subsequent calls are no-ops. The opener is assembled from the proxy,
    HTTPS, cookie, HTTP, redirect, data and (disabled) file handlers.
    """
    if hasattr(self, '_opener'):
        return
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = 20 if timeout_val is None else float(timeout_val)

    opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
    if opts_proxy is None:
        proxies = urllib.request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        # An explicitly empty proxy setting results in no proxies at all
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = urllib.request.DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = urllib.request.FileHandler()

    def file_open(*args, **kwargs):
        raise urllib.error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
    file_handler.file_open = file_open

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
    opener = urllib.request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

3803

3804

def encode(self, s):
    """Encode `s` with the configured encoding; bytes are returned unchanged.

    A UnicodeEncodeError is re-raised with a hint about the encoding
    configuration appended to its reason.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    target_encoding = self.get_encoding()
    try:
        return s.encode(target_encoding)
    except UnicodeEncodeError as err:
        err.reason += '. Check your system encoding configuration or use the --encoding option.'
        raise

3813

3814

def get_encoding(self):
    """Return the 'encoding' param, falling back to preferredencoding() when it is None."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured

3819

3820

def _write_info_json(self, label, ie_result, infofn, overwrite=None):
    """Write the info JSON for `ie_result` to `infofn`.

    Returns True = written, 'exists' = already exists, False = skip, None = error.
    `overwrite` defaults to the 'overwrites' param (itself defaulting to True).
    """
    if overwrite is None:
        overwrite = self.params.get('overwrites', True)

    if not self.params.get('writeinfojson'):
        return False
    if not infofn:
        self.write_debug(f'Skipping writing {label} infojson')
        return False
    if not self._ensure_dir_exists(infofn):
        return None
    if not overwrite and os.path.exists(infofn):
        self.to_screen(f'[info] {label.title()} metadata is already present')
        return 'exists'

    self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
    try:
        sanitized = self.sanitize_info(ie_result, self.params.get('clean_infojson', True))
        write_json_file(sanitized, infofn)
    except OSError:
        self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
        return None
    return True

3842

3843

def _write_description(self, label, ie_result, descfn):
    """Write ie_result['description'] to `descfn` as UTF-8 text.

    Returns True = written (or already present), False = skip, None = error.
    """
    if not self.params.get('writedescription'):
        return False
    if not descfn:
        self.write_debug(f'Skipping writing {label} description')
        return False
    if not self._ensure_dir_exists(descfn):
        return None

    if not self.params.get('overwrites', True) and os.path.exists(descfn):
        self.to_screen(f'[info] {label.title()} description is already present')
        return True
    if ie_result.get('description') is None:
        self.report_warning(f'There\'s no {label} description to write')
        return False

    try:
        self.to_screen(f'[info] Writing {label} description to: {descfn}')
        with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
            descfile.write(ie_result['description'])
    except OSError:
        self.report_error(f'Cannot write {label} description file {descfn}')
        return None
    return True

def _write_subtitles(self, info_dict, filename):
    """Write the requested subtitles next to `filename`.

    Returns a list of (sub_filename, final_sub_filename) tuples, or None if
    embedded subtitle data could not be written. Each written or
    pre-existing subtitle also gets its 'filepath' set in the subtitle info.
    Download failures raise DownloadError unless 'ignoreerrors' is True.
    """
    ret = []
    subtitles = info_dict.get('requested_subtitles')
    if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        return ret

    sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
    if not sub_filename_base:
        self.to_screen('[info] Skipping writing video subtitles')
        return ret

    video_ext = info_dict.get('ext')
    for sub_lang, sub_info in subtitles.items():
        sub_format = sub_info['ext']
        sub_filename = subtitles_filename(filename, sub_lang, sub_format, video_ext)
        sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, video_ext)
        existing_sub = self.existing_file((sub_filename_final, sub_filename))
        if existing_sub:
            self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
            sub_info['filepath'] = existing_sub
            ret.append((existing_sub, sub_filename_final))
            continue

        self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
        if sub_info.get('data') is not None:
            # The subtitle content is already available; just write it out
            try:
                # Use newline='' to prevent conversion of newline characters
                # See https://github.com/ytdl-org/youtube-dl/issues/10268
                with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                    subfile.write(sub_info['data'])
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
            except OSError:
                self.report_error(f'Cannot write video subtitles file {sub_filename}')
                return None
            continue

        try:
            sub_copy = sub_info.copy()
            sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
            self.dl(sub_filename, sub_copy, subtitle=True)
            sub_info['filepath'] = sub_filename
            ret.append((sub_filename, sub_filename_final))
        except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
            msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
            ignoreerrors = self.params.get('ignoreerrors')
            if ignoreerrors is not True:  # False or 'only_download'
                if not ignoreerrors:
                    self.report_error(msg)
                raise DownloadError(msg)
            self.report_warning(msg)
    return ret

3919

3920

def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
    """Write thumbnails to file and return list of (thumb_filename, final_thumb_filename).

    Thumbnails are only written when 'writethumbnail' or
    'write_all_thumbnails' is set. They are tried from the last entry of
    info_dict['thumbnails'] to the first; unless all thumbnails were
    requested, processing stops after the first one that is written or
    already present. Thumbnails that fail to download are removed from the
    list and reported as warnings. `thumb_filename_base` defaults to `filename`.
    """
    write_all = self.params.get('write_all_thumbnails', False)
    thumbnails, ret = [], []
    if write_all or self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails') or []
    multiple = write_all and len(thumbnails) > 1

    if thumb_filename_base is None:
        thumb_filename_base = filename
    if thumbnails and not thumb_filename_base:
        self.write_debug(f'Skipping writing {label} thumbnail')
        return ret

    # Iterate over a reversed snapshot so that failed entries can be popped by index
    for idx, t in list(enumerate(thumbnails))[::-1]:
        thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
        thumb_display_id = f'{label} thumbnail {t["id"]}'
        thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
        thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

        existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
        if existing_thumb:
            self.to_screen('[info] %s is already present' % (
                thumb_display_id if multiple else f'{label} thumbnail').capitalize())
            t['filepath'] = existing_thumb
            ret.append((existing_thumb, thumb_filename_final))
        else:
            self.to_screen(f'[info] Downloading {thumb_display_id} ...')
            try:
                request = sanitized_Request(t['url'], headers=t.get('http_headers', {}))
                # Close the response once copied so the connection is not leaked
                with contextlib.closing(self.urlopen(request)) as uf:
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                ret.append((thumb_filename, thumb_filename_final))
                t['filepath'] = thumb_filename
            except network_exceptions as err:
                thumbnails.pop(idx)
                self.report_warning(f'Unable to download {thumb_display_id}: {err}')
        if ret and not write_all:
            break
    return ret