jfr.im git - yt-dlp.git/blame_incremental

Commit	Line	Data
	1	#!/usr/bin/env python3
	2	import collections
	3	import contextlib
	4	import datetime
	5	import errno
	6	import fileinput
	7	import functools
	8	import io
	9	import itertools
	10	import json
	11	import locale
	12	import operator
	13	import os
	14	import platform
	15	import random
	16	import re
	17	import shutil
	18	import subprocess
	19	import sys
	20	import tempfile
	21	import time
	22	import tokenize
	23	import traceback
	24	import unicodedata
	25	import urllib.request
	26	from string import ascii_letters
	27
	28	from .cache import Cache
	29	from .compat import (
	30	HAS_LEGACY as compat_has_legacy,
	31	compat_get_terminal_size,
	32	compat_os_name,
	33	compat_shlex_quote,
	34	compat_str,
	35	compat_urllib_error,
	36	compat_urllib_request,
	37	)
	38	from .cookies import load_cookies
	39	from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
	40	from .downloader.rtmp import rtmpdump_version
	41	from .extractor import gen_extractor_classes, get_info_extractor
	42	from .extractor.openload import PhantomJSwrapper
	43	from .minicurses import format_text
	44	from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
	45	from .postprocessor import (
	46	EmbedThumbnailPP,
	47	FFmpegFixupDuplicateMoovPP,
	48	FFmpegFixupDurationPP,
	49	FFmpegFixupM3u8PP,
	50	FFmpegFixupM4aPP,
	51	FFmpegFixupStretchedPP,
	52	FFmpegFixupTimestampPP,
	53	FFmpegMergerPP,
	54	FFmpegPostProcessor,
	55	MoveFilesAfterDownloadPP,
	56	get_postprocessor,
	57	)
	58	from .update import detect_variant
	59	from .utils import (
	60	DEFAULT_OUTTMPL,
	61	LINK_TEMPLATES,
	62	NO_DEFAULT,
	63	NUMBER_RE,
	64	OUTTMPL_TYPES,
	65	POSTPROCESS_WHEN,
	66	STR_FORMAT_RE_TMPL,
	67	STR_FORMAT_TYPES,
	68	ContentTooShortError,
	69	DateRange,
	70	DownloadCancelled,
	71	DownloadError,
	72	EntryNotInPlaylist,
	73	ExistingVideoReached,
	74	ExtractorError,
	75	GeoRestrictedError,
	76	HEADRequest,
	77	ISO3166Utils,
	78	LazyList,
	79	MaxDownloadsReached,
	80	Namespace,
	81	PagedList,
	82	PerRequestProxyHandler,
	83	PlaylistEntries,
	84	Popen,
	85	PostProcessingError,
	86	ReExtractInfo,
	87	RejectedVideoReached,
	88	SameFileError,
	89	UnavailableVideoError,
	90	YoutubeDLCookieProcessor,
	91	YoutubeDLHandler,
	92	YoutubeDLRedirectHandler,
	93	age_restricted,
	94	args_to_str,
	95	date_from_str,
	96	determine_ext,
	97	determine_protocol,
	98	encode_compat_str,
	99	encodeFilename,
	100	error_to_compat_str,
	101	expand_path,
	102	filter_dict,
	103	float_or_none,
	104	format_bytes,
	105	format_decimal_suffix,
	106	format_field,
	107	formatSeconds,
	108	get_domain,
	109	int_or_none,
	110	iri_to_uri,
	111	join_nonempty,
	112	locked_file,
	113	make_dir,
	114	make_HTTPS_handler,
	115	merge_headers,
	116	network_exceptions,
	117	number_of_digits,
	118	orderedSet,
	119	parse_filesize,
	120	platform_name,
	121	preferredencoding,
	122	prepend_extension,
	123	register_socks_protocols,
	124	remove_terminal_sequences,
	125	render_table,
	126	replace_extension,
	127	sanitize_filename,
	128	sanitize_path,
	129	sanitize_url,
	130	sanitized_Request,
	131	std_headers,
	132	str_or_none,
	133	strftime_or_none,
	134	subtitles_filename,
	135	supports_terminal_sequences,
	136	timetuple_from_msec,
	137	to_high_limit_path,
	138	traverse_obj,
	139	try_get,
	140	url_basename,
	141	variadic,
	142	version_tuple,
	143	windows_enable_vt_mode,
	144	write_json_file,
	145	write_string,
	146	)
	147	from .version import RELEASE_GIT_HEAD, __version__
	148
	149	if compat_os_name == 'nt':
	150	import ctypes
	151
	152
	153	class YoutubeDL:
	154	"""YoutubeDL class.
	155
	156	YoutubeDL objects are the ones responsible for downloading the
	157	actual video file and writing it to disk if the user has requested
	158	it, among some other tasks. In most cases there should be one per
	159	program. As, given a video URL, the downloader doesn't know how to
	160	extract all the needed information, task that InfoExtractors do, it
	161	has to pass the URL to one of them.
	162
	163	For this, YoutubeDL objects have a method that allows
	164	InfoExtractors to be registered in a given order. When it is passed
	165	a URL, the YoutubeDL object hands it to the first InfoExtractor it
	166	finds that reports being able to handle it. The InfoExtractor extracts
	167	all the information about the video or videos the URL refers to, and
	168	YoutubeDL processes the extracted information, possibly using a File
	169	Downloader to download the video.
	170
	171	YoutubeDL objects accept a lot of parameters. In order not to saturate
	172	the object constructor with arguments, it receives a dictionary of
	173	options instead. These options are available through the params
	174	attribute for the InfoExtractors to use. The YoutubeDL also
	175	registers itself as the downloader in charge for the InfoExtractors
	176	that are added to it, so this is a "mutual registration".
	177
	178	Available options:
	179
	180	username: Username for authentication purposes.
	181	password: Password for authentication purposes.
	182	videopassword: Password for accessing a video.
	183	ap_mso: Adobe Pass multiple-system operator identifier.
	184	ap_username: Multiple-system operator account username.
	185	ap_password: Multiple-system operator account password.
	186	usenetrc: Use netrc for authentication instead.
	187	verbose: Print additional info to stdout.
	188	quiet: Do not print messages to stdout.
	189	no_warnings: Do not print out anything for warnings.
	190	forceprint: A dict with keys WHEN mapped to a list of templates to
	191	print to stdout. The allowed keys are video or any of the
	192	items in utils.POSTPROCESS_WHEN.
	193	For compatibility, a single list is also accepted
	194	print_to_file: A dict with keys WHEN (same as forceprint) mapped to
	195	a list of tuples with (template, filename)
	196	forcejson: Force printing info_dict as JSON.
	197	dump_single_json: Force printing the info_dict of the whole playlist
	198	(or video) as a single JSON line.
	199	force_write_download_archive: Force writing download archive regardless
	200	of 'skip_download' or 'simulate'.
	201	simulate: Do not download the video files. If unset (or None),
	202	simulate only if listsubtitles, listformats or list_thumbnails is used
	203	format: Video format code. see "FORMAT SELECTION" for more details.
	204	You can also pass a function. The function takes 'ctx' as
	205	argument and returns the formats to download.
	206	See "build_format_selector" for an implementation
	207	allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
	208	ignore_no_formats_error: Ignore "No video formats" error. Useful for
	209	extracting metadata even if the video is not actually
	210	available for download (experimental)
	211	format_sort: A list of fields by which to sort the video formats.
	212	See "Sorting Formats" for more details.
	213	format_sort_force: Force the given format_sort. see "Sorting Formats"
	214	for more details.
	215	prefer_free_formats: Whether to prefer video formats with free containers
	216	over non-free ones of same quality.
	217	allow_multiple_video_streams: Allow multiple video streams to be merged
	218	into a single file
	219	allow_multiple_audio_streams: Allow multiple audio streams to be merged
	220	into a single file
	221	check_formats: Whether to test if the formats are downloadable.
	222	Can be True (check all), False (check none),
	223	'selected' (check selected formats),
	224	or None (check only if requested by extractor)
	225	paths: Dictionary of output paths. The allowed keys are 'home'
	226	'temp' and the keys of OUTTMPL_TYPES (in utils.py)
	227	outtmpl: Dictionary of templates for output names. Allowed keys
	228	are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
	229	For compatibility with youtube-dl, a single string can also be used
	230	outtmpl_na_placeholder: Placeholder for unavailable meta fields.
	231	restrictfilenames: Do not allow "&" and spaces in file names
	232	trim_file_name: Limit length of filename (extension excluded)
	233	windowsfilenames: Force the filenames to be windows compatible
	234	ignoreerrors: Do not stop on download/postprocessing errors.
	235	Can be 'only_download' to ignore only download errors.
	236	Default is 'only_download' for CLI, but False for API
	237	skip_playlist_after_errors: Number of allowed failures until the rest of
	238	the playlist is skipped
	239	force_generic_extractor: Force downloader to use the generic extractor
	240	overwrites: Overwrite all video and metadata files if True,
	241	overwrite only non-video files if None
	242	and don't overwrite any file if False
	243	For compatibility with youtube-dl,
	244	"nooverwrites" may also be used instead
	245	playliststart: Playlist item to start at.
	246	playlistend: Playlist item to end at.
	247	playlist_items: Specific indices of playlist to download.
	248	playlistreverse: Download playlist items in reverse order.
	249	playlistrandom: Download playlist items in random order.
	250	matchtitle: Download only matching titles.
	251	rejecttitle: Reject downloads for matching titles.
	252	logger: Log messages to a logging.Logger instance.
	253	logtostderr: Log messages to stderr instead of stdout.
	254	consoletitle: Display progress in console window's titlebar.
	255	writedescription: Write the video description to a .description file
	256	writeinfojson: Write the video metadata to a .info.json file
	257	clean_infojson: Remove private fields from the infojson
	258	getcomments: Extract video comments. This will not be written to disk
	259	unless writeinfojson is also given
	260	writeannotations: Write the video annotations to a .annotations.xml file
	261	writethumbnail: Write the thumbnail image to a file
	262	allow_playlist_files: Whether to write playlists' description, infojson etc
	263	also to disk when using the 'write*' options
	264	write_all_thumbnails: Write all thumbnail formats to files
	265	writelink: Write an internet shortcut file, depending on the
	266	current platform (.url/.webloc/.desktop)
	267	writeurllink: Write a Windows internet shortcut file (.url)
	268	writewebloclink: Write a macOS internet shortcut file (.webloc)
	269	writedesktoplink: Write a Linux internet shortcut file (.desktop)
	270	writesubtitles: Write the video subtitles to a file
	271	writeautomaticsub: Write the automatically generated subtitles to a file
	272	listsubtitles: Lists all available subtitles for the video
	273	subtitlesformat: The format code for subtitles
	274	subtitleslangs: List of languages of the subtitles to download (can be regex).
	275	The list may contain "all" to refer to all the available
	276	subtitles. The language can be prefixed with a "-" to
	277	exclude it from the requested languages. Eg: ['all', '-live_chat']
	278	keepvideo: Keep the video file after post-processing
	279	daterange: A DateRange object, download only if the upload_date is in the range.
	280	skip_download: Skip the actual download of the video file
	281	cachedir: Location of the cache files in the filesystem.
	282	False to disable filesystem cache.
	283	noplaylist: Download single video instead of a playlist if in doubt.
	284	age_limit: An integer representing the user's age in years.
	285	Unsuitable videos for the given age are skipped.
	286	min_views: An integer representing the minimum view count the video
	287	must have in order to not be skipped.
	288	Videos without view count information are always
	289	downloaded. None for no limit.
	290	max_views: An integer representing the maximum view count.
	291	Videos that are more popular than that are not
	292	downloaded.
	293	Videos without view count information are always
	294	downloaded. None for no limit.
	295	download_archive: File name of a file where all downloads are recorded.
	296	Videos already present in the file are not downloaded
	297	again.
	298	break_on_existing: Stop the download process after attempting to download a
	299	file that is in the archive.
	300	break_on_reject: Stop the download process when encountering a video that
	301	has been filtered out.
	302	break_per_url: Whether break_on_reject and break_on_existing
	303	should act on each input URL as opposed to for the entire queue
	304	cookiefile: File name or text stream from where cookies should be read and dumped to
	305	cookiesfrombrowser: A tuple containing the name of the browser, the profile
	306	name/path from where cookies are loaded, and the name of the
	307	keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
	308	legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
	309	support RFC 5746 secure renegotiation
	310	nocheckcertificate: Do not verify SSL certificates
	311	client_certificate: Path to client certificate file in PEM format. May include the private key
	312	client_certificate_key: Path to private key file for client certificate
	313	client_certificate_password: Password for client certificate private key, if encrypted.
	314	If not provided and the key is encrypted, yt-dlp will ask interactively
	315	prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
	316	At the moment, this is only supported by YouTube.
	317	http_headers: A dictionary of custom headers to be used for all requests
	318	proxy: URL of the proxy server to use
	319	geo_verification_proxy: URL of the proxy to use for IP address verification
	320	on geo-restricted sites.
	321	socket_timeout: Time to wait for unresponsive hosts, in seconds
	322	bidi_workaround: Work around buggy terminals without bidirectional text
	323	support, using fribidi
	324	debug_printtraffic: Print out sent and received HTTP traffic
	325	default_search: Prepend this string if an input url is not valid.
	326	'auto' for elaborate guessing
	327	encoding: Use this encoding instead of the system-specified.
	328	extract_flat: Do not resolve URLs, return the immediate result.
	329	Pass in 'in_playlist' to only show this behavior for
	330	playlist items.
	331	wait_for_video: If given, wait for scheduled streams to become available.
	332	The value should be a tuple containing the range
	333	(min_secs, max_secs) to wait between retries
	334	postprocessors: A list of dictionaries, each with an entry
	335	* key: The name of the postprocessor. See
	336	yt_dlp/postprocessor/__init__.py for a list.
	337	* when: When to run the postprocessor. Allowed values are
	338	the entries of utils.POSTPROCESS_WHEN
	339	Assumed to be 'post_process' if not given
	340	progress_hooks: A list of functions that get called on download
	341	progress, with a dictionary with the entries
	342	* status: One of "downloading", "error", or "finished".
	343	Check this first and ignore unknown values.
	344	* info_dict: The extracted info_dict
	345
	346	If status is one of "downloading", or "finished", the
	347	following properties may also be present:
	348	* filename: The final filename (always present)
	349	* tmpfilename: The filename we're currently writing to
	350	* downloaded_bytes: Bytes on disk
	351	* total_bytes: Size of the whole file, None if unknown
	352	* total_bytes_estimate: Guess of the eventual file size,
	353	None if unavailable.
	354	* elapsed: The number of seconds since download started.
	355	* eta: The estimated time in seconds, None if unknown
	356	* speed: The download speed in bytes/second, None if
	357	unknown
	358	* fragment_index: The counter of the currently
	359	downloaded video fragment.
	360	* fragment_count: The number of fragments (= individual
	361	files that will be merged)
	362
	363	Progress hooks are guaranteed to be called at least once
	364	(with status "finished") if the download is successful.
	365	postprocessor_hooks: A list of functions that get called on postprocessing
	366	progress, with a dictionary with the entries
	367	* status: One of "started", "processing", or "finished".
	368	Check this first and ignore unknown values.
	369	* postprocessor: Name of the postprocessor
	370	* info_dict: The extracted info_dict
	371
	372	Progress hooks are guaranteed to be called at least twice
	373	(with status "started" and "finished") if the processing is successful.
	374	merge_output_format: Extension to use when merging formats.
	375	final_ext: Expected final extension; used to detect when the file was
	376	already downloaded and converted
	377	fixup: Automatically correct known faults of the file.
	378	One of:
	379	- "never": do nothing
	380	- "warn": only emit a warning
	381	- "detect_or_warn": check whether we can do anything
	382	about it, warn otherwise (default)
	383	source_address: Client-side IP address to bind to.
	384	sleep_interval_requests: Number of seconds to sleep between requests
	385	during extraction
	386	sleep_interval: Number of seconds to sleep before each download when
	387	used alone or a lower bound of a range for randomized
	388	sleep before each download (minimum possible number
	389	of seconds to sleep) when used along with
	390	max_sleep_interval.
	391	max_sleep_interval: Upper bound of a range for randomized sleep before each
	392	download (maximum possible number of seconds to sleep).
	393	Must only be used along with sleep_interval.
	394	Actual sleep time will be a random float from range
	395	[sleep_interval; max_sleep_interval].
	396	sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
	397	listformats: Print an overview of available video formats and exit.
	398	list_thumbnails: Print a table of all thumbnails and exit.
	399	match_filter: A function that gets called for every video with the signature
	400	(info_dict, *, incomplete: bool) -> Optional[str]
	401	For backward compatibility with youtube-dl, the signature
	402	(info_dict) -> Optional[str] is also allowed.
	403	- If it returns a message, the video is ignored.
	404	- If it returns None, the video is downloaded.
	405	- If it returns utils.NO_DEFAULT, the user is interactively
	406	asked whether to download the video.
	407	match_filter_func in utils.py is one example for this.
	408	no_color: Do not emit color codes in output.
	409	geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
	410	HTTP header
	411	geo_bypass_country:
	412	Two-letter ISO 3166-1 alpha-2 country code that will be used for
	413	explicit geographic restriction bypassing via faking
	414	X-Forwarded-For HTTP header
	415	geo_bypass_ip_block:
	416	IP range in CIDR notation that will be used similarly to
	417	geo_bypass_country
	418	external_downloader: A dictionary of protocol keys and the executable of the
	419	external downloader to use for it. The allowed protocols
	420	are default\|http\|ftp\|m3u8\|dash\|rtsp\|rtmp\|mms.
	421	Set the value to 'native' to use the native downloader
	422	compat_opts: Compatibility options. See "Differences in default behavior".
	423	The following options do not work when used through the API:
	424	filename, abort-on-error, multistreams, no-live-chat, format-sort
	425	no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
	426	Refer to __init__.py for their implementation
	427	progress_template: Dictionary of templates for progress outputs.
	428	Allowed keys are 'download', 'postprocess',
	429	'download-title' (console title) and 'postprocess-title'.
	430	The template is mapped on a dictionary with keys 'progress' and 'info'
	431	retry_sleep_functions: Dictionary of functions that takes the number of attempts
	432	as argument and returns the time to sleep in seconds.
	433	Allowed keys are 'http', 'fragment', 'file_access'
	434	download_ranges: A function that gets called for every video with the signature
	435	(info_dict, *, ydl) -> Iterable[Section].
	436	Only the returned sections will be downloaded. Each Section contains:
	437	* start_time: Start time of the section in seconds
	438	* end_time: End time of the section in seconds
	439	* title: Section title (Optional)
	440	* index: Section number (Optional)
	441
	442	The following parameters are not used by YoutubeDL itself, they are used by
	443	the downloader (see yt_dlp/downloader/common.py):
	444	nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
	445	max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
	446	continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
	447	external_downloader_args, concurrent_fragment_downloads.
	448
	449	The following options are used by the post processors:
	450	ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
	451	to the binary or its containing directory.
	452	postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
	453	and a list of additional command-line arguments for the
	454	postprocessor/executable. The dict can also have "PP+EXE" keys
	455	which are used when the given exe is used by the given PP.
	456	Use 'default' as the name for arguments to be passed to all PP
	457	For compatibility with youtube-dl, a single list of args
	458	can also be used
	459
	460	The following options are used by the extractors:
	461	extractor_retries: Number of times to retry for known errors
	462	dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
	463	hls_split_discontinuity: Split HLS playlists to different formats at
	464	discontinuities such as ad breaks (default: False)
	465	extractor_args: A dictionary of arguments to be passed to the extractors.
	466	See "EXTRACTOR ARGUMENTS" for details.
	467	Eg: {'youtube': {'skip': ['dash', 'hls']}}
	468	mark_watched: Mark videos watched (even with --simulate). Only for YouTube
	469
	470	The following options are deprecated and may be removed in the future:
	471
	472	forceurl: - Use forceprint
	473	Force printing final URL.
	474	forcetitle: - Use forceprint
	475	Force printing title.
	476	forceid: - Use forceprint
	477	Force printing ID.
	478	forcethumbnail: - Use forceprint
	479	Force printing thumbnail URL.
	480	forcedescription: - Use forceprint
	481	Force printing description.
	482	forcefilename: - Use forceprint
	483	Force printing final filename.
	484	forceduration: - Use forceprint
	485	Force printing duration.
	486	allsubtitles: - Use subtitleslangs = ['all']
	487	Downloads all the subtitles of the video
	488	(requires writesubtitles or writeautomaticsub)
	489	include_ads: - Doesn't work
	490	Download ads as well
	491	call_home: - Not implemented
	492	Boolean, true iff we are allowed to contact the
	493	yt-dlp servers for debugging.
	494	post_hooks: - Register a custom postprocessor
	495	A list of functions that get called as the final step
	496	for each video file, after all postprocessors have been
	497	called. The filename will be passed as the only argument.
	498	hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
	499	Use the native HLS downloader instead of ffmpeg/avconv
	500	if True, otherwise use ffmpeg/avconv if False, otherwise

1

#!/usr/bin/env python3

import collections

import contextlib

import datetime

import errno

import fileinput

import functools

import io

import itertools

import json

import locale

import operator

import os

import platform

import random

import re

import shutil

import subprocess

import sys

import tempfile

import time

import tokenize

import traceback

import unicodedata

import urllib.request

26

from string import ascii_letters

27

28

from .cache import Cache

29

from .compat import (

30

HAS_LEGACY as compat_has_legacy,

31

compat_get_terminal_size,

compat_os_name,

compat_shlex_quote,

compat_str,

compat_urllib_error,

compat_urllib_request,

37

)

38

from .cookies import load_cookies

39

from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name

40

from .downloader.rtmp import rtmpdump_version

41

from .extractor import gen_extractor_classes, get_info_extractor

42

from .extractor.openload import PhantomJSwrapper

43

from .minicurses import format_text

44

from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors

45

from .postprocessor import (

46

EmbedThumbnailPP,

47

FFmpegFixupDuplicateMoovPP,

48

FFmpegFixupDurationPP,

49

FFmpegFixupM3u8PP,

50

FFmpegFixupM4aPP,

51

FFmpegFixupStretchedPP,

52

FFmpegFixupTimestampPP,

53

FFmpegMergerPP,

54

FFmpegPostProcessor,

55

MoveFilesAfterDownloadPP,

56

get_postprocessor,

57

)

58

from .update import detect_variant

from .utils import (

DEFAULT_OUTTMPL,

LINK_TEMPLATES,

NO_DEFAULT,

NUMBER_RE,

OUTTMPL_TYPES,

POSTPROCESS_WHEN,

STR_FORMAT_RE_TMPL,

STR_FORMAT_TYPES,

ContentTooShortError,

DateRange,

DownloadCancelled,

DownloadError,

EntryNotInPlaylist,

ExistingVideoReached,

ExtractorError,

GeoRestrictedError,

HEADRequest,

ISO3166Utils,

LazyList,

MaxDownloadsReached,

Namespace,

PagedList,

PerRequestProxyHandler,

PlaylistEntries,

Popen,

PostProcessingError,

ReExtractInfo,

RejectedVideoReached,

88

SameFileError,

89

UnavailableVideoError,

90

YoutubeDLCookieProcessor,

91

YoutubeDLHandler,

92

YoutubeDLRedirectHandler,

age_restricted,

args_to_str,

date_from_str,

determine_ext,

determine_protocol,

encode_compat_str,

encodeFilename,

error_to_compat_str,

expand_path,

filter_dict,

float_or_none,

format_bytes,

format_decimal_suffix,

format_field,

formatSeconds,

get_domain,

int_or_none,

iri_to_uri,

join_nonempty,

locked_file,

make_dir,

make_HTTPS_handler,

merge_headers,

network_exceptions,

number_of_digits,

orderedSet,

parse_filesize,

platform_name,

preferredencoding,

prepend_extension,

register_socks_protocols,

124

remove_terminal_sequences,

render_table,

replace_extension,

sanitize_filename,

sanitize_path,

sanitize_url,

sanitized_Request,

std_headers,

str_or_none,

strftime_or_none,

subtitles_filename,

supports_terminal_sequences,

timetuple_from_msec,

to_high_limit_path,

traverse_obj,

try_get,

url_basename,

variadic,

version_tuple,

windows_enable_vt_mode,

write_json_file,

write_string,

)

from .version import RELEASE_GIT_HEAD, __version__

148

149

if compat_os_name == 'nt':

import ctypes

class YoutubeDL:

"""YoutubeDL class.

YoutubeDL objects are the ones responsible of downloading the

157

actual video file and writing it to disk if the user has requested

158

it, among some other tasks. In most cases there should be one per

159

program. As, given a video URL, the downloader doesn't know how to

160

extract all the needed information, task that InfoExtractors do, it

161

has to pass the URL to one of them.

162

163

For this, YoutubeDL objects have a method that allows

164

InfoExtractors to be registered in a given order. When it is passed

165

a URL, the YoutubeDL object handles it to the first InfoExtractor it

166

finds that reports being able to handle it. The InfoExtractor extracts

167

all the information about the video or videos the URL refers to, and

168

YoutubeDL processes the extracted information, possibly using a File

169

Downloader to download the video.

170

171

YoutubeDL objects accept a lot of parameters. In order not to saturate

172

the object constructor with arguments, it receives a dictionary of

173

options instead. These options are available through the params

174

attribute for the InfoExtractors to use. The YoutubeDL also

175

registers itself as the downloader in charge for the InfoExtractors

176

that are added to it, so this is a "mutual registration".

Available options:

username: Username for authentication purposes.

181

password: Password for authentication purposes.

182

videopassword: Password for accessing a video.

183

ap_mso: Adobe Pass multiple-system operator identifier.

184

ap_username: Multiple-system operator account username.

185

ap_password: Multiple-system operator account password.

186

usenetrc: Use netrc for authentication instead.

187

verbose: Print additional info to stdout.

188

quiet: Do not print messages to stdout.

189

no_warnings: Do not print out anything for warnings.

190

forceprint: A dict with keys WHEN mapped to a list of templates to

191

print to stdout. The allowed keys are video or any of the

192

items in utils.POSTPROCESS_WHEN.

193

For compatibility, a single list is also accepted

194

print_to_file: A dict with keys WHEN (same as forceprint) mapped to

195

a list of tuples with (template, filename)

196

forcejson: Force printing info_dict as JSON.

197

dump_single_json: Force printing the info_dict of the whole playlist

198

(or video) as a single JSON line.

199

force_write_download_archive: Force writing download archive regardless

200

of 'skip_download' or 'simulate'.

201

simulate: Do not download the video files. If unset (or None),

202

simulate only if listsubtitles, listformats or list_thumbnails is used

203

format: Video format code. see "FORMAT SELECTION" for more details.

204

You can also pass a function. The function takes 'ctx' as

205

argument and returns the formats to download.

206

See "build_format_selector" for an implementation

207

allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.

208

ignore_no_formats_error: Ignore "No video formats" error. Useful for

209

extracting metadata even if the video is not actually

210

available for download (experimental)

211

format_sort: A list of fields by which to sort the video formats.

212

See "Sorting Formats" for more details.

213

format_sort_force: Force the given format_sort. see "Sorting Formats"

214

for more details.

215

prefer_free_formats: Whether to prefer video formats with free containers

216

over non-free ones of same quality.

217

allow_multiple_video_streams: Allow multiple video streams to be merged

218

into a single file

219

allow_multiple_audio_streams: Allow multiple audio streams to be merged

220

into a single file

221

check_formats: Whether to test if the formats are downloadable.

222

Can be True (check all), False (check none),

223

'selected' (check selected formats),

224

or None (check only if requested by extractor)

225

paths: Dictionary of output paths. The allowed keys are 'home'

226

'temp' and the keys of OUTTMPL_TYPES (in utils.py)

227

outtmpl: Dictionary of templates for output names. Allowed keys

228

are 'default' and the keys of OUTTMPL_TYPES (in utils.py).

229

For compatibility with youtube-dl, a single string can also be used

230

outtmpl_na_placeholder: Placeholder for unavailable meta fields.

231

restrictfilenames: Do not allow "&" and spaces in file names

232

trim_file_name: Limit length of filename (extension excluded)

233

windowsfilenames: Force the filenames to be windows compatible

234

ignoreerrors: Do not stop on download/postprocessing errors.

235

Can be 'only_download' to ignore only download errors.

236

Default is 'only_download' for CLI, but False for API

237

skip_playlist_after_errors: Number of allowed failures until the rest of

238

the playlist is skipped

239

force_generic_extractor: Force downloader to use the generic extractor

240

overwrites: Overwrite all video and metadata files if True,

241

overwrite only non-video files if None

242

and don't overwrite any file if False

243

For compatibility with youtube-dl,

244

"nooverwrites" may also be used instead

245

playliststart: Playlist item to start at.

246

playlistend: Playlist item to end at.

247

playlist_items: Specific indices of playlist to download.

248

playlistreverse: Download playlist items in reverse order.

249

playlistrandom: Download playlist items in random order.

250

matchtitle: Download only matching titles.

251

rejecttitle: Reject downloads for matching titles.

252

logger: Log messages to a logging.Logger instance.

253

logtostderr: Log messages to stderr instead of stdout.

254

consoletitle: Display progress in console window's titlebar.

255

writedescription: Write the video description to a .description file

256

writeinfojson: Write the video metadata to a .info.json file

257

clean_infojson: Remove private fields from the infojson

258

getcomments: Extract video comments. This will not be written to disk

259

unless writeinfojson is also given

260

writeannotations: Write the video annotations to a .annotations.xml file

261

writethumbnail: Write the thumbnail image to a file

262

allow_playlist_files: Whether to write playlists' description, infojson etc

263

also to disk when using the 'write*' options

264

write_all_thumbnails: Write all thumbnail formats to files

265

writelink: Write an internet shortcut file, depending on the

266

current platform (.url/.webloc/.desktop)

267

writeurllink: Write a Windows internet shortcut file (.url)

268

writewebloclink: Write a macOS internet shortcut file (.webloc)

269

writedesktoplink: Write a Linux internet shortcut file (.desktop)

270

writesubtitles: Write the video subtitles to a file

271

writeautomaticsub: Write the automatically generated subtitles to a file

272

listsubtitles: Lists all available subtitles for the video

273

subtitlesformat: The format code for subtitles

274

subtitleslangs: List of languages of the subtitles to download (can be regex).

275

The list may contain "all" to refer to all the available

276

subtitles. The language can be prefixed with a "-" to

277

exclude it from the requested languages. Eg: ['all', '-live_chat']

278

keepvideo: Keep the video file after post-processing

279

daterange: A DateRange object, download only if the upload_date is in the range.

280

skip_download: Skip the actual download of the video file

281

cachedir: Location of the cache files in the filesystem.

282

False to disable filesystem cache.

283

noplaylist: Download single video instead of a playlist if in doubt.

284

age_limit: An integer representing the user's age in years.

285

Unsuitable videos for the given age are skipped.

286

min_views: An integer representing the minimum view count the video

287

must have in order to not be skipped.

288

Videos without view count information are always

289

downloaded. None for no limit.

290

max_views: An integer representing the maximum view count.

291

Videos that are more popular than that are not

292

downloaded.

293

Videos without view count information are always

294

downloaded. None for no limit.

295

download_archive: File name of a file where all downloads are recorded.

296

Videos already present in the file are not downloaded

297

again.

298

break_on_existing: Stop the download process after attempting to download a

299

file that is in the archive.

300

break_on_reject: Stop the download process when encountering a video that

301

has been filtered out.

302

break_per_url: Whether break_on_reject and break_on_existing

303

should act on each input URL as opposed to for the entire queue

304

cookiefile: File name or text stream from where cookies should be read and dumped to

305

cookiesfrombrowser: A tuple containing the name of the browser, the profile

306

name/path from where cookies are loaded, and the name of the

307

keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')

308

legacyserverconnect: Explicitly allow HTTPS connection to servers that do not

309

support RFC 5746 secure renegotiation

310

nocheckcertificate: Do not verify SSL certificates

311

client_certificate: Path to client certificate file in PEM format. May include the private key

312

client_certificate_key: Path to private key file for client certificate

313

client_certificate_password: Password for client certificate private key, if encrypted.

314

If not provided and the key is encrypted, yt-dlp will ask interactively

315

prefer_insecure: Use HTTP instead of HTTPS to retrieve information.

316

At the moment, this is only supported by YouTube.

317

http_headers: A dictionary of custom headers to be used for all requests

318

proxy: URL of the proxy server to use

319

geo_verification_proxy: URL of the proxy to use for IP address verification

320

on geo-restricted sites.

321

socket_timeout: Time to wait for unresponsive hosts, in seconds

322

bidi_workaround: Work around buggy terminals without bidirectional text

323

support, using fribidi

324

debug_printtraffic:Print out sent and received HTTP traffic

325

default_search: Prepend this string if an input url is not valid.

326

'auto' for elaborate guessing

327

encoding: Use this encoding instead of the system-specified.

328

extract_flat: Do not resolve URLs, return the immediate result.

329

Pass in 'in_playlist' to only show this behavior for

330

playlist items.

331

wait_for_video: If given, wait for scheduled streams to become available.

332

The value should be a tuple containing the range

333

(min_secs, max_secs) to wait between retries

334

postprocessors: A list of dictionaries, each with an entry

335

* key: The name of the postprocessor. See

336

yt_dlp/postprocessor/__init__.py for a list.

337

* when: When to run the postprocessor. Allowed values are

338

the entries of utils.POSTPROCESS_WHEN

339

Assumed to be 'post_process' if not given

340

progress_hooks: A list of functions that get called on download

341

progress, with a dictionary with the entries

342

* status: One of "downloading", "error", or "finished".

343

Check this first and ignore unknown values.

344

* info_dict: The extracted info_dict

345

346

If status is one of "downloading", or "finished", the

347

following properties may also be present:

348

* filename: The final filename (always present)

349

* tmpfilename: The filename we're currently writing to

350

* downloaded_bytes: Bytes on disk

351

* total_bytes: Size of the whole file, None if unknown

352

* total_bytes_estimate: Guess of the eventual file size,

353

None if unavailable.

354

* elapsed: The number of seconds since download started.

355

* eta: The estimated time in seconds, None if unknown

356

* speed: The download speed in bytes/second, None if

357

unknown

358

* fragment_index: The counter of the currently

359

downloaded video fragment.

360

* fragment_count: The number of fragments (= individual

361

files that will be merged)

362

363

Progress hooks are guaranteed to be called at least once

364

(with status "finished") if the download is successful.

365

postprocessor_hooks: A list of functions that get called on postprocessing

366

progress, with a dictionary with the entries

367

* status: One of "started", "processing", or "finished".

368

Check this first and ignore unknown values.

369

* postprocessor: Name of the postprocessor

370

* info_dict: The extracted info_dict

371

372

Postprocessor hooks are guaranteed to be called at least twice

373

(with status "started" and "finished") if the processing is successful.

374

merge_output_format: Extension to use when merging formats.

375

final_ext: Expected final extension; used to detect when the file was

376

already downloaded and converted

377

fixup: Automatically correct known faults of the file.

378

One of:

379

- "never": do nothing

380

- "warn": only emit a warning

381

- "detect_or_warn": check whether we can do anything

382

about it, warn otherwise (default)

383

source_address: Client-side IP address to bind to.

384

sleep_interval_requests: Number of seconds to sleep between requests

385

during extraction

386

sleep_interval: Number of seconds to sleep before each download when

387

used alone or a lower bound of a range for randomized

388

sleep before each download (minimum possible number

389

of seconds to sleep) when used along with

390

max_sleep_interval.

391

max_sleep_interval:Upper bound of a range for randomized sleep before each

392

download (maximum possible number of seconds to sleep).

393

Must only be used along with sleep_interval.

394

Actual sleep time will be a random float from range

395

[sleep_interval; max_sleep_interval].

396

sleep_interval_subtitles: Number of seconds to sleep before each subtitle download

397

listformats: Print an overview of available video formats and exit.

398

list_thumbnails: Print a table of all thumbnails and exit.

399

match_filter: A function that gets called for every video with the signature

400

(info_dict, *, incomplete: bool) -> Optional[str]

401

For backward compatibility with youtube-dl, the signature

402

(info_dict) -> Optional[str] is also allowed.

403

- If it returns a message, the video is ignored.

404

- If it returns None, the video is downloaded.

405

- If it returns utils.NO_DEFAULT, the user is interactively

406

asked whether to download the video.

407

match_filter_func in utils.py is one example for this.

408

no_color: Do not emit color codes in output.

409

geo_bypass: Bypass geographic restriction via faking X-Forwarded-For

410

HTTP header

411

geo_bypass_country:

412

Two-letter ISO 3166-1 alpha-2 country code that will be used for

413

explicit geographic restriction bypassing via faking

414

X-Forwarded-For HTTP header

415

geo_bypass_ip_block:

416

IP range in CIDR notation that will be used similarly to

417

geo_bypass_country

418

external_downloader: A dictionary of protocol keys and the executable of the

419

external downloader to use for it. The allowed protocols
are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.

420

421

Set the value to 'native' to use the native downloader

422

compat_opts: Compatibility options. See "Differences in default behavior".

423

The following options do not work when used through the API:

424

filename, abort-on-error, multistreams, no-live-chat, format-sort

425

no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.

426

Refer __init__.py for their implementation

427

progress_template: Dictionary of templates for progress outputs.

428

Allowed keys are 'download', 'postprocess',

429

'download-title' (console title) and 'postprocess-title'.

430

The template is mapped on a dictionary with keys 'progress' and 'info'

431

retry_sleep_functions: Dictionary of functions that takes the number of attempts

432

as argument and returns the time to sleep in seconds.

433

Allowed keys are 'http', 'fragment', 'file_access'

434

download_ranges: A function that gets called for every video with the signature

435

(info_dict, *, ydl) -> Iterable[Section].

436

Only the returned sections will be downloaded. Each Section contains:

437

* start_time: Start time of the section in seconds

438

* end_time: End time of the section in seconds

439

* title: Section title (Optional)

440

* index: Section number (Optional)

441

442

The following parameters are not used by YoutubeDL itself, they are used by

443

the downloader (see yt_dlp/downloader/common.py):

444

nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,

445

max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,

446

continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,

447

external_downloader_args, concurrent_fragment_downloads.

448

449

The following options are used by the post processors:

450

ffmpeg_location: Location of the ffmpeg/avconv binary; either the path

451

to the binary or its containing directory.

452

postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)

453

and a list of additional command-line arguments for the

454

postprocessor/executable. The dict can also have "PP+EXE" keys

455

which are used when the given exe is used by the given PP.

456

Use 'default' as the name for arguments to be passed to all PP

457

For compatibility with youtube-dl, a single list of args

458

can also be used

459

460

The following options are used by the extractors:

461

extractor_retries: Number of times to retry for known errors

462

dynamic_mpd: Whether to process dynamic DASH manifests (default: True)

463

hls_split_discontinuity: Split HLS playlists to different formats at

464

discontinuities such as ad breaks (default: False)

465

extractor_args: A dictionary of arguments to be passed to the extractors.

466

See "EXTRACTOR ARGUMENTS" for details.

467

Eg: {'youtube': {'skip': ['dash', 'hls']}}

468

mark_watched: Mark videos watched (even with --simulate). Only for YouTube

469

470

The following options are deprecated and may be removed in the future:

471

472

forceurl: - Use forceprint

473

Force printing final URL.

474

forcetitle: - Use forceprint

475

Force printing title.

476

forceid: - Use forceprint

477

Force printing ID.

478

forcethumbnail: - Use forceprint

479

Force printing thumbnail URL.

480

forcedescription: - Use forceprint

481

Force printing description.

482

forcefilename: - Use forceprint

483

Force printing final filename.

484

forceduration: - Use forceprint

485

Force printing duration.

486

allsubtitles: - Use subtitleslangs = ['all']

487

Downloads all the subtitles of the video

488

(requires writesubtitles or writeautomaticsub)

489

include_ads: - Doesn't work

490

Download ads as well

491

call_home: - Not implemented

492

Boolean, true iff we are allowed to contact the

493

yt-dlp servers for debugging.

494

post_hooks: - Register a custom postprocessor

495

A list of functions that get called as the final step

496

for each video file, after all postprocessors have been

497

called. The filename will be passed as the only argument.

498

hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.

499

Use the native HLS downloader instead of ffmpeg/avconv

500

if True, otherwise use ffmpeg/avconv if False, otherwise

501

use downloader suggested by extractor if None.

502

prefer_ffmpeg: - avconv support is deprecated

503

If False, use avconv instead of ffmpeg if both are available,

504

otherwise prefer ffmpeg.

505

youtube_include_dash_manifest: - Use extractor_args

506

If True (default), DASH manifests and related

507

data will be downloaded and processed by extractor.

508

You can reduce network I/O by disabling it if you don't

509

care about DASH. (only for youtube)

510

youtube_include_hls_manifest: - Use extractor_args

511

If True (default), HLS manifests and related

512

data will be downloaded and processed by extractor.

513

You can reduce network I/O by disabling it if you don't

514

care about HLS. (only for youtube)

"""

# Field names treated as numeric (going by the constant's name; the code that
# consumes this set is outside this chunk).
_NUMERIC_FIELDS = {
    'width', 'height',
    'tbr', 'abr', 'asr', 'vbr', 'fps',
    'filesize', 'filesize_approx',
    'timestamp', 'release_timestamp',
    'duration',
    'view_count', 'like_count', 'dislike_count', 'repost_count',
    'average_rating', 'comment_count', 'age_limit',
    'start_time', 'end_time',
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number',
    'release_year',
}

# Keys that may appear in a format dictionary.
# NB: Keep in sync with the docstring of extractor/common.py
_format_fields = {
    'url', 'manifest_url', 'manifest_stream_number',
    'ext', 'format', 'format_id', 'format_note',
    'width', 'height', 'resolution', 'dynamic_range',
    'tbr', 'abr', 'acodec', 'asr', 'vbr', 'fps', 'vcodec', 'container',
    'filesize', 'filesize_approx',
    'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
    'preference', 'language', 'language_preference', 'quality', 'source_preference',
    'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
    'page_url', 'app', 'play_path', 'tc_url', 'flash_version',
    'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
}

# File extensions grouped by media kind (presumably used by format
# selection, judging by the name - confirm against build_format_selector).
_format_selection_exts = {
    'audio': {'m4a', 'mp3', 'ogg', 'aac'},
    'video': {'mp4', 'flv', 'webm', '3gp'},
    'storyboards': {'mhtml'},
}

542

543

def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    @param params       Dictionary of options (see "Available options" in the
                        class docstring). It is stored by reference as
                        self.params and normalized in place (e.g. compat_opts,
                        overwrites/nooverwrites, forceprint, http_headers).
                        Defaults to a new empty dict.
    @param auto_init    Whether to load the default extractors and print header (if verbose).
                        Set to 'no_verbose_header' to not print the header
    """
    if params is None:
        params = {}
    self.params = params
    # Extractors keyed by ie_key(): everything registered, and the subset
    # that are instances rather than classes (see add_info_extractor)
    self._ies = {}
    self._ies_instances = {}
    # One postprocessor chain per stage listed in POSTPROCESS_WHEN
    self._pps = {k: [] for k in POSTPROCESS_WHEN}
    # Messages already written with only_once=True (see _write_string)
    self._printed_messages = set()
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._postprocessor_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._num_videos = 0
    self._playlist_level = 0
    self._playlist_urls = set()
    self.cache = Cache(self)

    windows_enable_vt_mode()
    # 'logtostderr' redirects regular output to stderr; 'quiet' sends screen
    # messages to stderr. The console stream is the first of stderr/stdout
    # that supports terminal sequences (never set when compat_os_name is 'nt').
    stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
    self._out_files = Namespace(
        out=stdout,
        error=sys.stderr,
        screen=sys.stderr if self.params.get('quiet') else stdout,
        console=None if compat_os_name == 'nt' else next(
            filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
    )
    # Per-stream flag: colors are allowed unless 'no_color' is set or the
    # stream does not support terminal sequences
    self._allow_colors = Namespace(**{
        type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
        for type_, stream in self._out_files.items_ if type_ != 'console'
    })

    if sys.version_info < (3, 6):
        self.report_warning(
            'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

    if self.params.get('allow_unplayable_formats'):
        self.report_warning(
            f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
            'This is a developer option intended for debugging. \n'
            ' If you experience any issues while using this option, '
            f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

    def check_deprecated(param, option, suggestion):
        """Warn if the deprecated option `param` is set; return whether it was set."""
        if self.params.get(param) is not None:
            self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
            return True
        return False

    # The deprecated proxy option is only used if the new one is not given
    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    # Emit warnings that were queued in the params by the caller
    for msg in self.params.get('_warnings', []):
        self.report_warning(msg)
    for msg in self.params.get('_deprecation_warnings', []):
        self.deprecation_warning(msg)

    # compat_opts is always a set from here on
    self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
    if not compat_has_legacy:
        self.params['compat_opts'].add('no-compat-legacy')
    if 'list-formats' in self.params['compat_opts']:
        self.params['listformats_table'] = False

    if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
        # nooverwrites was unnecessarily changed to overwrites
        # in 0c3d0f51778b153f65c21906031c2e091fcfb641
        # This ensures compatibility with both keys
        self.params['overwrites'] = not self.params['nooverwrites']
    elif self.params.get('overwrites') is None:
        # An explicit None is dropped so that the key is simply absent
        self.params.pop('overwrites', None)
    else:
        # Keep the legacy key consistent with the new one
        self.params['nooverwrites'] = not self.params['overwrites']

    self.params.setdefault('forceprint', {})
    self.params.setdefault('print_to_file', {})

    # Compatibility with older syntax
    # (a non-dict value is treated as the templates for the 'video' key)
    if not isinstance(params['forceprint'], dict):
        self.params['forceprint'] = {'video': params['forceprint']}

    if self.params.get('bidi_workaround', False):
        # Start an external bidi tool whose stdin is a pipe and whose stdout
        # is the slave end of a pty; the converted text is later read back
        # from the master end (see _bidi_workaround)
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            width_args = [] if width is None else ['-w', str(width)]
            sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
            try:
                self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
            except OSError:
                # bidiv could not be started; fall back to fribidi
                self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # A missing executable only disables the workaround;
            # any other OS error is propagated
            if ose.errno == errno.ENOENT:
                self.report_warning(
                    'Could not find fribidi executable, ignoring --bidi-workaround. '
                    'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if auto_init:
        if auto_init != 'no_verbose_header':
            self.print_debug_header()
        self.add_default_info_extractors()

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    # Creating format selector here allows us to catch syntax errors before the extraction
    # None and '-' are kept as they are, a callable is used directly and
    # anything else is passed to build_format_selector
    self.format_selector = (
        self.params.get('format') if self.params.get('format') in (None, '-')
        else self.params['format'] if callable(self.params['format'])
        else self.build_format_selector(self.params['format']))

    # Set http_headers defaults according to std_headers
    self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

    # Register the hooks given in the params through the matching add_* method
    hooks = {
        'post_hooks': self.add_post_hook,
        'progress_hooks': self.add_progress_hook,
        'postprocessor_hooks': self.add_postprocessor_hook,
    }
    for opt, fn in hooks.items():
        for ph in self.params.get(opt, []):
            fn(ph)

    for pp_def_raw in self.params.get('postprocessors', []):
        # Work on a copy so that popping 'when' and 'key' leaves the caller's
        # dict untouched; the remaining entries become keyword arguments
        pp_def = dict(pp_def_raw)
        when = pp_def.pop('when', 'post_process')
        self.add_post_processor(
            get_postprocessor(pp_def.pop('key'))(self, **pp_def),
            when=when)

    self._setup_opener()
    register_socks_protocols()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified"""
        if fn is None:
            return False
        self.write_debug(f'Loading archive file {fn!r}')
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except OSError as ioe:
            # A missing archive file is not an error; anything else is
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    # Must exist before preloading, since the helper adds entries to it
    self.archive = set()
    preload_download_archive(self.params.get('download_archive'))

715

716

def warn_if_short_id(self, argv):
    """Warn about arguments that look like a short YouTube ID starting with a dash.

    An argument consisting of "-" followed by exactly 10 characters from
    [0-9A-Za-z_-] is considered such an ID. If any is found, a warning is
    reported that shows the command line rewritten with those arguments
    moved behind a "--" separator.
    """
    dashed_id_pattern = r'^-[0-9A-Za-z_-]{10}$'
    regular_args, dashed_ids = [], []
    for arg in argv:
        bucket = dashed_ids if re.match(dashed_id_pattern, arg) else regular_args
        bucket.append(arg)

    if not dashed_ids:
        return

    suggested_argv = ['yt-dlp', *regular_args, '--', *dashed_ids]
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s' %
        args_to_str(suggested_argv))

731

732

def add_info_extractor(self, ie):
    """Register an InfoExtractor (class or instance) under its ie_key().

    Every extractor is stored in self._ies. An instance (as opposed to a
    class) is additionally stored in self._ies_instances and gets this
    object set as its downloader.
    """
    key = ie.ie_key()
    self._ies[key] = ie
    if isinstance(ie, type):
        # Classes are only recorded; there is no instance to bind yet
        return
    self._ies_instances[key] = ie
    ie.set_downloader(self)

739

740

def _get_info_extractor_class(self, ie_key):
    """Return the extractor registered in self._ies under ie_key.

    If nothing is registered, the extractor is looked up with the
    module-level get_info_extractor(), registered and then returned.
    """
    registered = self._ies.get(ie_key)
    if registered is not None:
        return registered
    looked_up = get_info_extractor(ie_key)
    self.add_info_extractor(looked_up)
    return looked_up

746

747

def get_info_extractor(self, ie_key):
    """
    Return an IE instance named ie_key.

    An instance already stored in self._ies_instances is reused; otherwise
    the class is looked up with the module-level get_info_extractor(),
    instantiated and added to the extractor list.
    """
    instance = self._ies_instances.get(ie_key)
    if instance is not None:
        return instance
    instance = get_info_extractor(ie_key)()
    self.add_info_extractor(instance)
    return instance

758

759

def add_default_info_extractors(self):
    """
    Add every InfoExtractor yielded by gen_extractor_classes to the end of the list
    """
    for extractor_class in gen_extractor_classes():
        self.add_info_extractor(extractor_class)

765

766

def add_post_processor(self, pp, when='post_process'):
    """Append a PostProcessor object to the chain for the stage `when`.

    `when` must be an existing key of self._pps (KeyError otherwise).
    The postprocessor then gets this object set as its downloader.
    """
    chain = self._pps[when]
    chain.append(pp)
    pp.set_downloader(self)

770

771

def add_post_hook(self, ph):
    """Register ph at the end of the list of post hooks"""
    registered = self._post_hooks
    registered.append(ph)

774

775

def add_progress_hook(self, ph):
    """Register ph at the end of the list of download progress hooks"""
    registered = self._progress_hooks
    registered.append(ph)

778

779

def add_postprocessor_hook(self, ph):
    """Register a postprocessing progress hook.

    The hook is recorded in self._postprocessor_hooks and also added, via
    add_progress_hook(), to every postprocessor currently in self._pps.
    """
    self._postprocessor_hooks.append(ph)
    # Flatten the per-stage chains into a single stream of postprocessors
    for postprocessor in itertools.chain.from_iterable(self._pps.values()):
        postprocessor.add_progress_hook(ph)

785

786

def _bidi_workaround(self, message):
    """Pass message through the bidi helper process, if one was set up.

    Without a self._output_channel attribute the message is returned
    unchanged. Otherwise the message plus a newline is written to the
    helper's stdin, one line per line written is read back from the output
    channel, and the text is returned without its final character.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)
    # The trailing newline added below completes the last line, so the
    # number of lines to read back is the number of newlines plus one
    expected_lines = 1 + message.count('\n')
    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + '\n').encode())
    helper_stdin.flush()
    converted = []
    for _ in range(expected_lines):
        converted.append(self._output_channel.readline().decode())
    return ''.join(converted)[:-1]

798

799

def _write_string(self, message, out=None, only_once=False):
    """Write message to `out` using the 'encoding' param.

    With only_once=True, a message already recorded in
    self._printed_messages is skipped; a new one is recorded before writing.
    """
    if only_once and message in self._printed_messages:
        return
    if only_once:
        self._printed_messages.add(message)
    write_string(message, out=out, encoding=self.params.get('encoding'))

805

806

def to_stdout(self, message, skip_eol=False, quiet=None):
    """Print message to stdout

    `skip_eol` and `quiet` are no longer honoured: passing either one
    (anything other than its default) only triggers a deprecation warning.
    A newline is always appended.
    """
    deprecated_args = (
        ('quiet', quiet is not None),
        ('skip_eol', skip_eol is not False),
    )
    for arg_name, was_passed in deprecated_args:
        if was_passed:
            self.deprecation_warning(
                f'"YoutubeDL.to_stdout" no longer accepts the argument {arg_name}. '
                'Use "YoutubeDL.to_screen" instead')
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.out)

813

814

def to_screen(self, message, skip_eol=False, quiet=None):
    """Print message to screen if not in quiet mode

    A configured logger takes precedence and receives the message at
    debug level. `quiet=None` means "use the `quiet` param"; the
    `verbose` param overrides quietness.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    be_quiet = self.params.get('quiet') if quiet is None else quiet
    if be_quiet and not self.params.get('verbose'):
        return
    eol = '' if skip_eol else '\n'
    self._write_string('%s%s' % (self._bidi_workaround(message), eol), self._out_files.screen)

824

825

def to_stderr(self, message, only_once=False):
    """Print message to stderr

    If a logger is configured the message goes to `logger.error` instead
    and `only_once` is not applied.
    """
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.error, only_once=only_once)

832

833

def _send_console_code(self, code):
    """Write the control sequence `code` to the console handle.

    Nothing is written on Windows (`compat_os_name == 'nt'`) or when no
    console handle is available.
    """
    console = self._out_files.console if compat_os_name != 'nt' else None
    if not console:
        return
    self._write_string(code, self._out_files.console)

837

838

def to_console_title(self, message):
    """Set the console window title to `message` when the `consoletitle` param is enabled."""
    if not self.params.get('consoletitle', False):
        return
    message = remove_terminal_sequences(message)
    if compat_os_name != 'nt':
        # Title-setting escape sequence, sent through the console handle
        self._send_console_code(f'\033]0;{message}\007')
        return
    if ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))

849

850

def save_console_title(self):
    """Push the current console title onto the terminal's title stack.

    Only done when `consoletitle` is enabled and `simulate` is not.
    """
    if self.params.get('consoletitle') and not self.params.get('simulate'):
        self._send_console_code('\033[22;0t')  # Save the title on stack

854

855

def restore_console_title(self):
    """Pop the console title previously saved on the terminal's title stack.

    Only done when `consoletitle` is enabled and `simulate` is not.
    """
    if self.params.get('consoletitle') and not self.params.get('simulate'):
        self._send_console_code('\033[23;0t')  # Restore the title from stack

859

860

def __enter__(self):
    """Enter the context manager: save the console title and hand back the downloader itself."""
    self.save_console_title()
    return self

863

864

def __exit__(self, *args):
    """Leave the context manager: restore the console title and persist cookies.

    The cookie jar is saved only when a `cookiefile` param is set.
    The exception details in `args` are not inspected.
    """
    self.restore_console_title()

    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save(ignore_discard=True, ignore_expires=True)

869

870

def trouble(self, message=None, tb=None, is_error=True):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    @param tb          If given, is additional traceback information
    @param is_error    Whether to raise error according to ignoreerrors
    """
    def nested_exc_info():
        # exc_info stored on the exception currently being handled, if any
        current = sys.exc_info()[1]
        if hasattr(current, 'exc_info') and current.exc_info[0]:
            return current.exc_info
        return None

    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                tb = ''
                wrapped = nested_exc_info()
                if wrapped:
                    tb += ''.join(traceback.format_exception(*wrapped))
                tb += encode_compat_str(traceback.format_exc())
            else:
                # No active exception: show where trouble() was called from
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if not is_error:
        return
    if self.params.get('ignoreerrors'):
        self._download_retcode = 1
        return
    exc_info = nested_exc_info() or sys.exc_info()
    raise DownloadError(message, exc_info)

# Named text styles; each value is handed to the colour formatter as the
# style argument (see the `self.Styles.*` uses in the reporting helpers).
Styles = Namespace(
    HEADERS='yellow',
    EMPHASIS='light blue',
    FILENAME='green',
    ID='green',
    DELIM='blue',
    ERROR='red',
    WARNING='yellow',
    SUPPRESS='light black',
)

914

915

def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
    """Return `text` styled with `f`, or a plain variant when colours are off.

    @param handle         Output stream; its `encoding` is consulted for `test_encoding`
    @param allow_colors   Whether `format_text` may be applied
    @param fallback       Used instead of `text` when colours are off, or when
                          `test_encoding` finds characters the encoding cannot represent
    @param test_encoding  Round-trip `text` through the output encoding first
    """
    text = str(text)
    if test_encoding:
        # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
        encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
        round_tripped = text.encode(encoding, 'ignore').decode(encoding)
        lossy = round_tripped != text
        text = fallback if (fallback is not None and lossy) else round_tripped
    if allow_colors:
        return format_text(text, f)
    return text if fallback is None else fallback

925

926

def _format_out(self, *args, **kwargs):
    """Shortcut for `_format_text` bound to the stdout handle and its colour setting."""
    handle, colors = self._out_files.out, self._allow_colors.out
    return self._format_text(handle, colors, *args, **kwargs)

928

929

def _format_screen(self, *args, **kwargs):
    """Shortcut for `_format_text` bound to the screen handle and its colour setting."""
    handle, colors = self._out_files.screen, self._allow_colors.screen
    return self._format_text(handle, colors, *args, **kwargs)

931

932

def _format_err(self, *args, **kwargs):
    """Shortcut for `_format_text` bound to the error handle and its colour setting."""
    handle, colors = self._out_files.error, self._allow_colors.error
    return self._format_text(handle, colors, *args, **kwargs)

934

935

def report_warning(self, message, only_once=False):
    '''
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored

    A configured logger receives the bare message instead; otherwise the
    `no_warnings` param suppresses the output entirely.
    '''
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
        return
    if self.params.get('no_warnings'):
        return
    prefix = self._format_err("WARNING:", self.Styles.WARNING)
    self.to_stderr(f'{prefix} {message}', only_once)

946

947

def deprecation_warning(self, message):
    """Emit `message` as a deprecation warning.

    Goes to the configured logger at warning level if there is one,
    otherwise to stderr, each distinct message only once.
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(f'DeprecationWarning: {message}')
        return
    prefix = self._format_err("DeprecationWarning:", self.Styles.ERROR)
    self.to_stderr(f'{prefix} {message}', True)

952

953

def report_error(self, message, *args, **kwargs):
    '''
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.

    Extra positional and keyword arguments are forwarded to trouble().
    '''
    prefix = self._format_err("ERROR:", self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', *args, **kwargs)

959

960

def write_debug(self, message, only_once=False):
    '''Log debug message or Print message to stderr

    Does nothing unless the `verbose` param is set. The message is
    prefixed with "[debug] " in both cases.
    '''
    if not self.params.get('verbose', False):
        return
    message = f'[debug] {message}'
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    self.to_stderr(message, only_once)

969

970

def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    named_msg = '[download] %s has already been downloaded' % file_name
    try:
        self.to_screen(named_msg)
    except UnicodeEncodeError:
        # The name cannot be written to the screen; report without it
        self.to_screen('[download] The file has already been downloaded')

976

977

def report_file_delete(self, file_name):
    """Report that existing file will be deleted."""
    named_msg = 'Deleting existing file %s' % file_name
    try:
        self.to_screen(named_msg)
    except UnicodeEncodeError:
        # The name cannot be written to the screen; report without it
        self.to_screen('Deleting existing file')

983

984

def raise_no_formats(self, info, forced=False, *, msg=None):
    """Report that `info` has no usable formats.

    Raises ExtractorError unless the `ignore_no_formats_error` param is
    set and `forced` is false, in which case only a warning is issued.
    When `msg` is not given, a DRM-specific or generic message is used.
    """
    has_drm = info.get('_has_drm')
    ignored = self.params.get('ignore_no_formats_error')
    expected = bool(msg)
    if not msg:
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
    if ignored and not forced:
        self.report_warning(msg)
        return
    raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                         expected=has_drm or ignored or expected)

993

994

def parse_outtmpl(self):
    """Return the output templates as a dict with all default entries filled in.

    A non-dict `outtmpl` param is treated as the 'default' template.
    Entries missing (or None) are taken from DEFAULT_OUTTMPL. Note that a
    dict passed in `params['outtmpl']` is updated in place.
    """
    templates = self.params.get('outtmpl', {})
    if not isinstance(templates, dict):
        templates = {'default': templates}

    # Remove spaces in the default template
    if self.params.get('restrictfilenames'):
        def sanitize(tmpl):
            return tmpl.replace(' - ', ' ').replace(' ', '-')
    else:
        def sanitize(tmpl):
            return tmpl

    missing_defaults = {
        name: sanitize(tmpl) for name, tmpl in DEFAULT_OUTTMPL.items()
        if templates.get(name) is None}
    templates.update(missing_defaults)

    for tmpl in templates.values():
        if isinstance(tmpl, bytes):
            self.report_warning('Parameter outtmpl is bytes, but should be a unicode string')
    return templates

1010

1011

def get_output_path(self, dir_type='', filename=None):
    """Join the configured 'home' path, the path for `dir_type` and `filename`.

    Both configured paths are stripped and passed through expand_path;
    the joined result is passed through sanitize_path.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    joined = os.path.join(home_dir, type_dir, filename or '')
    return sanitize_path(joined, force=self.params.get('windowsfilenames'))

1019

1020

@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Apply expand_path to `outtmpl` while keeping '%%' and '$$' untouched."""
    # expand_path translates '%%' into '%' and '$$' into '$'
    # correspondingly that is not what we want since we need to keep
    # '%%' intact for template dict substitution step. Working around
    # with boundary-alike separator hack.
    sep = ''.join(random.choice(ascii_letters) for _ in range(32))
    protected = outtmpl.replace('%%', f'%{sep}%')
    protected = protected.replace('$$', f'${sep}$')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    expanded = expand_path(protected)
    return expanded.replace(sep, '')

1034

1035

@staticmethod
def escape_outtmpl(outtmpl):
    ''' Escape any remaining strings like %s, %abc% etc. '''
    def _escape(mobj):
        # Matches without a key get one extra '%' prepended; keyed ones stay as they are
        extra = '' if mobj.group('has_key') else '%'
        return extra + mobj.group(0)

    pattern = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')
    return re.sub(pattern, _escape, outtmpl)

@classmethod
def validate_outtmpl(cls, outtmpl):
    ''' @return None or Exception object '''
    # Turn the extra conversion types into plain 's' so that the
    # %-formatting below can parse the template
    pattern = STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]')
    outtmpl = re.sub(
        pattern,
        lambda mobj: f'{mobj.group(0)[:-1]}s',
        cls._outtmpl_expandpath(outtmpl))
    try:
        # Trial substitution; every key resolves to 0
        cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
    except ValueError as err:
        return err
    else:
        return None

@staticmethod
def _copy_infodict(info_dict):
    """Return a shallow copy of `info_dict` without the internal
    '__postprocessors' and '__pending_error' entries."""
    copied = dict(info_dict)
    for internal_key in ('__postprocessors', '__pending_error'):
        copied.pop(internal_key, None)
    return copied

1062

1063

def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
    @param sanitize    Whether to sanitize the output as a filename.
                       For backward compatibility, a function can also be passed
    @return            (rewritten template, dict of the values it refers to)
    """

    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    # Everything below works on a copy; only 'epoch' is written to the caller's dict
    info_dict = self._copy_infodict(info_dict)
    if info_dict.get('duration', None) is not None:
        # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        info_dict['duration_string'] = formatSeconds(info_dict['duration'], '-' if sanitize else ':')
    else:
        info_dict['duration_string'] = None
    info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
    info_dict['video_autonumber'] = self._num_videos
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
        'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
    MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
        (?P<negate>-)?
        (?P<fields>{FIELD_RE})
        (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
        (?:>(?P<strf_format>.+?))?
        (?P<remaining>
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
        )$''')

    def _traverse_infodict(k):
        path = k.split('.')
        if path[0] == '':
            path.pop(0)
        return traverse_obj(info_dict, path, is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        pending = mdict['maths']
        if pending:
            value = float_or_none(value)
            math_op = None
            while pending:
                # Tokens alternate: operator, operand, operator, operand, ...
                token_re = MATH_FIELD_RE if math_op else MATH_OPERATORS_RE
                item = re.match(token_re, pending).group(0)
                pending = pending[len(item):]
                if math_op is None:
                    math_op = MATH_FUNCTIONS[item]
                    continue
                if item[0] == '-':
                    item, multiplier = item[1:], -1
                else:
                    multiplier = 1
                offset = float_or_none(item)
                if offset is None:
                    # Not a number literal, so treat it as a field name
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = math_op(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                math_op = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        return value

    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
        if 'filename-sanitization' in self.params['compat_opts']:
            is_id = bool(re.search(r'(^|[_.])id(\.|$)', key))
        else:
            is_id = NO_DEFAULT
        return sanitize_filename(str(value), restricted=restricted, is_id=is_id)

    sanitizer = sanitize if callable(sanitize) else filename_sanitizer
    sanitize = bool(sanitize)

    def _dumpjson_default(obj):
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        return repr(obj)

    def create_key(outer_mobj):
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        initial_field = mobj.group('fields') if mobj else ''
        value, replacement, default = None, None, na
        # Walk the comma-separated alternates until one yields a value
        while mobj:
            mobj = mobj.groupdict()
            if mobj['default'] is not None:
                default = mobj['default']
            value = get_value(mobj)
            replacement = mobj['replacement']
            if value is not None or not mobj['alternate']:
                break
            mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])

        fmt = outer_mobj.group('format')
        if fmt == 's' and value is not None and key in field_size_compat_map:
            fmt = f'0{field_size_compat_map[key]:d}d'

        if value is None:
            value = default
        elif replacement is not None:
            value = replacement

        flags = outer_mobj.group('conversion') or ''
        str_fmt = f'{fmt[:-1]}s'
        conv = fmt[-1]
        if conv == 'l':  # list
            delim = '\n' if '#' in flags else ', '
            value = delim.join(map(str, variadic(value, allowed_types=(str, bytes))))
            fmt = str_fmt
        elif conv == 'j':  # json
            value = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None)
            fmt = str_fmt
        elif conv == 'q':  # quoted
            value = map(str, variadic(value) if '#' in flags else [value])
            value = ' '.join(map(compat_shlex_quote, value))
            fmt = str_fmt
        elif conv == 'B':  # bytes
            value = f'%{str_fmt}'.encode() % str(value).encode()
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif conv == 'U':  # unicode normalized
            # "+" = compatibility equivalence, "#" = NFD
            form = 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C')
            value = unicodedata.normalize(form, value)
            fmt = str_fmt
        elif conv == 'D':  # decimal suffix
            num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
            value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                          factor=1024 if '#' in flags else 1000)
        elif conv == 'S':  # filename sanitization
            value = filename_sanitizer(initial_field, value, restricted='#' in flags)
            fmt = str_fmt
        elif conv == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif conv not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), str_fmt
            if fmt[-1] in 'csr':
                value = sanitizer(initial_field, value)

        # Key under which the computed value is stored in TMPL_DICT
        key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

1239

1240

def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Expand `outtmpl` against `info_dict` and return the resulting string.

    Extra arguments are forwarded to prepare_outtmpl().
    """
    prepared_tmpl, values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    return self.escape_outtmpl(prepared_tmpl) % values

1243

1244

def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
    """Evaluate an output template into a filename (without the configured paths).

    Exactly one of `outtmpl` (an explicit template) and `tmpl_type` (a key
    of `self.outtmpl_dict`) may be given. Returns None when the result is
    empty or the template is invalid (after reporting the error).
    """
    assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
    if outtmpl is None:
        outtmpl = self.outtmpl_dict.get(tmpl_type or 'default', self.outtmpl_dict['default'])
    try:
        outtmpl = self._outtmpl_expandpath(outtmpl)
        filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
        if not filename:
            return None

        if tmpl_type in ('', 'temp'):
            final_ext = self.params.get('final_ext')
            ext = info_dict.get('ext')
            needs_swap = final_ext and ext and final_ext != ext
            if needs_swap and filename.endswith(f'.{final_ext}'):
                filename = replace_extension(filename, ext, final_ext)
        elif tmpl_type:
            force_ext = OUTTMPL_TYPES[tmpl_type]
            if force_ext:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # Only the part before the last (up to two) extensions is truncated
            no_ext, *ext = filename.rsplit('.', 2)
            filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None

1273

1274

def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
    """Generate the output filename

    @param dir_type  Template/path type to use; mutually exclusive with `outtmpl`
    @param outtmpl   Explicit output template to evaluate instead
    @param warn      Warn when the `paths` param cannot take effect
    @return          The full output path; '-' unchanged; a falsy value
                     when no filename could be generated
    """
    if outtmpl:
        assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
        dir_type = None
    filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
    if not filename and dir_type not in ('', 'temp'):
        return ''

    if warn and self.params.get('paths'):
        if filename == '-':
            self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
        # `filename` may be None here; os.path.isabs(None) would raise TypeError
        elif filename and os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
    if filename == '-' or not filename:
        return filename

    return self.get_output_path(dir_type, filename)

1294

1295

def _match_entry(self, info_dict, incomplete=False, silent=False):
    """ Returns None if the file should be downloaded

    Otherwise returns the reason (a string) for skipping it. The reason is
    printed unless `silent`, and the matching `break_on_*` param turns it
    into an ExistingVideoReached / RejectedVideoReached exception.
    """

    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def check_filter():
        # The title can be missing when we're just evaluating the playlist
        if 'title' in info_dict:
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'

        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)

        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        match_filter = self.params.get('match_filter')
        if match_filter is None:
            return None
        try:
            ret = match_filter(info_dict, incomplete=incomplete)
        except TypeError:
            # For backward compatibility
            ret = None if incomplete else match_filter(info_dict)
        if ret is not NO_DEFAULT:
            return ret
        # NO_DEFAULT from the filter means: ask the user, repeating
        # the question until the reply is y, n or empty
        while True:
            filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
            prompt = self._format_screen(f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)
            reply = input(prompt).lower().strip()
            if reply in {'y', ''}:
                return None
            if reply == 'n':
                return f'Skipping {video_title}'

    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return reason
    if not silent:
        self.to_screen('[download] ' + reason)
    if self.params.get(break_opt, False):
        raise break_err()
    return reason

@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Set the keys from extra_info in info dict if they are missing'''
    for name in extra_info:
        # Keys already present are kept, even when their value is None
        info_dict.setdefault(name, extra_info[name])

1366

1367

def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract the information for `url` using the first suitable extractor.

    Returns what the extraction step yields, or None when the URL is
    already recorded in the download archive or no extractor is suitable
    (the latter is reported as an error).

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
    process -- whether to resolve all unresolved references (URLs, playlist items),
               must be True for download to work.
    force_generic_extractor -- force using the generic extractor
    """

    if extra_info is None:
        extra_info = {}

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    ies = {ie_key: self._get_info_extractor_class(ie_key)} if ie_key else self._ies

    for ie_key, ie in ies.items():
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = ie.get_temp_id(url)
        archived = temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key})
        if archived:
            self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
            if self.params.get('break_on_existing', False):
                raise ExistingVideoReached()
            break
        return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
    else:
        # Loop ended without `break`: no extractor accepted the URL
        self.report_error('no suitable InfoExtractor for URL %s' % url)

1412

1413

def _handle_extraction_exceptions(func):
    """Decorator that gives an extraction method uniform error handling.

    - DownloadCancelled and the LazyList/PagedList IndexErrors propagate.
    - ReExtractInfo makes the wrapped call run again.
    - GeoRestrictedError and ExtractorError are passed to report_error().
    - Any other Exception is reported when `ignoreerrors` is set, else re-raised.
    The wrapper returns None whenever an error was only reported.
    """
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        while True:
            try:
                return func(self, *args, **kwargs)
            except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                raise
            except ReExtractInfo as e:
                retry_msg = f'{e}; Re-extracting data'
                if e.expected:
                    self.to_screen(retry_msg)
                else:
                    self.to_stderr('\r')
                    self.report_warning(retry_msg)
                continue
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    country_names = ', '.join(map(ISO3166Utils.short2full, e.countries))
                    msg += '\nThis video is available in %s.' % country_names
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(str(e), e.format_traceback())
            except Exception as e:
                if not self.params.get('ignoreerrors'):
                    raise
                self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
            # Reached only after an error was reported: stop retrying
            break
    return wrapper

def _wait_for_video(self, ie_result):
    """Wait for a video that has no formats yet, then request re-extraction.

    Does nothing unless the `wait_for_video` param is set and `ie_result`
    is a video without 'formats' and 'url'. Otherwise it counts down once
    per second and always ends by raising ReExtractInfo, either when the
    wait is over or when the user presses Ctrl+C.
    """
    wait_range = self.params.get('wait_for_video')
    if not wait_range:
        return
    if ie_result.get('_type', 'video') != 'video':
        return
    if ie_result.get('formats') or ie_result.get('url'):
        return

    def format_dur(dur):
        return '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]

    last_msg = ''

    def progress(msg):
        nonlocal last_msg
        # Pad with spaces so that a shorter message fully overwrites the previous one
        padding = ' ' * (len(last_msg) - len(msg))
        self.to_screen(msg + padding + '\r', skip_eol=True)
        last_msg = msg

    min_wait, max_wait = self.params.get('wait_for_video')
    diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
    if diff is None and ie_result.get('live_status') == 'is_upcoming':
        if max_wait and min_wait:
            guess = random.uniform(min_wait, max_wait)
        else:
            guess = max_wait or min_wait
        diff = round(guess, 0)
        self.report_warning('Release time of video is not known')
    elif (diff or 0) <= 0:
        self.report_warning('Video should already be available according to extracted info')
    # Clamp the wait into [min_wait, max_wait]
    diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
    self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

    wait_till = time.time() + diff
    try:
        while True:
            diff = wait_till - time.time()
            if diff <= 0:
                progress('')
                raise ReExtractInfo('[wait] Wait period ended', expected=True)
            progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
            time.sleep(1)
    except KeyboardInterrupt:
        progress('')
        raise ReExtractInfo('[wait] Interrupted by user', expected=True)
    except BaseException as e:
        if not isinstance(e, ReExtractInfo):
            # Terminate the progress line before the error propagates
            self.to_screen('')
        raise

@_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run extractor `ie` on `url` and optionally process its result.

    Returns None if the extractor returned None, the processed result
    when `process` is true, and the raw extractor result otherwise.
    """
    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}
    original_url = extra_info.get('original_url')
    if original_url:
        ie_result.setdefault('original_url', extra_info['original_url'])
    self.add_default_extra_info(ie_result, ie, url)
    if not process:
        return ie_result
    self._wait_for_video(ie_result)
    return self.process_ie_result(ie_result, download, extra_info)

def add_default_extra_info(self, ie_result, ie, url):
    """Fill in URL- and extractor-related fields of `ie_result` that are missing.

    `url` and `ie` may each be None, in which case the corresponding
    fields are not added. Existing values are never overwritten.
    """
    if url is not None:
        self.add_extra_info(ie_result, {'webpage_url': url, 'original_url': url})
    webpage_url = ie_result.get('webpage_url')
    if webpage_url:
        derived = {
            'webpage_url_basename': url_basename(webpage_url),
            'webpage_url_domain': get_domain(webpage_url),
        }
        self.add_extra_info(ie_result, derived)
    if ie is not None:
        extractor_fields = {
            'extractor': ie.IE_NAME,
            'extractor_key': ie.ie_key(),
        }
        self.add_extra_info(ie_result, extractor_fields)

1523

1524

def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    The handling depends on ie_result['_type'] ('video' when absent):
    'video', 'url', 'url_transparent', 'playlist'/'multi_video' and
    'compat_list' are supported; anything else raises Exception.
    None is returned for a playlist that is already being processed
    higher up in the current playlist chain.
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        if ie_result.get('original_url'):
            extra_info.setdefault('original_url', ie_result['original_url'])

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: do not resolve the URL, only run the
            # printing/archive steps on an enriched copy
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            if ie and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            info_copy, _ = self.pre_process(info_copy)
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            self._raise_pending_errors(info_copy)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        self._raise_pending_errors(ie_result)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, compat_str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        new_result = info.copy()
        new_result.update(filter_dict(ie_result, lambda k, v: (
            v is not None and k not in {'_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'})))

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # Choose the label first: '%' binds tighter than 'or', so
            # formatting inline would never fall back to the id
            playlist_label = ie_result.get('title') or ie_result.get('id')
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s' % playlist_label)
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._fill_common_fields(ie_result, False)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            if not self._playlist_level:
                # Back at the outermost playlist: forget the visited URLs
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Entries of the old list format inherit the list's extractor/URL fields
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)

1651

1652

def _ensure_dir_exists(self, path):
    """Call make_dir() for `path`, handing it self.report_error as second argument.

    Returns whatever make_dir() returns.
    """
    outcome = make_dir(path, self.report_error)
    return outcome

1654

1655

@staticmethod

1656

def _playlist_infodict(ie_result, **kwargs):

1657

return {

1658

**ie_result,

1659

'playlist': ie_result.get('title') or ie_result.get('id'),

1660

'playlist_id': ie_result.get('id'),

1661

'playlist_title': ie_result.get('title'),

1662

'playlist_uploader': ie_result.get('uploader'),

1663

'playlist_uploader_id': ie_result.get('uploader_id'),

'playlist_index': 0,

**kwargs,

}

def __process_playlist(self, ie_result, download):
    """Process every requested entry of the playlist `ie_result`.

    Writes the playlist-level files (info json, description, thumbnails) when
    allowed, processes each requested entry through __process_iterable_entry,
    stores the processed entries back into `ie_result`, runs the 'playlist'
    postprocessors and returns the resulting dict.
    Returns None early when writing one of the playlist files returns None.
    """
    playlist_title = ie_result.get('title') or ie_result.get('id') or '<Untitled>'
    self.to_screen(f'[download] Downloading playlist: {playlist_title}')

    playlist_entries = PlaylistEntries(self, ie_result)
    entries = orderedSet(playlist_entries.get_requested_items())
    ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*entries)) or ([], [])
    n_entries = len(entries)
    ie_result['playlist_count'] = playlist_entries.full_count

    _infojson_written = False
    may_write_files = self.params.get('allow_playlist_files', True)
    if may_write_files and self.params.get('list_thumbnails'):
        self.list_thumbnails(ie_result)
    if may_write_files and not self.params.get('simulate'):
        ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
        _infojson_written = self._write_info_json(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
        if _infojson_written is None:
            return
        description_written = self._write_description(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_description'))
        if description_written is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]
    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    self.to_screen(f'[{ie_result["extractor"]}] Playlist {playlist_title}: Downloading {n_entries} videos'
                   f'{format_field(ie_result, "playlist_count", " of %s")}')

    def entry_extra_info(playlist_index, autonumber):
        # Built afresh for every entry so that each value is read from
        # ie_result at the time the entry is processed
        return {
            'n_entries': n_entries,
            '__last_playlist_index': max(ie_result['requested_entries']),
            'playlist_count': ie_result.get('playlist_count'),
            'playlist_index': playlist_index,
            'playlist_autonumber': autonumber,
            'playlist': playlist_title,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'webpage_url_domain': get_domain(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for position, (playlist_index, entry) in enumerate(entries, 1):
        # TODO: Add auto-generated fields
        if self._match_entry(entry, incomplete=True) is not None:
            continue

        if 'playlist-index' in self.params.get('compat_opts', []):
            playlist_index = ie_result['requested_entries'][position - 1]
        self.to_screen('[download] Downloading video %s of %s' % (
            self._format_screen(position, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))

        entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
        entry_result = self.__process_iterable_entry(
            entry, download, entry_extra_info(playlist_index, position))
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                f'Skipping the remaining entries in playlist "{playlist_title}" since {failures} items failed extraction')
            break
        entries[position - 1] = (playlist_index, entry_result)

    # Update with processed data
    ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*entries)) or ([], [])

    # Write the updated info to json
    if _infojson_written is True:
        rewritten = self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True)
        if rewritten is None:
            return

    ie_result = self.run_all_pps('playlist', ie_result)
    self.to_screen(f'[download] Finished downloading playlist: {playlist_title}')
    return ie_result

1752

1753

@_handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Run process_ie_result() on one playlist entry under _handle_extraction_exceptions."""
    processed = self.process_ie_result(entry, download=download, extra_info=extra_info)
    return processed

1757

1758

def _build_format_filter(self, filter_spec):

1759

" Returns a function to filter the formats according to the filter_spec "

OPERATORS = {

'<': operator.lt,

'<=': operator.le,

'>': operator.gt,

'>=': operator.ge,

'=': operator.eq,

'!=': operator.ne,

}

operator_rex = re.compile(r'''(?x)\s*

1770

(?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*

1771

(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*

1772

(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*

1773

''' % '|'.join(map(re.escape, OPERATORS.keys())))

1774

m = operator_rex.fullmatch(filter_spec)

1775

if m:

1776

try:

1777

comparison_value = int(m.group('value'))

1778

except ValueError:

1779

comparison_value = parse_filesize(m.group('value'))

1780

if comparison_value is None:

1781

comparison_value = parse_filesize(m.group('value') + 'B')

1782

if comparison_value is None:

1783

raise ValueError(

1784

'Invalid value %r in format specification %r' % (

1785

m.group('value'), filter_spec))

1786

op = OPERATORS[m.group('op')]

if not m:

STR_OPERATORS = {

'=': operator.eq,

'^=': lambda attr, value: attr.startswith(value),

1792

'$=': lambda attr, value: attr.endswith(value),

1793

'*=': lambda attr, value: value in attr,

1794

'~=': lambda attr, value: value.search(attr) is not None

1795

}

1796

str_operator_rex = re.compile(r'''(?x)\s*

1797

(?P<key>[a-zA-Z0-9._-]+)\s*

1798

(?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?

1799

(?P<quote>["'])?

1800

(?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))

1801

(?(quote)(?P=quote))\s*

1802

''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))

1803

m = str_operator_rex.fullmatch(filter_spec)

1804

if m:

1805

if m.group('op') == '~=':

1806

comparison_value = re.compile(m.group('value'))

1807

else:

1808

comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))

1809

str_op = STR_OPERATORS[m.group('op')]

1810

if m.group('negation'):

1811

op = lambda attr, value: not str_op(attr, value)

else:

op = str_op

if not m:

raise SyntaxError('Invalid filter specification %r' % filter_spec)

1817

1818

def _filter(f):

1819

actual_value = f.get(m.group('key'))

1820

if actual_value is None:

1821

return m.group('none_inclusive')

1822

return op(actual_value, comparison_value)

1823

return _filter

1824

1825

def _check_formats(self, formats):

1826

for f in formats:

1827

self.to_screen('[info] Testing format %s' % f['format_id'])

1828

path = self.get_output_path('temp')

1829

if not self._ensure_dir_exists(f'{path}/'):

1830

continue

1831

temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)

1832

temp_file.close()

1833

try:

1834

success, _ = self.dl(temp_file.name, f, test=True)

1835

except (DownloadError, OSError, ValueError) + network_exceptions:

1836

success = False

1837

finally:

1838

if os.path.exists(temp_file.name):

1839

try:

1840

os.remove(temp_file.name)

1841

except OSError:

1842

self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)

if success:

yield f

else:

self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

1847

1848

def _default_format_spec(self, info_dict, download=True):

1849

1850

def can_merge():

1851

merger = FFmpegMergerPP(self)

1852

return merger.available and merger.can_merge()

1853

1854

prefer_best = (

1855

not self.params.get('simulate')

and download

and (

not can_merge()

or info_dict.get('is_live') and not self.params.get('live_from_start')

1860

or self.outtmpl_dict['default'] == '-'))

1861

compat = (

1862

prefer_best

1863

or self.params.get('allow_multiple_audio_streams', False)

1864

or 'format-spec' in self.params['compat_opts'])

1865

1866

return (

1867

'best/bestvideo+bestaudio' if prefer_best

1868

else 'bestvideo*+bestaudio/best' if not compat

1869

else 'bestvideo+bestaudio/best')

1870

1871

def build_format_selector(self, format_spec):
    """Compile the format selection expression `format_spec` into a function.

    The expression is tokenized with the `tokenize` module and parsed into
    FormatSelector tuples combining the operators ',' (several selections),
    '/' (first alternative that yields something), '+' (merge), '(...)'
    (grouping) and '[...]' (filters, see _build_format_filter).

    Returns a function taking a context dict. It reads ctx['formats'] and,
    for some selectors, ctx['incomplete_formats'] and
    ctx['has_merged_format'], and gives back an iterable of the selected
    format dicts.

    Raises SyntaxError if the expression cannot be parsed.
    """
    def syntax_error(note, start):
        # Builds (does not raise) the error; `start` is a (row, column) pair
        # and the column is used to place the '^' marker
        message = (
            'Invalid format specification: '
            '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    check_formats = self.params.get('check_formats') == 'selected'

    def _parse_filter(tokens):
        # Concatenate the token strings up to (not including) the closing ']'
        filter_parts = []
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
                        break
            elif type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive descent parser; returns the list of selectors separated by ','.
        # `tokens` must provide restore_last_token() so that a token belonging
        # to an enclosing construct can be handed back to the caller
        selectors = []
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error(f'Operator not recognized: "{string}"', start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        # Combine two (possibly already merged) formats into one format dict
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            get_no_more = {'video': False, 'audio': False}
            # Collect the survivors in a new list: removing items from the
            # list being iterated would skip the element after each removal
            kept_formats = []
            for fmt_info in formats_info:
                if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                    continue
                for aud_vid in ['audio', 'video']:
                    if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                        if get_no_more[aud_vid]:
                            break
                        get_no_more[aud_vid] = True
                else:
                    kept_formats.append(fmt_info)
            # If nothing survived (e.g. only formats without audio and video
            # were given), keep the last candidate so that the merge still
            # results in that single format, as it did before
            formats_info = kept_formats or formats_info[-1:]

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(filtered('format')),
            'format_id': '+'.join(filtered('format_id')),
            'ext': output_ext,
            'protocol': '+'.join(map(determine_protocol, formats_info)),
            'language': '+'.join(orderedSet(filtered('language'))) or None,
            'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
            'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
            'tbr': sum(filtered('tbr', 'vbr', 'abr')),
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'dynamic_range': the_only_video.get('dynamic_range'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
                'asr': the_only_audio.get('asr'),
            })

        return new_dict

    def _check_formats(formats):
        # Only test-download the formats when check_formats == 'selected'
        if not check_formats:
            yield from formats
            return
        yield from self._check_formats(formats)

    def _build_selector_function(selector):
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    yield from f(ctx)
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                    yield _merge(pair)

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    yield from _check_formats(ctx['formats'][::-1])
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(_check_formats(
                        f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
                # best/worst style atoms: b, w, bv, ba, wv, wa, optionally
                # followed by '*' and/or '.<n>' (e.g. 'bv*.2')
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    _filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else lambda f: True)  # b*, w*
                    filter_f = lambda f: _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                else:
                    if format_spec in self._format_selection_exts['audio']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                    elif format_spec in self._format_selection_exts['video']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
                    elif format_spec in self._format_selection_exts['storyboards']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                    else:
                        filter_f = lambda f: f.get('format_id') == format_spec  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if not matches:
                        if format_fallback and ctx['incomplete_formats']:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        elif seperate_fallback and not ctx['has_merged_format']:
                            # for compatibility with youtube-dl when there is no pre-merged format
                            matches = list(filter(seperate_fallback, formats))
                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                    try:
                        yield matches[format_idx - 1]
                    except LazyList.IndexError:
                        return

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply the selector's '[...]' filters on a copy of the context
            ctx_copy = dict(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode())
    try:
        tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator:
        # Iterator over a token list that allows stepping back by one token
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)

2201

2202

def _calc_headers(self, info_dict):
    """Compute the HTTP headers for the URL in `info_dict`.

    Combines self.params['http_headers'] with the 'http_headers' of
    `info_dict` through merge_headers(), sets 'Cookie' when _calc_cookies()
    gives a value for info_dict['url'], and sets 'X-Forwarded-For' from
    '__x_forwarded_for_ip' unless that header is already present.
    """
    extra_headers = info_dict.get('http_headers') or {}
    headers = merge_headers(self.params['http_headers'], extra_headers)

    cookie_header = self._calc_cookies(info_dict['url'])
    if cookie_header:
        headers['Cookie'] = cookie_header

    if 'X-Forwarded-For' in headers:
        return headers
    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip:
        headers['X-Forwarded-For'] = forwarded_ip
    return headers

def _calc_cookies(self, url):
    """Return the 'Cookie' header that self.cookiejar adds to a request for `url`."""
    request = sanitized_Request(url)
    self.cookiejar.add_cookie_header(request)
    cookie_header = request.get_header('Cookie')
    return cookie_header

2220

2221

def _sort_thumbnails(self, thumbnails):

2222

thumbnails.sort(key=lambda t: (

2223

t.get('preference') if t.get('preference') is not None else -1,

2224

t.get('width') if t.get('width') is not None else -1,

2225

t.get('height') if t.get('height') is not None else -1,

2226

t.get('id') if t.get('id') is not None else '',

2227

t.get('url')))

2228

2229

def _sanitize_thumbnails(self, info_dict):

2230

thumbnails = info_dict.get('thumbnails')

2231

if thumbnails is None:

2232

thumbnail = info_dict.get('thumbnail')

2233

if thumbnail:

2234

info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]

if not thumbnails:

return

def check_thumbnails(thumbnails):

2239

for t in thumbnails:

2240

self.to_screen(f'[info] Testing thumbnail {t["id"]}')

2241

try:

2242

self.urlopen(HEADRequest(t['url']))

2243

except network_exceptions as err:

2244

self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')

continue

yield t

self._sort_thumbnails(thumbnails)

2249

for i, t in enumerate(thumbnails):

2250

if t.get('id') is None:

2251

t['id'] = '%d' % i

2252

if t.get('width') and t.get('height'):

2253

t['resolution'] = '%dx%d' % (t['width'], t['height'])

2254

t['url'] = sanitize_url(t['url'])

2255

2256

if self.params.get('check_formats') is True:

2257

info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)

2258

else:

2259

info_dict['thumbnails'] = thumbnails

2260

2261

def _fill_common_fields(self, info_dict, is_video=True):

2262

# TODO: move sanitization here

2263

if is_video:

2264

# playlists are allowed to lack "title"

2265

title = info_dict.get('title', NO_DEFAULT)

2266

if title is NO_DEFAULT:

2267

raise ExtractorError('Missing "title" field in extractor result',

2268

video_id=info_dict['id'], ie=info_dict['extractor'])

2269

info_dict['fulltitle'] = title

2270

if not title:

2271

if title == '':

2272

self.write_debug('Extractor gave empty title. Creating a generic title')

2273

else:

2274

self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')

2275

info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'

2276

2277

if info_dict.get('duration') is not None:

2278

info_dict['duration_string'] = formatSeconds(info_dict['duration'])

2279

2280

for ts_key, date_key in (

2281

('timestamp', 'upload_date'),

2282

('release_timestamp', 'release_date'),

2283

('modified_timestamp', 'modified_date'),

2284

):

2285

if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:

2286

# Working around out-of-range timestamp values (e.g. negative ones on Windows,

2287

# see http://bugs.python.org/issue1646728)

2288

with contextlib.suppress(ValueError, OverflowError, OSError):

2289

upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])

2290

info_dict[date_key] = upload_date.strftime('%Y%m%d')

2291

2292

live_keys = ('is_live', 'was_live')

2293

live_status = info_dict.get('live_status')

2294

if live_status is None:

2295

for key in live_keys:

2296

if info_dict.get(key) is False:

2297

continue

2298

if info_dict.get(key):

2299

live_status = key

2300

break

2301

if all(info_dict.get(key) is False for key in live_keys):

2302

live_status = 'not_live'

2303

if live_status:

2304

info_dict['live_status'] = live_status

2305

for key in live_keys:

2306

if info_dict.get(key) is None:

2307

info_dict[key] = (live_status == key)

2308

2309

# Auto generate title fields corresponding to the *_number fields when missing

2310

# in order to always have clean titles. This is very common for TV series.

2311

for field in ('chapter', 'season', 'episode'):

2312

if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):

2313

info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

2314

2315

def _raise_pending_errors(self, info):

2316

err = info.pop('__pending_error', None)

2317

if err:

2318

self.report_error(err, tb=False)

2319

2320

def process_video_result(self, info_dict, download=True):

2321

assert info_dict.get('_type', 'video') == 'video'

2322

self._num_videos += 1

2323

2324

if 'id' not in info_dict:

2325

raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])

2326

elif not info_dict.get('id'):

2327

raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

2328

2329

def report_force_conversion(field, field_not, conversion):

2330

self.report_warning(

2331

'"%s" field is not %s - forcing %s conversion, there is an error in extractor'

2332

% (field, field_not, conversion))

2333

2334

def sanitize_string_field(info, string_field):

2335

field = info.get(string_field)

2336

if field is None or isinstance(field, compat_str):

2337

return

2338

report_force_conversion(string_field, 'a string', 'string')

2339

info[string_field] = compat_str(field)

2340

2341

def sanitize_numeric_fields(info):

2342

for numeric_field in self._NUMERIC_FIELDS:

2343

field = info.get(numeric_field)

2344

if field is None or isinstance(field, (int, float)):

2345

continue

2346

report_force_conversion(numeric_field, 'numeric', 'int')

2347

info[numeric_field] = int_or_none(field)

2348

2349

sanitize_string_field(info_dict, 'id')

2350

sanitize_numeric_fields(info_dict)

2351

if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):

2352

self.report_warning('"duration" field is negative, there is an error in extractor')

2353

2354

if 'playlist' not in info_dict:

2355

# It isn't part of a playlist

2356

info_dict['playlist'] = None

2357

info_dict['playlist_index'] = None

2358

2359

self._sanitize_thumbnails(info_dict)

2360

2361

thumbnail = info_dict.get('thumbnail')

2362

thumbnails = info_dict.get('thumbnails')

2363

if thumbnail:

2364

info_dict['thumbnail'] = sanitize_url(thumbnail)

2365

elif thumbnails:

2366

info_dict['thumbnail'] = thumbnails[-1]['url']

2367

2368

if info_dict.get('display_id') is None and 'id' in info_dict:

2369

info_dict['display_id'] = info_dict['id']

2370

2371

self._fill_common_fields(info_dict)

2372

2373

for cc_kind in ('subtitles', 'automatic_captions'):

2374

cc = info_dict.get(cc_kind)

2375

if cc:

2376

for _, subtitle in cc.items():

2377

for subtitle_format in subtitle:

2378

if subtitle_format.get('url'):

2379

subtitle_format['url'] = sanitize_url(subtitle_format['url'])

2380

if subtitle_format.get('ext') is None:

2381

subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

2382

2383

automatic_captions = info_dict.get('automatic_captions')

2384

subtitles = info_dict.get('subtitles')

2385

2386

info_dict['requested_subtitles'] = self.process_subtitles(

2387

info_dict['id'], subtitles, automatic_captions)

2388

2389

if info_dict.get('formats') is None:

2390

# There's only one format available

2391

formats = [info_dict]

2392

else:

2393

formats = info_dict['formats']

2394

2395

# or None ensures --clean-infojson removes it

2396

info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None

2397

if not self.params.get('allow_unplayable_formats'):

2398

formats = [f for f in formats if not f.get('has_drm')]

2399

if info_dict['_has_drm'] and all(

2400

f.get('acodec') == f.get('vcodec') == 'none' for f in formats):

2401

self.report_warning(

2402

'This video is DRM protected and only images are available for download. '

2403

'Use --list-formats to see them')

2404

2405

get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))

2406

if not get_from_start:

2407

info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')

2408

if info_dict.get('is_live') and formats:

2409

formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]

2410

if get_from_start and not formats:

2411

self.raise_no_formats(info_dict, msg=(

2412

'--live-from-start is passed, but there are no formats that can be downloaded from the start. '

2413

'If you want to download from the current time, use --no-live-from-start'))

2414

2415

if not formats:

2416

self.raise_no_formats(info_dict)

2417

2418

def is_wellformed(f):

url = f.get('url')

if not url:

self.report_warning(

'"url" field is missing or empty - skipping format, '

2423

'there is an error in extractor')

2424

return False

2425

if isinstance(url, bytes):

2426

sanitize_string_field(f, 'url')

2427

return True

2428

2429

# Filter out malformed formats for better extraction robustness

2430

formats = list(filter(is_wellformed, formats))

formats_dict = {}

# We check that all the formats have the format and format_id fields

2435

for i, format in enumerate(formats):

2436

sanitize_string_field(format, 'format_id')

2437

sanitize_numeric_fields(format)

2438

format['url'] = sanitize_url(format['url'])

2439

if not format.get('format_id'):

2440

format['format_id'] = compat_str(i)

2441

else:

2442

# Sanitize format_id from characters used in format selector expression

2443

format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])

2444

format_id = format['format_id']

2445

if format_id not in formats_dict:

2446

formats_dict[format_id] = []

2447

formats_dict[format_id].append(format)

2448

2449

# Make sure all formats have unique format_id

2450

common_exts = set(itertools.chain(*self._format_selection_exts.values()))

2451

for format_id, ambiguous_formats in formats_dict.items():

2452

ambigious_id = len(ambiguous_formats) > 1

2453

for i, format in enumerate(ambiguous_formats):

2454

if ambigious_id:

2455

format['format_id'] = '%s-%d' % (format_id, i)

2456

if format.get('ext') is None:

2457

format['ext'] = determine_ext(format['url']).lower()

2458

# Ensure there is no conflict between id and ext in format selection

2459

# See https://github.com/yt-dlp/yt-dlp/issues/1282

2460

if format['format_id'] != format['ext'] and format['format_id'] in common_exts:

2461

format['format_id'] = 'f%s' % format['format_id']

2462

2463

for i, format in enumerate(formats):

2464

if format.get('format') is None:

2465

format['format'] = '{id} - {res}{note}'.format(

2466

id=format['format_id'],

2467

res=self.format_resolution(format),

2468

note=format_field(format, 'format_note', ' (%s)'),

2469

)

2470

if format.get('protocol') is None:

2471

format['protocol'] = determine_protocol(format)

2472

if format.get('resolution') is None:

2473

format['resolution'] = self.format_resolution(format, default=None)

2474

if format.get('dynamic_range') is None and format.get('vcodec') != 'none':

2475

format['dynamic_range'] = 'SDR'

2476

if (info_dict.get('duration') and format.get('tbr')

2477

and not format.get('filesize') and not format.get('filesize_approx')):

2478

format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))

2479

2480

# Add HTTP headers, so that external programs can use them from the

2481

# json output

2482

full_format_info = info_dict.copy()

2483

full_format_info.update(format)

2484

format['http_headers'] = self._calc_headers(full_format_info)

2485

# Remove private housekeeping stuff

2486

if '__x_forwarded_for_ip' in info_dict:

2487

del info_dict['__x_forwarded_for_ip']

2488

2489

if self.params.get('check_formats') is True:

2490

formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

2491

2492

if not formats or formats[0] is not info_dict:

2493

# only set the 'formats' fields if the original info_dict list them

2494

# otherwise we end up with a circular reference, the first (and unique)

2495

# element in the 'formats' field in info_dict is info_dict itself,

2496

# which can't be exported to json

2497

info_dict['formats'] = formats

2498

2499

info_dict, _ = self.pre_process(info_dict)

2500

2501

if self._match_entry(info_dict, incomplete=self._format_fields) is not None:

2502

return info_dict

2503

2504

self.post_extract(info_dict)

2505

info_dict, _ = self.pre_process(info_dict, 'after_filter')

2506

2507

# The pre-processors may have modified the formats

2508

formats = info_dict.get('formats', [info_dict])

2509

2510

list_only = self.params.get('simulate') is None and (

2511

self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))

2512

interactive_format_selection = not list_only and self.format_selector == '-'

2513

if self.params.get('list_thumbnails'):

2514

self.list_thumbnails(info_dict)

2515

if self.params.get('listsubtitles'):

2516

if 'automatic_captions' in info_dict:

2517

self.list_subtitles(

2518

info_dict['id'], automatic_captions, 'automatic captions')

2519

self.list_subtitles(info_dict['id'], subtitles, 'subtitles')

2520

if self.params.get('listformats') or interactive_format_selection:

2521

self.list_formats(info_dict)

2522

if list_only:

2523

# Without this printing, -F --print-json will not work

2524

self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)

2525

return info_dict

2526

2527

format_selector = self.format_selector

2528

if format_selector is None:

2529

req_format = self._default_format_spec(info_dict, download=download)

2530

self.write_debug('Default format spec: %s' % req_format)

2531

format_selector = self.build_format_selector(req_format)

2532

2533

while True:

2534

if interactive_format_selection:

2535

req_format = input(

2536

self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))

2537

try:

2538

format_selector = self.build_format_selector(req_format)

2539

except SyntaxError as err:

2540

self.report_error(err, tb=False, is_error=False)

2541

continue

2542

2543

formats_to_download = list(format_selector({

2544

'formats': formats,

2545

'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),

2546

'incomplete_formats': (

2547

# All formats are video-only or

2548

all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)

2549

# all formats are audio-only

2550

or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),

2551

}))

2552

if interactive_format_selection and not formats_to_download:

2553

self.report_error('Requested format is not available', tb=False, is_error=False)

continue

break

if not formats_to_download:

2558

if not self.params.get('ignore_no_formats_error'):

2559

raise ExtractorError(

2560

'Requested format is not available. Use --list-formats for a list of available formats',

2561

expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])

2562

self.report_warning('Requested format is not available')

2563

# Process what we can, even without any available formats.

2564

formats_to_download = [{}]

2565

2566

requested_ranges = self.params.get('download_ranges')

2567

if requested_ranges:

2568

requested_ranges = tuple(requested_ranges(info_dict, self))

2569

2570

best_format, downloaded_formats = formats_to_download[-1], []

if download:

if best_format:

def to_screen(*msg):

self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')

2575

2576

to_screen(f'Downloading {len(formats_to_download)} format(s):',

2577

(f['format_id'] for f in formats_to_download))

2578

if requested_ranges:

2579

to_screen(f'Downloading {len(requested_ranges)} time ranges:',

2580

(f'{int(c["start_time"])}-{int(c["end_time"])}' for c in requested_ranges))

2581

max_downloads_reached = False

2582

2583

for fmt, chapter in itertools.product(formats_to_download, requested_ranges or [{}]):

2584

new_info = self._copy_infodict(info_dict)

new_info.update(fmt)

if chapter:

new_info.update({

'section_start': chapter.get('start_time'),

2589

'section_end': chapter.get('end_time', 0),

2590

'section_title': chapter.get('title'),

2591

'section_number': chapter.get('index'),

2592

})

2593

downloaded_formats.append(new_info)

2594

try:

2595

self.process_info(new_info)

2596

except MaxDownloadsReached:

2597

max_downloads_reached = True

2598

self._raise_pending_errors(new_info)

2599

# Remove copied info

2600

for key, val in tuple(new_info.items()):

2601

if info_dict.get(key) == val:

2602

new_info.pop(key)

2603

if max_downloads_reached:

2604

break

2605

2606

write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}

2607

assert write_archive.issubset({True, False, 'ignore'})

2608

if True in write_archive and False not in write_archive:

2609

self.record_download_archive(info_dict)

2610

2611

info_dict['requested_downloads'] = downloaded_formats

2612

info_dict = self.run_all_pps('after_video', info_dict)

2613

if max_downloads_reached:

2614

raise MaxDownloadsReached()

2615

2616

# We update the info dict with the selected best quality format (backwards compatibility)

2617

info_dict.update(best_format)

2618

return info_dict

2619

2620

def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    Normal subtitles are considered only when the 'writesubtitles' param is
    set and automatic captions only when 'writeautomaticsub' is set. When
    both provide the same language, the normal subtitles are used.

    Arguments:
    video_id -- only used in the warning messages
    normal_subtitles -- mapping of language to a list of format dicts, or None
    automatic_captions -- same layout as normal_subtitles, or None

    Returns a dict mapping each selected language to the chosen format dict,
    or None when no subtitles were requested or none are available.
    """
    available_subs, normal_sub_langs = {}, []
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
        normal_sub_langs = tuple(normal_subtitles.keys())
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            if lang not in available_subs:
                available_subs[lang] = cap_info

    # available_subs is only ever filled when one of the two params above is
    # set, so an empty mapping also covers the "nothing requested" case
    if not available_subs:
        return None

    all_sub_langs = tuple(available_subs.keys())
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        # A list is used so that the order of languages will be the same as
        # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
        requested_langs = []
        for lang_re in self.params.get('subtitleslangs'):
            # startswith() instead of lang_re[0] so that an empty entry
            # does not raise IndexError
            discard = lang_re.startswith('-')
            if discard:
                lang_re = lang_re[1:]
            if lang_re == 'all':
                if discard:
                    requested_langs = []
                else:
                    requested_langs.extend(all_sub_langs)
                continue
            current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
            if discard:
                for lang in current_langs:
                    while lang in requested_langs:
                        requested_langs.remove(lang)
            else:
                requested_langs.extend(current_langs)
        requested_langs = orderedSet(requested_langs)
    elif normal_sub_langs:
        requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
    else:
        requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
    if requested_langs:
        self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty list has nothing to pick from either; treat it like a
        # missing language instead of failing on formats[-1]
        if not formats:
            self.report_warning(f'{lang} subtitles not available for {video_id}')
            continue
        for ext in formats_preference:
            if ext == 'best':
                f = formats[-1]
                break
            matches = list(filter(lambda f: f['ext'] == ext, formats))
            if matches:
                f = matches[-1]
                break
        else:
            # No preference matched (and 'best' was not listed): fall back to the last format
            f = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, f['ext']))
        subs[lang] = f
    return subs

def _forceprint(self, key, info_dict):
    """Evaluate the 'forceprint' and 'print_to_file' templates registered for key

    Arguments:
    key -- entry of the 'forceprint'/'print_to_file' params to process
    info_dict -- info dict the templates are evaluated against;
                 nothing is done when it is None
    """
    if info_dict is None:
        return
    # The rendered tables are added to a copy only, so the caller's dict is left untouched
    info_copy = info_dict.copy()
    info_copy['formats_table'] = self.render_formats_table(info_dict)
    info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
    info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
    info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))

    def format_tmpl(tmpl):
        # Shorthands: "field" becomes "%(field)s" and "field=" becomes
        # "field = %(field)r". Anything else is used as a template as-is
        mobj = re.match(r'\w+(=?)$', tmpl)
        if mobj and mobj.group(1):
            return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
        elif mobj:
            return f'%({tmpl})s'
        return tmpl

    for tmpl in self.params['forceprint'].get(key, []):
        self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))

    for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
        filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
        tmpl = format_tmpl(tmpl)
        self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
        if self._ensure_dir_exists(filename):
            # Opened in append mode: one line is added per call
            with open(filename, 'a', encoding='utf-8') as f:
                f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')

2719

2720

def __forced_printings(self, info_dict, filename, incomplete):
    """Print the fields requested through the 'force*' params to stdout

    Arguments:
    info_dict -- info dict of the video; a shallow copy is used internally
    filename -- value to expose as the 'filename' field, or None
    incomplete -- when true, mandatory fields missing from info_dict are
                  skipped instead of being looked up unconditionally
    """
    def print_mandatory(field, actual_field=None):
        # Looks the field up unconditionally (KeyError if absent) unless incomplete is set
        if actual_field is None:
            actual_field = field
        if (self.params.get('force%s' % field, False)
                and (not incomplete or info_dict.get(actual_field) is not None)):
            self.to_stdout(info_dict[actual_field])

    def print_optional(field):
        if (self.params.get('force%s' % field, False)
                and info_dict.get(field) is not None):
            self.to_stdout(info_dict[field])

    info_dict = info_dict.copy()
    if filename is not None:
        info_dict['filename'] = filename
    if info_dict.get('requested_formats') is not None:
        # For RTMP URLs, also include the playpath
        info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
    elif info_dict.get('url'):
        info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

    if (self.params.get('forcejson')
            or self.params['forceprint'].get('video')
            or self.params['print_to_file'].get('video')):
        self.post_extract(info_dict)
    self._forceprint('video', info_dict)

    print_mandatory('title')
    print_mandatory('id')
    print_mandatory('url', 'urls')
    print_optional('thumbnail')
    print_optional('description')
    print_optional('filename')
    if self.params.get('forceduration') and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    print_mandatory('format')

    if self.params.get('forcejson'):
        self.to_stdout(json.dumps(self.sanitize_info(info_dict)))

2760

2761

def dl(self, name, info, subtitle=False, test=False):
    """Download info['url'] to name with a suitable downloader

    Arguments:
    name -- output filename; '-' selects a downloader that writes to stdout
    info -- info dict of the format to download
    subtitle -- passed through to the downloader's download()
    test -- use a fixed set of test params instead of self.params and
            do not attach the progress hooks

    Returns the result of the downloader's download(); process_info
    unpacks it as (success, real_download).
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    if test:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }
    else:
        params = self.params
    fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
    if not test:
        for ph in self._progress_hooks:
            fd.add_progress_hook(ph)
        # data: URLs are cut at the first comma so that the payload is not logged
        urls = '", "'.join(
            (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
            for f in info.get('requested_formats', []) or [info])
        self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')

    # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
    # But it may contain objects that are not deep-copyable
    new_info = self._copy_infodict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)

2795

2796

def existing_file(self, filepaths, *, default_overwrite=True):
    """Return the first already existing file, or delete the existing files

    Arguments:
    filepaths -- candidate paths; duplicates are dropped, order is kept
    default_overwrite -- value used when the 'overwrites' param is not set

    When overwriting is disabled, the first existing path is returned.
    Otherwise every existing path is deleted and None is returned.
    """
    existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
    if existing_files and not self.params.get('overwrites', default_overwrite):
        return existing_files[0]

    for file in existing_files:
        self.report_file_delete(file)
        os.remove(file)
    return None

def process_info(self, info_dict):
    """Process a single resolved IE result. (Modifies it in-place)

    In order: applies the match filter, performs the forced printings,
    writes the side files (description, subtitles, thumbnails, info json,
    annotations, internet shortcuts), runs the 'before_dl' pre-processors,
    downloads the requested format(s) unless 'simulate' or 'skip_download'
    is set, queues the merge/fixup postprocessors and finally runs the
    post-processing and the post hooks.

    info_dict['__write_download_archive'] is set to 'ignore' when the entry
    is filtered out, to the 'force_write_download_archive' param when
    simulating or skipping the download, and to True after a successful
    download or when that param is set. Most failure paths report the
    error and return early without setting it.

    Raises MaxDownloadsReached once 'max_downloads' has been reached and
    UnavailableVideoError when an OSError occurs while downloading.
    """

    assert info_dict.get('_type', 'video') == 'video'
    original_infodict = info_dict

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    # This is mostly just for backward compatibility of process_info
    # As a side-effect, this allows for format-specific filters
    if self._match_entry(info_dict) is not None:
        info_dict['__write_download_archive'] = 'ignore'
        return

    # Does nothing under normal operation - for backward compatibility of process_info
    self.post_extract(info_dict)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    def check_max_downloads():
        # A missing/falsy 'max_downloads' means there is no limit
        if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
            raise MaxDownloadsReached()

    if self.params.get('simulate'):
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
        check_max_downloads()
        return

    if full_filename is None:
        return
    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self._write_description('video', info_dict,
                               self.prepare_filename(info_dict, 'description')) is None:
        return

    sub_files = self._write_subtitles(info_dict, temp_filename)
    if sub_files is None:
        return
    files_to_move.update(dict(sub_files))

    thumb_files = self._write_thumbnails(
        'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
    if thumb_files is None:
        return
    files_to_move.update(dict(thumb_files))

    infofn = self.prepare_filename(info_dict, 'infojson')
    _infojson_written = self._write_info_json('video', info_dict, infofn)
    if _infojson_written:
        info_dict['infojson_filename'] = infofn
        # For backward compatibility, even though it was a private field
        info_dict['__infojson_filename'] = infofn
    elif _infojson_written is None:
        return

    # Note: Annotations are deprecated
    annofn = None
    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
    if annofn:
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except OSError:
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    # Write internet shortcut files
    def _write_link_file(link_type):
        # Returns False only on a failure that must abort the processing
        url = try_get(info_dict['webpage_url'], iri_to_uri)
        if not url:
            self.report_warning(
                f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
            return True
        linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
        if not self._ensure_dir_exists(encodeFilename(linkfn)):
            return False
        # Keep an existing shortcut only when overwriting is disabled, the same
        # way the annotations file is handled above. The condition used to be
        # inverted, which skipped the write exactly when overwriting was enabled
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
            return True
        try:
            self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
            with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                      newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                template_vars = {'url': url}
                if link_type == 'desktop':
                    # The file name without the ".desktop" suffix
                    template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
        except OSError:
            self.report_error(f'Cannot write internet shortcut {linkfn}')
            return False
        return True

    write_links = {
        'url': self.params.get('writeurllink'),
        'webloc': self.params.get('writewebloclink'),
        'desktop': self.params.get('writedesktoplink'),
    }
    if self.params.get('writelink'):
        # 'writelink' selects the shortcut type from the current platform
        link_type = ('webloc' if sys.platform == 'darwin'
                     else 'desktop' if sys.platform.startswith('linux')
                     else 'url')
        write_links[link_type] = True

    if any(should_write and not _write_link_file(link_type)
           for link_type, should_write in write_links.items()):
        return

    def replace_info_dict(new_info):
        # Copy the new contents into the existing dict object so that the
        # caller's reference keeps seeing the changes
        nonlocal info_dict
        if new_info == info_dict:
            return
        info_dict.clear()
        info_dict.update(new_info)

    new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    replace_info_dict(new_info)

    if self.params.get('skip_download'):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_video_file(*filepaths):
                # For each path, the name with the 'final_ext' extension is checked before the path itself
                ext = info_dict.get('ext')
                converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
                file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
                                          default_overwrite=False)
                if file:
                    info_dict['ext'] = os.path.splitext(file)[1][1:]
                return file

            success = True
            merger, fd = FFmpegMergerPP(self), None
            if info_dict.get('protocol') or info_dict.get('url'):
                fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                if fd is not FFmpegFD and (
                        info_dict.get('section_start') or info_dict.get('section_end')):
                    msg = ('This format cannot be partially downloaded' if merger.available
                           else 'You have requested downloading the video partially, but ffmpeg is not installed')
                    self.report_error(f'{msg}. Aborting')
                    return

            if info_dict.get('requested_formats') is not None:

                def compatible_formats(formats):
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = {format.get('ext') for format in formats}
                    COMPATIBLE_EXTS = (
                        {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
                        {'webm'},
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv')
                    if (info_dict['ext'] == 'webm'
                            and info_dict.get('thumbnails')
                            # check with type instead of pp_key, __name__, or isinstance
                            # since we dont want any custom PPs to trigger this
                            and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    # Only strip the current extension when it is one of ours
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return f'{filename_wo_ext}.{ext}'

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_video_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                downloaded = []
                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif fd:
                    # A single downloader handles all the requested formats at once
                    for f in requested_formats if fd != FFmpegFD else []:
                        f['filepath'] = fname = prepend_extension(
                            correct_ext(temp_filename, info_dict['ext']),
                            'f%s' % f['format_id'], info_dict['ext'])
                        downloaded.append(fname)
                    info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
                        if not self.params.get('ignoreerrors'):
                            self.report_error(f'{msg}. Aborting due to --abort-on-error')
                            return
                        self.report_warning(f'{msg}. The formats won\'t be merged')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                        fname = temp_filename
                    # Each requested format is downloaded separately
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            f['filepath'] = fname
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success

                if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
                    info_dict['__postprocessors'].append(merger)
                    info_dict['__files_to_merge'] = downloaded
                    # Even if there were no downloads, it is being merged only now
                    info_dict['__real_download'] = True
                else:
                    for file in downloaded:
                        files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_video_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except OSError as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
            return

        self._raise_pending_errors(info_dict)
        if success and full_filename != '-':

            def fixup():
                # do_fixup is True (queue the postprocessor), 'warn' (only warn) or False (skip)
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = 'warn'
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    if not (do_fixup and cndn):
                        return
                    elif do_fixup == 'warn':
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(
                    stretched_ratio not in (1, None),
                    f'Non-uniform pixel ratio {stretched_ratio}',
                    FFmpegFixupStretchedPP)

                ffmpeg_fixup(
                    (info_dict.get('requested_formats') is None
                     and info_dict.get('container') == 'm4a_dash'
                     and info_dict.get('ext') == 'm4a'),
                    'writing DASH m4a. Only some players support this container',
                    FFmpegFixupM4aPP)

                downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                downloader = downloader.FD_NAME if downloader else None

                if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
                    ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
                                 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
                                 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                 FFmpegFixupM3u8PP)
                    ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
                                 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

                ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            info_dict['__write_download_archive'] = True

    assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
    if self.params.get('force_write_download_archive'):
        info_dict['__write_download_archive'] = True
    check_max_downloads()

3185

3186

def __download_wrapper(self, func):

3187

@functools.wraps(func)

3188

def wrapper(*args, **kwargs):

3189

try:

3190

res = func(*args, **kwargs)

3191

except UnavailableVideoError as e:

3192

self.report_error(e)

3193

except DownloadCancelled as e:

3194

self.to_screen(f'[info] {e}')

3195

if not self.params.get('break_per_url'):

3196

raise

3197

else:

3198

if self.params.get('dump_single_json', False):

3199

self.post_extract(res)

3200

self.to_stdout(json.dumps(self.sanitize_info(res)))

3201

return wrapper

3202

3203

def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when more than one URL is given while the default
    output template is a fixed name (not '-' and without any '%' field),
    unless the 'max_downloads' param is 1.
    Returns the value of self._download_retcode.
    """
    urls = variadic(url_list)  # Passing a single URL is a common mistake
    template = self.outtmpl_dict['default']
    several_urls = len(urls) > 1
    if (several_urls and template != '-' and '%' not in template
            and self.params.get('max_downloads') != 1):
        raise SameFileError(template)

    for url in urls:
        extract = self.__download_wrapper(self.extract_info)
        extract(url, force_generic_extractor=self.params.get('force_generic_extractor', False))

    return self._download_retcode

3218

3219

def download_with_info_file(self, info_filename):
    """Download from a previously written info JSON file.

    The file is read as UTF-8, parsed as JSON and sanitized (private keys are
    removed according to the 'clean_infojson' param) before being processed.
    If processing raises DownloadError, EntryNotInPlaylist or ReExtractInfo and
    the info has a 'webpage_url', a warning is issued and that URL is
    downloaded instead; without a 'webpage_url' the exception is re-raised.
    Returns the value of self._download_retcode, or the result of download()
    when falling back to the URL.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r', openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        info = self.sanitize_info(json.loads('\n'.join(lines)), self.params.get('clean_infojson', True))

    try:
        self.__download_wrapper(self.process_ie_result)(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as err:
        if not isinstance(err, EntryNotInPlaylist):
            self.to_stderr('\r')
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning(f'The info failed to download: {err}; trying with URL {fallback_url}')
        return self.download([fallback_url])
    return self._download_retcode

3237

3238

@staticmethod

3239

def sanitize_info(info_dict, remove_private_keys=False):

3240

''' Sanitize the infodict for converting to json '''

3241

if info_dict is None:

3242

return info_dict

3243

info_dict.setdefault('epoch', int(time.time()))

3244

info_dict.setdefault('_type', 'video')

3245

3246

if remove_private_keys:

3247

reject = lambda k, v: v is None or k.startswith('__') or k in {

3248

'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',

3249

'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',

3250

}

3251

else:

3252

reject = lambda k, v: False

3253

3254

def filter_fn(obj):

3255

if isinstance(obj, dict):

3256

return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}

3257

elif isinstance(obj, (list, tuple, set, LazyList)):

3258

return list(map(filter_fn, obj))

3259

elif obj is None or isinstance(obj, (str, int, float, bool)):

return obj

else:

return repr(obj)

return filter_fn(info_dict)

3265

3266

@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    ''' Backward-compatible alias of sanitize_info; actually_filter maps to remove_private_keys '''
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)

3270

3271

def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):

3272

for filename in set(filter(None, files_to_delete)):

3273

if msg:

3274

self.to_screen(msg % filename)

try:

os.remove(filename)

except OSError:

self.report_warning(f'Unable to delete file {filename}')

3279

if filename in info.get('__files_to_move', []): # NB: Delete even if None

3280

del info['__files_to_move'][filename]

3281

3282

@staticmethod

3283

def post_extract(info_dict):

3284

def actual_post_extract(info_dict):

3285

if info_dict.get('_type') in ('playlist', 'multi_video'):

3286

for video_dict in info_dict.get('entries', {}):

3287

actual_post_extract(video_dict or {})

3288

return

3289

3290

post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})

3291

info_dict.update(post_extractor())

3292

3293

actual_post_extract(info_dict or {})

3294

3295

def run_pp(self, pp, infodict):

3296

files_to_delete = []

3297

if '__files_to_move' not in infodict:

3298

infodict['__files_to_move'] = {}

3299

try:

3300

files_to_delete, infodict = pp.run(infodict)

3301

except PostProcessingError as e:

3302

# Must be True and not 'only_download'

3303

if self.params.get('ignoreerrors') is True:

self.report_error(e)

return infodict

raise

if not files_to_delete:

3309

return infodict

3310

if self.params.get('keepvideo', False):

3311

for f in files_to_delete:

3312

infodict['__files_to_move'].setdefault(f, '')

3313

else:

3314

self._delete_downloaded_files(

3315

*files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')

3316

return infodict

3317

3318

def run_all_pps(self, key, info, *, additional_pps=None):
    """Run every postprocessor registered under ``key`` and return the resulting info.

    ``self._forceprint(key, info)`` is called first. ``additional_pps``, when
    given, run before the postprocessors stored in ``self._pps[key]``.
    """
    self._forceprint(key, info)
    extra = additional_pps if additional_pps else []
    pipeline = extra + self._pps[key]
    for postprocessor in pipeline:
        info = self.run_pp(postprocessor, info)
    return info

3323

3324

def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered under ``key`` on a copy of ``ie_info``.

    A PostProcessingError does not propagate: its message is stored under
    '__pending_error' (unless one is already present) and reported with
    is_error=False.
    Returns a tuple (info, files_to_move), where files_to_move is the
    '__files_to_move' entry popped from the resulting info.
    """
    working = dict(ie_info)
    working['__files_to_move'] = files_to_move if files_to_move else {}
    try:
        working = self.run_all_pps(key, working)
    except PostProcessingError as err:
        message = f'Preprocessing: {err}'
        working.setdefault('__pending_error', message)
        self.report_error(message, is_error=False)
    pending_moves = working.pop('__files_to_move', None)
    return working, pending_moves

3334

3335

def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Stores ``filename`` as info['filepath'], runs the 'post_process'
    postprocessors (preceded by any listed in info['__postprocessors']),
    then MoveFilesAfterDownloadPP, and finally the 'after_move'
    postprocessors, whose resulting info is returned.
    """
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move if files_to_move else {}
    processed = self.run_all_pps(
        'post_process', info, additional_pps=info.get('__postprocessors'))
    moved = self.run_pp(MoveFilesAfterDownloadPP(self), processed)
    del moved['__files_to_move']
    return self.run_all_pps('after_move', moved)

3343

3344

def _make_archive_id(self, info_dict):

3345

video_id = info_dict.get('id')

3346

if not video_id:

3347

return

3348

# Future-proof against any change in case

3349

# and backwards compatibility with prior versions

3350

extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist

3351

if extractor is None:

3352

url = str_or_none(info_dict.get('url'))

3353

if not url:

3354

return

3355

# Try to find matching extractor for the URL and take its ie_key

3356

for ie_key, ie in self._ies.items():

if ie.suitable(url):

extractor = ie_key

break

else:

return

return f'{extractor.lower()} {video_id}'

3363

3364

def in_download_archive(self, info_dict):

3365

fn = self.params.get('download_archive')

if fn is None:

return False

vid_id = self._make_archive_id(info_dict)

3370

if not vid_id:

3371

return False # Incomplete video information

3372

3373

return vid_id in self.archive

3374

3375

def record_download_archive(self, info_dict):

3376

fn = self.params.get('download_archive')

3377

if fn is None:

3378

return

3379

vid_id = self._make_archive_id(info_dict)

3380

assert vid_id

3381

self.write_debug(f'Adding to archive: {vid_id}')

3382

with locked_file(fn, 'a', encoding='utf-8') as archive_file:

3383

archive_file.write(vid_id + '\n')

3384

self.archive.add(vid_id)

3385

3386

@staticmethod

3387

def format_resolution(format, default='unknown'):

3388

if format.get('vcodec') == 'none' and format.get('acodec') != 'none':

3389

return 'audio only'

3390

if format.get('resolution') is not None:

3391

return format['resolution']

3392

if format.get('width') and format.get('height'):

3393

return '%dx%d' % (format['width'], format['height'])

3394

elif format.get('height'):

3395

return '%sp' % format['height']

3396

elif format.get('width'):

3397

return '%dx?' % format['width']

3398

return default

3399

3400

def _list_format_headers(self, *headers):

3401

if self.params.get('listformats_table', True) is not False:

3402

return [self._format_out(header, self.Styles.HEADERS) for header in headers]

3403

return headers

3404

3405

def _format_note(self, fdict):

3406

res = ''

3407

if fdict.get('ext') in ['f4f', 'f4m']:

3408

res += '(unsupported)'

3409

if fdict.get('language'):

3410

if res:

3411

res += ' '

3412

res += '[%s]' % fdict['language']

3413

if fdict.get('format_note') is not None:

3414

if res:

3415

res += ' '

3416

res += fdict['format_note']

3417

if fdict.get('tbr') is not None:

3418

if res:

3419

res += ', '

3420

res += '%4dk' % fdict['tbr']

3421

if fdict.get('container') is not None:

3422

if res:

3423

res += ', '

3424

res += '%s container' % fdict['container']

3425

if (fdict.get('vcodec') is not None

3426

and fdict.get('vcodec') != 'none'):

3427

if res:

3428

res += ', '

3429

res += fdict['vcodec']

3430

if fdict.get('vbr') is not None:

3431

res += '@'

3432

elif fdict.get('vbr') is not None and fdict.get('abr') is not None:

3433

res += 'video@'

3434

if fdict.get('vbr') is not None:

3435

res += '%4dk' % fdict['vbr']

3436

if fdict.get('fps') is not None:

3437

if res:

3438

res += ', '

3439

res += '%sfps' % fdict['fps']

3440

if fdict.get('acodec') is not None:

3441

if res:

3442

res += ', '

3443

if fdict['acodec'] == 'none':

3444

res += 'video only'

3445

else:

3446

res += '%-5s' % fdict['acodec']

3447

elif fdict.get('abr') is not None:

if res:

res += ', '

res += 'audio'

if fdict.get('abr') is not None:

3452

res += '@%3dk' % fdict['abr']

3453

if fdict.get('asr') is not None:

3454

res += ' (%5dHz)' % fdict['asr']

3455

if fdict.get('filesize') is not None:

3456

if res:

3457

res += ', '

3458

res += format_bytes(fdict['filesize'])

3459

elif fdict.get('filesize_approx') is not None:

3460

if res:

3461

res += ', '

3462

res += '~' + format_bytes(fdict['filesize_approx'])

3463

return res

3464

3465

def render_formats_table(self, info_dict):

3466

if not info_dict.get('formats') and not info_dict.get('url'):

3467

return None

3468

3469

formats = info_dict.get('formats', [info_dict])

3470

if not self.params.get('listformats_table', True) is not False:

3471

table = [

3472

[

3473

format_field(f, 'format_id'),

3474

format_field(f, 'ext'),

3475

self.format_resolution(f),

3476

self._format_note(f)

3477

] for f in formats if f.get('preference') is None or f['preference'] >= -1000]

3478

return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)

3479

3480

delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)

3481

table = [

3482

[

3483

self._format_out(format_field(f, 'format_id'), self.Styles.ID),

3484

format_field(f, 'ext'),

3485

format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),

3486

format_field(f, 'fps', '\t%d'),

3487

format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),

3488

delim,

3489

format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),

3490

format_field(f, 'tbr', '\t%dk'),

3491

shorten_protocol_name(f.get('protocol', '')),

3492

delim,

3493

format_field(f, 'vcodec', default='unknown').replace(

3494

'none', 'images' if f.get('acodec') == 'none'

3495

else self._format_out('audio only', self.Styles.SUPPRESS)),

3496

format_field(f, 'vbr', '\t%dk'),

3497

format_field(f, 'acodec', default='unknown').replace(

3498

'none', '' if f.get('vcodec') == 'none'

3499

else self._format_out('video only', self.Styles.SUPPRESS)),

3500

format_field(f, 'abr', '\t%dk'),

3501

format_field(f, 'asr', '\t%dHz'),

3502

join_nonempty(

3503

self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,

3504

format_field(f, 'language', '[%s]'),

3505

join_nonempty(format_field(f, 'format_note'),

3506

format_field(f, 'container', ignore=(None, f.get('ext'))),

3507

delim=', '),

3508

delim=' '),

3509

] for f in formats if f.get('preference') is None or f['preference'] >= -1000]

3510

header_line = self._list_format_headers(

3511

'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',

3512

delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

3513

3514

return render_table(

3515

header_line, table, hide_empty=True,

3516

delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))

3517

3518

def render_thumbnails_table(self, info_dict):

3519

thumbnails = list(info_dict.get('thumbnails') or [])

if not thumbnails:

return None

return render_table(

self._list_format_headers('ID', 'Width', 'Height', 'URL'),

3524

[[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])

3525

3526

def render_subtitles_table(self, video_id, subtitles):

3527

def _row(lang, formats):

3528

exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))

3529

if len(set(names)) == 1:

3530

names = [] if names[0] == 'unknown' else names[:1]

3531

return [lang, ', '.join(names), ', '.join(exts)]

if not subtitles:

return None

return render_table(

self._list_format_headers('Language', 'Name', 'Formats'),

3537

[_row(lang, formats) for lang, formats in subtitles.items()],

3538

hide_empty=True)

3539

3540

def __list_table(self, video_id, name, func, *args):
    """Print the table produced by ``func(*args)``, or a notice when it is empty.

    ``name`` is the plural noun used in the messages (e.g. 'formats').
    """
    table = func(*args)
    if table:
        self.to_screen(f'[info] Available {name} for {video_id}:')
        self.to_stdout(table)
    else:
        self.to_screen(f'{video_id} has no {name}')

3547

3548

def list_formats(self, info_dict):
    """Print the formats table of info_dict, or a notice if there are none."""
    self.__list_table(
        info_dict['id'], 'formats',
        self.render_formats_table, info_dict)

3550

3551

def list_thumbnails(self, info_dict):
    """Print the thumbnails table of info_dict, or a notice if there are none."""
    self.__list_table(
        info_dict['id'], 'thumbnails',
        self.render_thumbnails_table, info_dict)

3553

3554

def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the table of ``subtitles`` for ``video_id``; ``name`` is the noun used in the messages."""
    self.__list_table(
        video_id, name,
        self.render_subtitles_table, video_id, subtitles)

3556

3557

def urlopen(self, req):
    """ Start an HTTP download

    ``req`` may be a URL string (wrapped with sanitized_Request) or a request
    object; it is opened through self._opener with the socket timeout.
    """
    request = sanitized_Request(req) if isinstance(req, str) else req
    return self._opener.open(request, timeout=self._socket_timeout)

3562

3563

def print_debug_header(self):

3564

if not self.params.get('verbose'):

3565

return

3566

3567

# These imports can be slow. So import them only as needed

3568

from .extractor.extractors import _LAZY_LOADER

3569

from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors

3570

3571

def get_encoding(stream):

3572

ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))

3573

if not supports_terminal_sequences(stream):

3574

from .utils import WINDOWS_VT_MODE # Must be imported locally

3575

ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'

3576

return ret

3577

3578

encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (

3579

locale.getpreferredencoding(),

3580

sys.getfilesystemencoding(),

3581

self.get_encoding(),

3582

', '.join(

3583

f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_

3584

if stream is not None and key != 'console')

3585

)

3586

3587

logger = self.params.get('logger')

3588

if logger:

3589

write_debug = lambda msg: logger.debug(f'[debug] {msg}')

3590

write_debug(encoding_str)

3591

else:

3592

write_string(f'[debug] {encoding_str}\n', encoding=None)

3593

write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

3594

3595

source = detect_variant()

3596

write_debug(join_nonempty(

3597

'yt-dlp version', __version__,

3598

f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',

3599

'' if source == 'unknown' else f'({source})',

3600

delim=' '))

3601

if not _LAZY_LOADER:

3602

if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):

3603

write_debug('Lazy loading extractors is forcibly disabled')

3604

else:

3605

write_debug('Lazy loading extractors is disabled')

3606

if plugin_extractors or plugin_postprocessors:

3607

write_debug('Plugins: %s' % [

3608

'%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')

3609

for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])

3610

if self.params['compat_opts']:

3611

write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

3612

3613

if source == 'source':

3614

try:

3615

stdout, _, _ = Popen.run(

3616

['git', 'rev-parse', '--short', 'HEAD'],

3617

text=True, cwd=os.path.dirname(os.path.abspath(__file__)),

3618

stdout=subprocess.PIPE, stderr=subprocess.PIPE)

3619

if re.fullmatch('[0-9a-f]+', stdout.strip()):

3620

write_debug(f'Git HEAD: {stdout.strip()}')

3621

except Exception:

3622

with contextlib.suppress(Exception):

3623

sys.exc_clear()

3624

3625

def python_implementation():

3626

impl_name = platform.python_implementation()

3627

if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):

3628

return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]

3629

return impl_name

3630

3631

write_debug('Python version %s (%s %s) - %s' % (

3632

platform.python_version(),

3633

python_implementation(),

3634

platform.architecture()[0],

3635

platform_name()))

3636

3637

exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)

3638

ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}

3639

if ffmpeg_features:

3640

exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

3641

3642

exe_versions['rtmpdump'] = rtmpdump_version()

3643

exe_versions['phantomjs'] = PhantomJSwrapper._version()

3644

exe_str = ', '.join(

3645

f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v

3646

) or 'none'

3647

write_debug('exe versions: %s' % exe_str)

3648

3649

from .compat.compat_utils import get_package_info

3650

from .dependencies import available_dependencies

3651

3652

write_debug('Optional libraries: %s' % (', '.join(sorted({

3653

join_nonempty(*get_package_info(m)) for m in available_dependencies.values()

})) or 'none'))

self._setup_opener()

proxy_map = {}

for handler in self._opener.handlers:

3659

if hasattr(handler, 'proxies'):

3660

proxy_map.update(handler.proxies)

3661

write_debug(f'Proxy map: {proxy_map}')

3662

3663

# Not implemented

3664

if False and self.params.get('call_home'):

3665

ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()

3666

write_debug('Public IP address: %s' % ipaddr)

3667

latest_version = self.urlopen(

3668

'https://yt-dl.org/latest/version').read().decode()

3669

if version_tuple(latest_version) > version_tuple(__version__):

3670

self.report_warning(

3671

'You are using an outdated version (newest version: %s)! '

3672

'See https://yt-dl.org/update if you need help updating.' %

3673

latest_version)

3674

3675

def _setup_opener(self):

3676

if hasattr(self, '_opener'):

3677

return

3678

timeout_val = self.params.get('socket_timeout')

3679

self._socket_timeout = 20 if timeout_val is None else float(timeout_val)

3680

3681

opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')

3682

opts_cookiefile = self.params.get('cookiefile')

3683

opts_proxy = self.params.get('proxy')

3684

3685

self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)

3686

3687

cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

3688

if opts_proxy is not None:

if opts_proxy == '':

proxies = {}

else:

proxies = {'http': opts_proxy, 'https': opts_proxy}

3693

else:

3694

proxies = compat_urllib_request.getproxies()

3695

# Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)

3696

if 'http' in proxies and 'https' not in proxies:

3697

proxies['https'] = proxies['http']

3698

proxy_handler = PerRequestProxyHandler(proxies)

3699

3700

debuglevel = 1 if self.params.get('debug_printtraffic') else 0

3701

https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)

3702

ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)

3703

redirect_handler = YoutubeDLRedirectHandler()

3704

data_handler = urllib.request.DataHandler()

3705

3706

# When passing our own FileHandler instance, build_opener won't add the

3707

# default FileHandler and allows us to disable the file protocol, which

3708

# can be used for malicious purposes (see

3709

# https://github.com/ytdl-org/youtube-dl/issues/8227)

3710

file_handler = compat_urllib_request.FileHandler()

3711

3712

def file_open(*args, **kwargs):

3713

raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')

3714

file_handler.file_open = file_open

3715

3716

opener = compat_urllib_request.build_opener(

3717

proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

3718

3719

# Delete the default user-agent header, which would otherwise apply in

3720

# cases where our custom HTTP handler doesn't come into play

3721

# (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)

3722

opener.addheaders = []

3723

self._opener = opener

3724

3725

def encode(self, s):

3726

if isinstance(s, bytes):

3727

return s # Already encoded

3728

3729

try:

3730

return s.encode(self.get_encoding())

3731

except UnicodeEncodeError as err:

3732

err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'

3733

raise

3734

3735

def get_encoding(self):

3736

encoding = self.params.get('encoding')

3737

if encoding is None:

3738

encoding = preferredencoding()

3739

return encoding

3740

3741

def _write_info_json(self, label, ie_result, infofn, overwrite=None):

3742

''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''

3743

if overwrite is None:

3744

overwrite = self.params.get('overwrites', True)

3745

if not self.params.get('writeinfojson'):

3746

return False

3747

elif not infofn:

3748

self.write_debug(f'Skipping writing {label} infojson')

3749

return False

3750

elif not self._ensure_dir_exists(infofn):

3751

return None

3752

elif not overwrite and os.path.exists(infofn):

3753

self.to_screen(f'[info] {label.title()} metadata is already present')

3754

return 'exists'

3755

3756

self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')

3757

try:

3758

write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)

3759

return True

3760

except OSError:

3761

self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')

3762

return None

3763

3764

def _write_description(self, label, ie_result, descfn):

3765

''' Write description and returns True = written, False = skip, None = error '''

3766

if not self.params.get('writedescription'):

3767

return False

3768

elif not descfn:

3769

self.write_debug(f'Skipping writing {label} description')

3770

return False

3771

elif not self._ensure_dir_exists(descfn):

3772

return None

3773

elif not self.params.get('overwrites', True) and os.path.exists(descfn):

3774

self.to_screen(f'[info] {label.title()} description is already present')

3775

elif ie_result.get('description') is None:

3776

self.report_warning(f'There\'s no {label} description to write')

return False

else:

try:

self.to_screen(f'[info] Writing {label} description to: {descfn}')

3781

with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:

3782

descfile.write(ie_result['description'])

3783

except OSError:

3784

self.report_error(f'Cannot write {label} description file {descfn}')

return None

return True

def _write_subtitles(self, info_dict, filename):

3789

''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''

3790

ret = []

3791

subtitles = info_dict.get('requested_subtitles')

3792

if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):

3793

# subtitles download errors are already managed as troubles in relevant IE

3794

# that way it will silently go on when used with unsupporting IE

3795

return ret

3796

3797

sub_filename_base = self.prepare_filename(info_dict, 'subtitle')

3798

if not sub_filename_base:

3799

self.to_screen('[info] Skipping writing video subtitles')

3800

return ret

3801

for sub_lang, sub_info in subtitles.items():

3802

sub_format = sub_info['ext']

3803

sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))

3804

sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))

3805

existing_sub = self.existing_file((sub_filename_final, sub_filename))

3806

if existing_sub:

3807

self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')

3808

sub_info['filepath'] = existing_sub

3809

ret.append((existing_sub, sub_filename_final))

3810

continue

3811

3812

self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')

3813

if sub_info.get('data') is not None:

3814

try:

3815

# Use newline='' to prevent conversion of newline characters

3816

# See https://github.com/ytdl-org/youtube-dl/issues/10268

3817

with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:

3818

subfile.write(sub_info['data'])

3819

sub_info['filepath'] = sub_filename

3820

ret.append((sub_filename, sub_filename_final))

3821

continue

3822

except OSError:

3823

self.report_error(f'Cannot write video subtitles file {sub_filename}')

return None

try:

sub_copy = sub_info.copy()

3828

sub_copy.setdefault('http_headers', info_dict.get('http_headers'))

3829

self.dl(sub_filename, sub_copy, subtitle=True)

3830

sub_info['filepath'] = sub_filename

3831

ret.append((sub_filename, sub_filename_final))

3832

except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:

3833

msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'

3834

if self.params.get('ignoreerrors') is not True: # False or 'only_download'

3835

if not self.params.get('ignoreerrors'):

3836

self.report_error(msg)

3837

raise DownloadError(msg)

3838

self.report_warning(msg)

3839

return ret

3840

3841

def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):

3842

''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''

3843

write_all = self.params.get('write_all_thumbnails', False)

3844

thumbnails, ret = [], []

3845

if write_all or self.params.get('writethumbnail', False):

3846

thumbnails = info_dict.get('thumbnails') or []

3847

multiple = write_all and len(thumbnails) > 1

3848

3849

if thumb_filename_base is None:

3850

thumb_filename_base = filename

3851

if thumbnails and not thumb_filename_base:

3852

self.write_debug(f'Skipping writing {label} thumbnail')

3853

return ret

3854

3855

for idx, t in list(enumerate(thumbnails))[::-1]:

3856

thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')

3857

thumb_display_id = f'{label} thumbnail {t["id"]}'

3858

thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))

3859

thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

3860

3861

existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))

3862

if existing_thumb:

3863

self.to_screen('[info] %s is already present' % (

3864

thumb_display_id if multiple else f'{label} thumbnail').capitalize())

3865

t['filepath'] = existing_thumb

3866

ret.append((existing_thumb, thumb_filename_final))

3867

else:

3868

self.to_screen(f'[info] Downloading {thumb_display_id} ...')

3869

try:

3870

uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))

3871

self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')

3872

with open(encodeFilename(thumb_filename), 'wb') as thumbf:

3873

shutil.copyfileobj(uf, thumbf)

3874

ret.append((thumb_filename, thumb_filename_final))

3875

t['filepath'] = thumb_filename

3876

except network_exceptions as err:

3877

thumbnails.pop(idx)

3878

self.report_warning(f'Unable to download {thumb_display_id}: {err}')

3879

if ret and not write_all:

3880

break

3881

return ret