#!/usr/bin/env python3
import collections
import contextlib
import datetime
import errno
import fileinput
import functools
import io
import itertools
import json
import locale
import operator
import os
import platform
import random
import re
import shutil
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata
import urllib.request
from string import ascii_letters

from .cache import Cache
from .compat import (
    compat_get_terminal_size,
    compat_os_name,
    compat_shlex_quote,
    compat_str,
    compat_urllib_error,
    compat_urllib_request,
    windows_enable_vt_mode,
)
from .cookies import load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import _LAZY_LOADER
from .extractor import _PLUGIN_CLASSES as plugin_extractors
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .update import detect_variant
from .utils import (
    DEFAULT_OUTTMPL,
    LINK_TEMPLATES,
    NO_DEFAULT,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    GeoRestrictedError,
    HEADRequest,
    InAdvancePagedList,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PerRequestProxyHandler,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
    age_restricted,
    args_to_str,
    date_from_str,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    expand_path,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_domain,
    int_or_none,
    iri_to_uri,
    join_nonempty,
    locked_file,
    make_dir,
    make_HTTPS_handler,
    merge_headers,
    network_exceptions,
    number_of_digits,
    orderedSet,
    parse_filesize,
    platform_name,
    preferredencoding,
    prepend_extension,
    register_socks_protocols,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_get,
    url_basename,
    variadic,
    version_tuple,
    write_json_file,
    write_string,
)
from .version import RELEASE_GIT_HEAD, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do),
    it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are video or any of the
                       items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
    forceurl:          Force printing final URL. (Deprecated)
    forcetitle:        Force printing title. (Deprecated)
    forceid:           Force printing ID. (Deprecated)
    forcethumbnail:    Force printing thumbnail URL. (Deprecated)
    forcedescription:  Force printing description. (Deprecated)
    forcefilename:     Force printing final filename. (Deprecated)
    forceduration:     Force printing duration. (Deprecated)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. See "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    force_generic_extractor: Force downloader to use the generic extractor
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlist_items:    Specific indices of playlist to download.
    playlistreverse:   Download playlist items in reverse order.
    playlistrandom:    Download playlist items in random order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove private fields from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    allsubtitles:      Deprecated - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages. Eg: ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to for the entire queue
    cookiefile:        File name where cookies should be read from and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, and the name of the
                       keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    include_ads:       Download ads as well (deprecated)
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key:  The name of the postprocessor. See
                               yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                               the entries of utils.POSTPROCESS_WHEN
                               Assumed to be 'post_process' if not given
    post_hooks:        Deprecated - Register a custom postprocessor instead
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                                 Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: Extension to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                                           about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging. (BROKEN)
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called with the info_dict of
                       every video.
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       If it returns utils.NO_DEFAULT, the user is interactively
                       asked whether to download the video.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-1 alpha-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country

    The following options determine which downloader is picked:
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
                       or {'m3u8': 'ffmpeg'} instead.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort,
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'

    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg. (avconv support is deprecated)
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       Eg: {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
    youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    """

    _NUMERIC_FIELDS = {
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
    }
    _format_selection_exts = {
        'audio': {'m4a', 'mp3', 'ogg', 'aac'},
        'video': {'mp4', 'flv', 'webm', '3gp'},
        'storyboards': {'mhtml'},
    }

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        windows_enable_vt_mode()
        self._out_files = {
            'error': sys.stderr,
            'print': sys.stderr if self.params.get('logtostderr') else sys.stdout,
            'console': None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        }
        self._out_files['screen'] = sys.stderr if self.params.get('quiet') else self._out_files['print']
        self._allow_colors = {
            type_: not self.params.get('no_color') and supports_terminal_sequences(self._out_files[type_])
            for type_ in ('screen', 'error')
        }

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecation_warning(msg)

        if 'list-formats' in self.params.get('compat_opts', []):
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._out_files['error'])
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if auto_init:
            if auto_init != 'no_verbose_header':
                self.print_debug_header()
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        # Set http_headers defaults according to std_headers
        self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        self._setup_opener()
        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            if fn is None:
                return False
            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

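
    # An illustrative 'postprocessors' entry as consumed by the loop above;
    # the keys follow the class docstring, and the 'FFmpegExtractAudio'
    # values mirror the common embedding example (placeholders, not defaults):
    #
    #   ydl_opts = {'postprocessors': [{
    #       'key': 'FFmpegExtractAudio',
    #       'preferredcodec': 'mp3',
    #       'when': 'post_process',
    #   }]}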
    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def _get_info_extractor_class(self, ie_key):
        ie = self._ies.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)
            self.add_info_extractor(ie)
        return ie

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key. It will try to get one from
        the _ies_instances list; if there is no instance, it will create a new
        one and add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)
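
    # A hook sketch matching the dicts documented under "progress_hooks"
    # in the class docstring (the function name is a placeholder):
    #
    #   def my_progress_hook(d):
    #       if d['status'] == 'finished':  # check 'status' first
    #           print('Done downloading', d['filename'])
    #
    #   ydl.add_progress_hook(my_progress_hook)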

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files['print'])

    def to_screen(self, message, skip_eol=False, quiet=None):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files['screen'])

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string('%s\n' % self._bidi_workaround(message), self._out_files['error'], only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files['console']:
            return
        self._write_string(code, self._out_files['console'])

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors else text if fallback is None else fallback

    def _format_screen(self, *args, **kwargs):
        return self._format_text(
            self._out_files['screen'], self._allow_colors['screen'], *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(
            self._out_files['error'], self._allow_colors['error'], *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'DeprecationWarning: {message}')
        else:
            self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefix the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message, or print message to stderr'''
        if not self.params.get('verbose', False):
            return
        message = '[debug] %s' % message
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        outtmpl_dict = self.params.get('outtmpl', {})
        if not isinstance(outtmpl_dict, dict):
            outtmpl_dict = {'default': outtmpl_dict}
        # Remove spaces in the default template
        if self.params.get('restrictfilenames'):
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
        else:
            sanitize = lambda x: x
        outtmpl_dict.update({
            k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
            if outtmpl_dict.get(k) is None})
        for _, val in outtmpl_dict.items():
            if isinstance(val, bytes):
                self.report_warning('Parameter outtmpl is bytes, but should be a unicode string')
        return outtmpl_dict

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict)
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly; that is not what we want, since we need to keep
        # '%%' intact for the template dict substitution step. Work around
        # it with a boundary-like separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err
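
    # An illustrative way to catch template errors early with
    # validate_outtmpl (the template string is a placeholder):
    #
    #   err = YoutubeDL.validate_outtmpl('%(title)s.%(ext)s')
    #   if err:
    #       raise SystemExit(f'Invalid output template: {err}')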

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params.get('compat_opts', [])
                else NO_DEFAULT))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = f'0{field_size_compat_map[key]:d}d'

            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode() % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitizer(initial_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
        return self.escape_outtmpl(outtmpl) % info_dict
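
    # A sketch of the template mini-language handled by prepare_outtmpl,
    # assuming `ydl` and an already-extracted `info_dict` (field names and
    # values are placeholders):
    #
    #   ydl.evaluate_outtmpl('%(title)s-%(id)s.%(ext)s', info_dict)
    #   ydl.evaluate_outtmpl('%(duration>%H-%M-%S)s', info_dict)  # strftime-style
    #   ydl.evaluate_outtmpl('%(uploader|Unknown)s', info_dict)   # "|" default
    #   ydl.evaluate_outtmpl('%(playlist_index+1)d', info_dict)   # "+"/"-" maths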

    def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
        assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
        if outtmpl is None:
            outtmpl = self.outtmpl_dict.get(tmpl_type or 'default', self.outtmpl_dict['default'])
        try:
            outtmpl = self._outtmpl_expandpath(outtmpl)
            filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
            if not filename:
                return None

            if tmpl_type in ('', 'temp'):
                final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
                if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
                    filename = replace_extension(filename, ext, final_ext)
            elif tmpl_type:
                force_ext = OUTTMPL_TYPES[tmpl_type]
                if force_ext:
                    filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                no_ext, *ext = filename.rsplit('.', 2)
                filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
        """Generate the output filename"""
        if outtmpl:
            assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
            dir_type = None
        filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
        if not filename and dir_type not in ('', 'temp'):
            return ''

        if warn:
            if not self.params.get('paths'):
                pass
            elif filename == '-':
                self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
        if filename == '-' or not filename:
            return filename

        return self.get_output_path(dir_type, filename)
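
    # Illustrative calls, assuming `ydl` and an `info_dict` from extract_info,
    # and assuming 'description' is among the OUTTMPL_TYPES keys:
    #
    #   path = ydl.prepare_filename(info_dict)  # 'default' template
    #   desc_path = ydl.prepare_filename(info_dict, 'description')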

    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """ Returns None if the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))

        def check_filter():
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
                if ret is NO_DEFAULT:
                    while True:
                        filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
                        reply = input(self._format_screen(
                            f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
                        if reply in {'y', ''}:
                            return None
                        elif reply == 'n':
                            return f'Skipping {video_title}'
                        return True
                elif ret is not None:
                    return ret
            return None

        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason
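
    # A match_filter sketch per the class docstring (the 10-minute
    # threshold is an arbitrary example):
    #
    #   def longer_than_ten_minutes(info_dict, incomplete=False):
    #       duration = info_dict.get('duration')
    #       if duration and duration < 600:
    #           return 'Skipping short video'  # a message means "skip"
    #       return None                        # None means "download"
    #
    #   ydl = YoutubeDL({'match_filter': longer_than_ten_minutes})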

    @staticmethod
    def add_extra_info(info_dict, extra_info):
        '''Set the keys from extra_info in info dict if they are missing'''
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)

    def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                     process=True, force_generic_extractor=False):
        """
        Extract and return the information dict of the video or playlist
        the URL refers to.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
                   must be True for download to work.
        force_generic_extractor -- force using the generic extractor
        """

        if extra_info is None:
            extra_info = {}

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = {ie_key: self._get_info_extractor_class(ie_key)}
        else:
            ies = self._ies

        for ie_key, ie in ies.items():
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            temp_id = ie.get_temp_id(url)
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
                if self.params.get('break_on_existing', False):
                    raise ExistingVideoReached()
                break
            return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
        else:
            self.report_error('no suitable InfoExtractor for URL %s' % url)
1387
1388 def __handle_extraction_exceptions(func):
1389 @functools.wraps(func)
1390 def wrapper(self, *args, **kwargs):
1391 while True:
1392 try:
1393 return func(self, *args, **kwargs)
1394 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1395 raise
1396 except ReExtractInfo as e:
1397 if e.expected:
1398 self.to_screen(f'{e}; Re-extracting data')
1399 else:
1400 self.to_stderr('\r')
1401 self.report_warning(f'{e}; Re-extracting data')
1402 continue
1403 except GeoRestrictedError as e:
1404 msg = e.msg
1405 if e.countries:
1406 msg += '\nThis video is available in %s.' % ', '.join(
1407 map(ISO3166Utils.short2full, e.countries))
1408 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1409 self.report_error(msg)
1410 except ExtractorError as e: # An error we somewhat expected
1411 self.report_error(str(e), e.format_traceback())
1412 except Exception as e:
1413 if self.params.get('ignoreerrors'):
1414 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1415 else:
1416 raise
1417 break
1418 return wrapper
1419
1420 def _wait_for_video(self, ie_result):
1421 if (not self.params.get('wait_for_video')
1422 or ie_result.get('_type', 'video') != 'video'
1423 or ie_result.get('formats') or ie_result.get('url')):
1424 return
1425
1426 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1427 last_msg = ''
1428
1429 def progress(msg):
1430 nonlocal last_msg
1431 self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1432 last_msg = msg
1433
1434 min_wait, max_wait = self.params.get('wait_for_video')
1435 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1436 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1437 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1438 self.report_warning('Release time of video is not known')
1439 elif (diff or 0) <= 0:
1440 self.report_warning('Video should already be available according to extracted info')
1441 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1442 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1443
1444 wait_till = time.time() + diff
1445 try:
1446 while True:
1447 diff = wait_till - time.time()
1448 if diff <= 0:
1449 progress('')
1450 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1451 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1452 time.sleep(1)
1453 except KeyboardInterrupt:
1454 progress('')
1455 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1456 except BaseException as e:
1457 if not isinstance(e, ReExtractInfo):
1458 self.to_screen('')
1459 raise
1460
1461 @__handle_extraction_exceptions
1462 def __extract_info(self, url, ie, download, extra_info, process):
1463 ie_result = ie.extract(url)
1464 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1465 return
1466 if isinstance(ie_result, list):
1467 # Backwards compatibility: old IE result format
1468 ie_result = {
1469 '_type': 'compat_list',
1470 'entries': ie_result,
1471 }
1472 if extra_info.get('original_url'):
1473 ie_result.setdefault('original_url', extra_info['original_url'])
1474 self.add_default_extra_info(ie_result, ie, url)
1475 if process:
1476 self._wait_for_video(ie_result)
1477 return self.process_ie_result(ie_result, download, extra_info)
1478 else:
1479 return ie_result
1480
1481 def add_default_extra_info(self, ie_result, ie, url):
1482 if url is not None:
1483 self.add_extra_info(ie_result, {
1484 'webpage_url': url,
1485 'original_url': url,
1486 })
1487 webpage_url = ie_result.get('webpage_url')
1488 if webpage_url:
1489 self.add_extra_info(ie_result, {
1490 'webpage_url_basename': url_basename(webpage_url),
1491 'webpage_url_domain': get_domain(webpage_url),
1492 })
1493 if ie is not None:
1494 self.add_extra_info(ie_result, {
1495 'extractor': ie.IE_NAME,
1496 'extractor_key': ie.ie_key(),
1497 })
1498
1499 def process_ie_result(self, ie_result, download=True, extra_info=None):
1500 """
1501 Take the result of the ie (may be modified) and resolve all unresolved
1502 references (URLs, playlist items).
1503
1504 It will also download the videos if 'download' is true.
1505 Returns the resolved ie_result.
1506 """
1507 if extra_info is None:
1508 extra_info = {}
1509 result_type = ie_result.get('_type', 'video')
1510
1511 if result_type in ('url', 'url_transparent'):
1512 ie_result['url'] = sanitize_url(ie_result['url'])
1513 if ie_result.get('original_url'):
1514 extra_info.setdefault('original_url', ie_result['original_url'])
1515
1516 extract_flat = self.params.get('extract_flat', False)
1517 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1518 or extract_flat is True):
1519 info_copy = ie_result.copy()
1520 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1521 if ie and not ie_result.get('id'):
1522 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1523 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1524 self.add_extra_info(info_copy, extra_info)
1525 info_copy, _ = self.pre_process(info_copy)
1526 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1527 if self.params.get('force_write_download_archive', False):
1528 self.record_download_archive(info_copy)
1529 return ie_result
1530
1531 if result_type == 'video':
1532 self.add_extra_info(ie_result, extra_info)
1533 ie_result = self.process_video_result(ie_result, download=download)
1534 additional_urls = (ie_result or {}).get('additional_urls')
1535 if additional_urls:
1536 # TODO: Improve MetadataParserPP to allow setting a list
1537 if isinstance(additional_urls, compat_str):
1538 additional_urls = [additional_urls]
1539 self.to_screen(
1540 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1541 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1542 ie_result['additional_entries'] = [
1543 self.extract_info(
1544 url, download, extra_info=extra_info,
1545 force_generic_extractor=self.params.get('force_generic_extractor'))
1546 for url in additional_urls
1547 ]
1548 return ie_result
1549 elif result_type == 'url':
1550 # We have to add extra_info to the results because it may be
1551 # contained in a playlist
1552 return self.extract_info(
1553 ie_result['url'], download,
1554 ie_key=ie_result.get('ie_key'),
1555 extra_info=extra_info)
1556 elif result_type == 'url_transparent':
1557 # Use the information from the embedding page
1558 info = self.extract_info(
1559 ie_result['url'], ie_key=ie_result.get('ie_key'),
1560 extra_info=extra_info, download=False, process=False)
1561
1562 # extract_info may return None when ignoreerrors is enabled and
1563 # extraction failed with an error, don't crash and return early
1564 # in this case
1565 if not info:
1566 return info
1567
1568 new_result = info.copy()
1569 new_result.update(filter_dict(ie_result, lambda k, v: (
1570 v is not None and k not in {'_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'})))
1571
1572 # Extracted info may not be a video result (i.e.
1573 # info.get('_type', 'video') != 'video') but rather a url or
1574 # url_transparent. In such cases outer metadata (from ie_result)
1575 # should be propagated to inner one (info). For this to happen
1576 # _type of info should be overridden with url_transparent. This
1577 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1578 if new_result.get('_type') == 'url':
1579 new_result['_type'] = 'url_transparent'
1580
1581 return self.process_ie_result(
1582 new_result, download=download, extra_info=extra_info)
1583 elif result_type in ('playlist', 'multi_video'):
1584 # Protect from infinite recursion due to recursively nested playlists
1585 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1586 webpage_url = ie_result['webpage_url']
1587 if webpage_url in self._playlist_urls:
1588 self.to_screen(
1589 '[download] Skipping already downloaded playlist: %s'
1590 % (ie_result.get('title') or ie_result.get('id')))
1591 return
1592
1593 self._playlist_level += 1
1594 self._playlist_urls.add(webpage_url)
1595 self._fill_common_fields(ie_result, False)
1596 self._sanitize_thumbnails(ie_result)
1597 try:
1598 return self.__process_playlist(ie_result, download)
1599 finally:
1600 self._playlist_level -= 1
1601 if not self._playlist_level:
1602 self._playlist_urls.clear()
1603 elif result_type == 'compat_list':
1604 self.report_warning(
1605 'Extractor %s returned a compat_list result. '
1606 'It needs to be updated.' % ie_result.get('extractor'))
1607
1608 def _fixup(r):
1609 self.add_extra_info(r, {
1610 'extractor': ie_result['extractor'],
1611 'webpage_url': ie_result['webpage_url'],
1612 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1613 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1614 'extractor_key': ie_result['extractor_key'],
1615 })
1616 return r
1617 ie_result['entries'] = [
1618 self.process_ie_result(_fixup(r), download, extra_info)
1619 for r in ie_result['entries']
1620 ]
1621 return ie_result
1622 else:
1623 raise Exception('Invalid result type: %s' % result_type)
1624
1625 def _ensure_dir_exists(self, path):
1626 return make_dir(path, self.report_error)
1627
1628 @staticmethod
1629 def _playlist_infodict(ie_result, **kwargs):
1630 return {
1631 **ie_result,
1632 'playlist': ie_result.get('title') or ie_result.get('id'),
1633 'playlist_id': ie_result.get('id'),
1634 'playlist_title': ie_result.get('title'),
1635 'playlist_uploader': ie_result.get('uploader'),
1636 'playlist_uploader_id': ie_result.get('uploader_id'),
1637 'playlist_index': 0,
1638 **kwargs,
1639 }
1640
1641 def __process_playlist(self, ie_result, download):
1642 # We process each entry in the playlist
1643 playlist = ie_result.get('title') or ie_result.get('id')
1644 self.to_screen('[download] Downloading playlist: %s' % playlist)
1645
1646 if 'entries' not in ie_result:
1647 raise EntryNotInPlaylist('There are no entries')
1648
1649 MissingEntry = object()
1650 incomplete_entries = bool(ie_result.get('requested_entries'))
1651 if incomplete_entries:
1652 def fill_missing_entries(entries, indices):
1653 ret = [MissingEntry] * max(indices)
1654 for i, entry in zip(indices, entries):
1655 ret[i - 1] = entry
1656 return ret
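# e.g. entries [e1, e2] with requested_entries [2, 5] become
# [MissingEntry, e1, MissingEntry, MissingEntry, e2]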
1657 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1658
1659 playlist_results = []
1660
1661 playliststart = self.params.get('playliststart', 1)
1662 playlistend = self.params.get('playlistend')
1663 # For backwards compatibility, interpret -1 as whole list
1664 if playlistend == -1:
1665 playlistend = None
1666
1667 playlistitems_str = self.params.get('playlist_items')
1668 playlistitems = None
1669 if playlistitems_str is not None:
1670 def iter_playlistitems(format):
1671 for string_segment in format.split(','):
1672 if '-' in string_segment:
1673 start, end = string_segment.split('-')
1674 for item in range(int(start), int(end) + 1):
1675 yield int(item)
1676 else:
1677 yield int(string_segment)
1678 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
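# e.g. --playlist-items '1-3,7' yields [1, 2, 3, 7]; orderedSet also
# drops any duplicate indices while keeping their first occurrence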
1679
1680 ie_entries = ie_result['entries']
1681 if isinstance(ie_entries, list):
1682 playlist_count = len(ie_entries)
1683 msg = f'Collected {playlist_count} videos; downloading %d of them'
1684 ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count
1685
1686 def get_entry(i):
1687 return ie_entries[i - 1]
1688 else:
1689 msg = 'Downloading %d videos'
1690 if not isinstance(ie_entries, (PagedList, LazyList)):
1691 ie_entries = LazyList(ie_entries)
1692 elif isinstance(ie_entries, InAdvancePagedList):
1693 if ie_entries._pagesize == 1:
1694 playlist_count = ie_entries._pagecount
1695
1696 def get_entry(i):
1697 return YoutubeDL.__handle_extraction_exceptions(
1698 lambda self, i: ie_entries[i - 1]
1699 )(self, i)
1700
1701 entries, broken = [], False
1702 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1703 for i in items:
1704 if i == 0:
1705 continue
1706 if playlistitems is None and playlistend is not None and playlistend < i:
1707 break
1708 entry = None
1709 try:
1710 entry = get_entry(i)
1711 if entry is MissingEntry:
1712 raise EntryNotInPlaylist()
1713 except (IndexError, EntryNotInPlaylist):
1714 if incomplete_entries:
1715 raise EntryNotInPlaylist(f'Entry {i} cannot be found')
1716 elif not playlistitems:
1717 break
1718 entries.append(entry)
1719 try:
1720 if entry is not None:
1721 # TODO: Add auto-generated fields
1722 self._match_entry(entry, incomplete=True, silent=True)
1723 except (ExistingVideoReached, RejectedVideoReached):
1724 broken = True
1725 break
1726 ie_result['entries'] = entries
1727
1728 # Save playlist_index before re-ordering
1729 entries = [
1730 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1731 for i, entry in enumerate(entries, 1)
1732 if entry is not None]
1733 n_entries = len(entries)
1734
1735 if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
1736 ie_result['playlist_count'] = n_entries
1737
1738 if not playlistitems and (playliststart != 1 or playlistend):
1739 playlistitems = list(range(playliststart, playliststart + n_entries))
1740 ie_result['requested_entries'] = playlistitems
1741
1742 _infojson_written = False
1743 write_playlist_files = self.params.get('allow_playlist_files', True)
1744 if write_playlist_files and self.params.get('list_thumbnails'):
1745 self.list_thumbnails(ie_result)
1746 if write_playlist_files and not self.params.get('simulate'):
1747 ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
1748 _infojson_written = self._write_info_json(
1749 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1750 if _infojson_written is None:
1751 return
1752 if self._write_description('playlist', ie_result,
1753 self.prepare_filename(ie_copy, 'pl_description')) is None:
1754 return
1755 # TODO: This should be passed to ThumbnailsConvertor if necessary
1756 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1757
1758 if self.params.get('playlistreverse', False):
1759 entries = entries[::-1]
1760 if self.params.get('playlistrandom', False):
1761 random.shuffle(entries)
1762
1763 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1764
1765 self.to_screen(f'[{ie_result["extractor"]}] playlist {playlist}: {msg % n_entries}')
1766 failures = 0
1767 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1768 for i, entry_tuple in enumerate(entries, 1):
1769 playlist_index, entry = entry_tuple
1770 if 'playlist-index' in self.params.get('compat_opts', []):
1771 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1772 self.to_screen('[download] Downloading video %s of %s' % (
1773 self._format_screen(i, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
1774 # This __x_forwarded_for_ip thing is a bit ugly but requires
1775 # minimal changes
1776 if x_forwarded_for:
1777 entry['__x_forwarded_for_ip'] = x_forwarded_for
1778 extra = {
1779 'n_entries': n_entries,
1780 '__last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1781 'playlist_count': ie_result.get('playlist_count'),
1782 'playlist_index': playlist_index,
1783 'playlist_autonumber': i,
1784 'playlist': playlist,
1785 'playlist_id': ie_result.get('id'),
1786 'playlist_title': ie_result.get('title'),
1787 'playlist_uploader': ie_result.get('uploader'),
1788 'playlist_uploader_id': ie_result.get('uploader_id'),
1789 'extractor': ie_result['extractor'],
1790 'webpage_url': ie_result['webpage_url'],
1791 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1792 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1793 'extractor_key': ie_result['extractor_key'],
1794 }
1795
1796 if self._match_entry(entry, incomplete=True) is not None:
1797 continue
1798
1799 entry_result = self.__process_iterable_entry(entry, download, extra)
1800 if not entry_result:
1801 failures += 1
1802 if failures >= max_failures:
1803 self.report_error(
1804 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1805 break
1806 playlist_results.append(entry_result)
1807 ie_result['entries'] = playlist_results
1808
1809 # Write the updated info to json
1810 if _infojson_written is True and self._write_info_json(
1811 'updated playlist', ie_result,
1812 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1813 return
1814
1815 ie_result = self.run_all_pps('playlist', ie_result)
1816 self.to_screen(f'[download] Finished downloading playlist: {playlist}')
1817 return ie_result
1818
1819 @__handle_extraction_exceptions
1820 def __process_iterable_entry(self, entry, download, extra_info):
1821 return self.process_ie_result(
1822 entry, download=download, extra_info=extra_info)
1823
1824 def _build_format_filter(self, filter_spec):
1825 " Returns a function to filter the formats according to the filter_spec "
1826
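# Illustrative numeric filter specs matched by operator_rex below:
#   'height<=720'   -> keep formats no taller than 720px
#   'filesize>100M' -> size suffixes are handled via parse_filesize
#   'fps>30?'       -> a trailing '?' also keeps formats lacking the field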
1827 OPERATORS = {
1828 '<': operator.lt,
1829 '<=': operator.le,
1830 '>': operator.gt,
1831 '>=': operator.ge,
1832 '=': operator.eq,
1833 '!=': operator.ne,
1834 }
1835 operator_rex = re.compile(r'''(?x)\s*
1836 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1837 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1838 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1839 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1840 m = operator_rex.fullmatch(filter_spec)
1841 if m:
1842 try:
1843 comparison_value = int(m.group('value'))
1844 except ValueError:
1845 comparison_value = parse_filesize(m.group('value'))
1846 if comparison_value is None:
1847 comparison_value = parse_filesize(m.group('value') + 'B')
1848 if comparison_value is None:
1849 raise ValueError(
1850 'Invalid value %r in format specification %r' % (
1851 m.group('value'), filter_spec))
1852 op = OPERATORS[m.group('op')]
1853
1854 if not m:
1855 STR_OPERATORS = {
1856 '=': operator.eq,
1857 '^=': lambda attr, value: attr.startswith(value),
1858 '$=': lambda attr, value: attr.endswith(value),
1859 '*=': lambda attr, value: value in attr,
1860 '~=': lambda attr, value: value.search(attr) is not None
1861 }
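# Illustrative string filter specs for this fallback branch:
#   'ext=mp4', 'format_id^=hls' (prefix), 'format_note*=DASH' (substring),
#   "vcodec~='^avc'" (regex; quote values with special characters);
#   a '!' before the operator negates it, e.g. 'ext!=webm'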
1862 str_operator_rex = re.compile(r'''(?x)\s*
1863 (?P<key>[a-zA-Z0-9._-]+)\s*
1864 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1865 (?P<quote>["'])?
1866 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1867 (?(quote)(?P=quote))\s*
1868 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1869 m = str_operator_rex.fullmatch(filter_spec)
1870 if m:
1871 if m.group('op') == '~=':
1872 comparison_value = re.compile(m.group('value'))
1873 else:
1874 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1875 str_op = STR_OPERATORS[m.group('op')]
1876 if m.group('negation'):
1877 op = lambda attr, value: not str_op(attr, value)
1878 else:
1879 op = str_op
1880
1881 if not m:
1882 raise SyntaxError('Invalid filter specification %r' % filter_spec)
1883
1884 def _filter(f):
1885 actual_value = f.get(m.group('key'))
1886 if actual_value is None:
1887 return m.group('none_inclusive')
1888 return op(actual_value, comparison_value)
1889 return _filter
1890
1891 def _check_formats(self, formats):
1892 for f in formats:
1893 self.to_screen('[info] Testing format %s' % f['format_id'])
1894 path = self.get_output_path('temp')
1895 if not self._ensure_dir_exists(f'{path}/'):
1896 continue
1897 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1898 temp_file.close()
1899 try:
1900 success, _ = self.dl(temp_file.name, f, test=True)
1901 except (DownloadError, OSError, ValueError) + network_exceptions:
1902 success = False
1903 finally:
1904 if os.path.exists(temp_file.name):
1905 try:
1906 os.remove(temp_file.name)
1907 except OSError:
1908 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1909 if success:
1910 yield f
1911 else:
1912 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1913
1914 def _default_format_spec(self, info_dict, download=True):
1915
1916 def can_merge():
1917 merger = FFmpegMergerPP(self)
1918 return merger.available and merger.can_merge()
1919
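# A pre-merged 'best' is preferred whenever separate streams cannot be
# merged: no usable ffmpeg, a live stream, or output going to stdout ('-')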
1920 prefer_best = (
1921 not self.params.get('simulate')
1922 and download
1923 and (
1924 not can_merge()
1925 or info_dict.get('is_live', False)
1926 or self.outtmpl_dict['default'] == '-'))
1927 compat = (
1928 prefer_best
1929 or self.params.get('allow_multiple_audio_streams', False)
1930 or 'format-spec' in self.params.get('compat_opts', []))
1931
1932 return (
1933 'best/bestvideo+bestaudio' if prefer_best
1934 else 'bestvideo*+bestaudio/best' if not compat
1935 else 'bestvideo+bestaudio/best')
1936
1937 def build_format_selector(self, format_spec):
1938 def syntax_error(note, start):
1939 message = (
1940 'Invalid format specification: '
1941 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
1942 return SyntaxError(message)
1943
1944 PICKFIRST = 'PICKFIRST'
1945 MERGE = 'MERGE'
1946 SINGLE = 'SINGLE'
1947 GROUP = 'GROUP'
1948 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
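# Illustrative format_spec strings and how they parse:
#   'bestvideo+bestaudio/best' -> PICKFIRST of a MERGE and a SINGLE
#   'bv*[height<=1080]+ba'     -> filters attach to the preceding selector
#   '(mp4,webm)[height<480]'   -> GROUP; the filter applies to the group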
1949
1950 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1951 'video': self.params.get('allow_multiple_video_streams', False)}
1952
1953 check_formats = self.params.get('check_formats') == 'selected'
1954
1955 def _parse_filter(tokens):
1956 filter_parts = []
1957 for type, string, start, _, _ in tokens:
1958 if type == tokenize.OP and string == ']':
1959 return ''.join(filter_parts)
1960 else:
1961 filter_parts.append(string)
1962
1963 def _remove_unused_ops(tokens):
1964 # Remove operators that we don't use and join them with the surrounding strings
1965 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1966 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1967 last_string, last_start, last_end, last_line = None, None, None, None
1968 for type, string, start, end, line in tokens:
1969 if type == tokenize.OP and string == '[':
1970 if last_string:
1971 yield tokenize.NAME, last_string, last_start, last_end, last_line
1972 last_string = None
1973 yield type, string, start, end, line
1974 # everything inside brackets will be handled by _parse_filter
1975 for type, string, start, end, line in tokens:
1976 yield type, string, start, end, line
1977 if type == tokenize.OP and string == ']':
1978 break
1979 elif type == tokenize.OP and string in ALLOWED_OPS:
1980 if last_string:
1981 yield tokenize.NAME, last_string, last_start, last_end, last_line
1982 last_string = None
1983 yield type, string, start, end, line
1984 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1985 if not last_string:
1986 last_string = string
1987 last_start = start
1988 last_end = end
1989 else:
1990 last_string += string
1991 if last_string:
1992 yield tokenize.NAME, last_string, last_start, last_end, last_line
1993
1994 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1995 selectors = []
1996 current_selector = None
1997 for type, string, start, _, _ in tokens:
1998 # tokenize.tokenize always emits an ENCODING token first; skip it
1999 if type == getattr(tokenize, 'ENCODING', None):
2000 continue
2001 elif type in [tokenize.NAME, tokenize.NUMBER]:
2002 current_selector = FormatSelector(SINGLE, string, [])
2003 elif type == tokenize.OP:
2004 if string == ')':
2005 if not inside_group:
2006 # ')' will be handled by the parentheses group
2007 tokens.restore_last_token()
2008 break
2009 elif inside_merge and string in ['/', ',']:
2010 tokens.restore_last_token()
2011 break
2012 elif inside_choice and string == ',':
2013 tokens.restore_last_token()
2014 break
2015 elif string == ',':
2016 if not current_selector:
2017 raise syntax_error('"," must follow a format selector', start)
2018 selectors.append(current_selector)
2019 current_selector = None
2020 elif string == '/':
2021 if not current_selector:
2022 raise syntax_error('"/" must follow a format selector', start)
2023 first_choice = current_selector
2024 second_choice = _parse_format_selection(tokens, inside_choice=True)
2025 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2026 elif string == '[':
2027 if not current_selector:
2028 current_selector = FormatSelector(SINGLE, 'best', [])
2029 format_filter = _parse_filter(tokens)
2030 current_selector.filters.append(format_filter)
2031 elif string == '(':
2032 if current_selector:
2033 raise syntax_error('Unexpected "("', start)
2034 group = _parse_format_selection(tokens, inside_group=True)
2035 current_selector = FormatSelector(GROUP, group, [])
2036 elif string == '+':
2037 if not current_selector:
2038 raise syntax_error('Unexpected "+"', start)
2039 selector_1 = current_selector
2040 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2041 if not selector_2:
2042 raise syntax_error('Expected a selector', start)
2043 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2044 else:
2045 raise syntax_error(f'Operator not recognized: "{string}"', start)
2046 elif type == tokenize.ENDMARKER:
2047 break
2048 if current_selector:
2049 selectors.append(current_selector)
2050 return selectors
2051
2052 def _merge(formats_pair):
2053 format_1, format_2 = formats_pair
2054
2055 formats_info = []
2056 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2057 formats_info.extend(format_2.get('requested_formats', (format_2,)))
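# e.g. merging a video-only mp4 with an audio-only m4a yields one entry
# carrying both in 'requested_formats', with format_id 'V+A' and ext 'mp4'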
2058
2059 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2060 get_no_more = {'video': False, 'audio': False}
2061 for (i, fmt_info) in enumerate(formats_info):
2062 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2063 formats_info.pop(i)
2064 continue
2065 for aud_vid in ['audio', 'video']:
2066 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2067 if get_no_more[aud_vid]:
2068 formats_info.pop(i)
2069 break
2070 get_no_more[aud_vid] = True
2071
2072 if len(formats_info) == 1:
2073 return formats_info[0]
2074
2075 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2076 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2077
2078 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2079 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2080
2081 output_ext = self.params.get('merge_output_format')
2082 if not output_ext:
2083 if the_only_video:
2084 output_ext = the_only_video['ext']
2085 elif the_only_audio and not video_fmts:
2086 output_ext = the_only_audio['ext']
2087 else:
2088 output_ext = 'mkv'
2089
2090 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2091
2092 new_dict = {
2093 'requested_formats': formats_info,
2094 'format': '+'.join(filtered('format')),
2095 'format_id': '+'.join(filtered('format_id')),
2096 'ext': output_ext,
2097 'protocol': '+'.join(map(determine_protocol, formats_info)),
2098 'language': '+'.join(orderedSet(filtered('language'))) or None,
2099 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2100 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2101 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2102 }
2103
2104 if the_only_video:
2105 new_dict.update({
2106 'width': the_only_video.get('width'),
2107 'height': the_only_video.get('height'),
2108 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2109 'fps': the_only_video.get('fps'),
2110 'dynamic_range': the_only_video.get('dynamic_range'),
2111 'vcodec': the_only_video.get('vcodec'),
2112 'vbr': the_only_video.get('vbr'),
2113 'stretched_ratio': the_only_video.get('stretched_ratio'),
2114 })
2115
2116 if the_only_audio:
2117 new_dict.update({
2118 'acodec': the_only_audio.get('acodec'),
2119 'abr': the_only_audio.get('abr'),
2120 'asr': the_only_audio.get('asr'),
2121 })
2122
2123 return new_dict
2124
2125 def _check_formats(formats):
2126 if not check_formats:
2127 yield from formats
2128 return
2129 yield from self._check_formats(formats)
2130
2131 def _build_selector_function(selector):
2132 if isinstance(selector, list): # ,
2133 fs = [_build_selector_function(s) for s in selector]
2134
2135 def selector_function(ctx):
2136 for f in fs:
2137 yield from f(ctx)
2138 return selector_function
2139
2140 elif selector.type == GROUP: # ()
2141 selector_function = _build_selector_function(selector.selector)
2142
2143 elif selector.type == PICKFIRST: # /
2144 fs = [_build_selector_function(s) for s in selector.selector]
2145
2146 def selector_function(ctx):
2147 for f in fs:
2148 picked_formats = list(f(ctx))
2149 if picked_formats:
2150 return picked_formats
2151 return []
2152
2153 elif selector.type == MERGE: # +
2154 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2155
2156 def selector_function(ctx):
2157 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2158 yield _merge(pair)
2159
2160 elif selector.type == SINGLE: # atom
2161 format_spec = selector.selector or 'best'
2162
2163 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2164 if format_spec == 'all':
2165 def selector_function(ctx):
2166 yield from _check_formats(ctx['formats'][::-1])
2167 elif format_spec == 'mergeall':
2168 def selector_function(ctx):
2169 formats = list(_check_formats(
2170 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2171 if not formats:
2172 return
2173 merged_format = formats[-1]
2174 for f in formats[-2::-1]:
2175 merged_format = _merge((merged_format, f))
2176 yield merged_format
2177
2178 else:
2179 format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
2180 mobj = re.match(
2181 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2182 format_spec)
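# e.g. 'bv*.2' -> the 2nd best format containing a video stream;
# 'wa' -> worst audio-only format; plain 'b'/'w' match only formats
# that have both audio and video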
2183 if mobj is not None:
2184 format_idx = int_or_none(mobj.group('n'), default=1)
2185 format_reverse = mobj.group('bw')[0] == 'b'
2186 format_type = (mobj.group('type') or [None])[0]
2187 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2188 format_modified = mobj.group('mod') is not None
2189
2190 format_fallback = not format_type and not format_modified # for b, w
2191 _filter_f = (
2192 (lambda f: f.get('%scodec' % format_type) != 'none')
2193 if format_type and format_modified # bv*, ba*, wv*, wa*
2194 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2195 if format_type # bv, ba, wv, wa
2196 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2197 if not format_modified # b, w
2198 else lambda f: True) # b*, w*
2199 filter_f = lambda f: _filter_f(f) and (
2200 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2201 else:
2202 if format_spec in self._format_selection_exts['audio']:
2203 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2204 elif format_spec in self._format_selection_exts['video']:
2205 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2206 separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2207 elif format_spec in self._format_selection_exts['storyboards']:
2208 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2209 else:
2210 filter_f = lambda f: f.get('format_id') == format_spec # id
2211
2212 def selector_function(ctx):
2213 formats = list(ctx['formats'])
2214 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2215 if not matches:
2216 if format_fallback and ctx['incomplete_formats']:
2217 # for extractors with incomplete formats (audio-only (soundcloud)
2218 # or video-only (imgur)), best/worst will fall back to the
2219 # best/worst {video,audio}-only format
2220 matches = formats
2221 elif separate_fallback and not ctx['has_merged_format']:
2222 # for compatibility with youtube-dl when there is no pre-merged format
2223 matches = list(filter(separate_fallback, formats))
2224 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2225 try:
2226 yield matches[format_idx - 1]
2227 except LazyList.IndexError:
2228 return
2229
2230 filters = [self._build_format_filter(f) for f in selector.filters]
2231
2232 def final_selector(ctx):
2233 ctx_copy = dict(ctx)
2234 for _filter in filters:
2235 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2236 return selector_function(ctx_copy)
2237 return final_selector
2238
2239 stream = io.BytesIO(format_spec.encode('utf-8'))
2240 try:
2241 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2242 except tokenize.TokenError:
2243 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2244
2245 class TokenIterator:
2246 def __init__(self, tokens):
2247 self.tokens = tokens
2248 self.counter = 0
2249
2250 def __iter__(self):
2251 return self
2252
2253 def __next__(self):
2254 if self.counter >= len(self.tokens):
2255 raise StopIteration()
2256 value = self.tokens[self.counter]
2257 self.counter += 1
2258 return value
2259
2260 next = __next__
2261
2262 def restore_last_token(self):
2263 self.counter -= 1
2264
2265 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2266 return _build_selector_function(parsed_selector)
2267
2268 def _calc_headers(self, info_dict):
2269 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
2270
2271 cookies = self._calc_cookies(info_dict)
2272 if cookies:
2273 res['Cookie'] = cookies
2274
2275 if 'X-Forwarded-For' not in res:
2276 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2277 if x_forwarded_for_ip:
2278 res['X-Forwarded-For'] = x_forwarded_for_ip
2279
2280 return res
2281
2282 def _calc_cookies(self, info_dict):
2283 pr = sanitized_Request(info_dict['url'])
2284 self.cookiejar.add_cookie_header(pr)
2285 return pr.get_header('Cookie')
2286
2287 def _sort_thumbnails(self, thumbnails):
2288 thumbnails.sort(key=lambda t: (
2289 t.get('preference') if t.get('preference') is not None else -1,
2290 t.get('width') if t.get('width') is not None else -1,
2291 t.get('height') if t.get('height') is not None else -1,
2292 t.get('id') if t.get('id') is not None else '',
2293 t.get('url')))
2294
2295 def _sanitize_thumbnails(self, info_dict):
2296 thumbnails = info_dict.get('thumbnails')
2297 if thumbnails is None:
2298 thumbnail = info_dict.get('thumbnail')
2299 if thumbnail:
2300 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2301 if not thumbnails:
2302 return
2303
2304 def check_thumbnails(thumbnails):
2305 for t in thumbnails:
2306 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2307 try:
2308 self.urlopen(HEADRequest(t['url']))
2309 except network_exceptions as err:
2310 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2311 continue
2312 yield t
2313
2314 self._sort_thumbnails(thumbnails)
2315 for i, t in enumerate(thumbnails):
2316 if t.get('id') is None:
2317 t['id'] = '%d' % i
2318 if t.get('width') and t.get('height'):
2319 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2320 t['url'] = sanitize_url(t['url'])
2321
2322 if self.params.get('check_formats') is True:
2323 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2324 else:
2325 info_dict['thumbnails'] = thumbnails
2326
2327 def _fill_common_fields(self, info_dict, is_video=True):
2328 # TODO: move sanitization here
2329 if is_video:
2330 # playlists are allowed to lack "title"
2331 info_dict['fulltitle'] = info_dict.get('title')
2332 if 'title' not in info_dict:
2333 raise ExtractorError('Missing "title" field in extractor result',
2334 video_id=info_dict['id'], ie=info_dict['extractor'])
2335 elif not info_dict.get('title'):
2336 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2337 info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
2338
2339 if info_dict.get('duration') is not None:
2340 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2341
2342 for ts_key, date_key in (
2343 ('timestamp', 'upload_date'),
2344 ('release_timestamp', 'release_date'),
2345 ('modified_timestamp', 'modified_date'),
2346 ):
2347 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2348 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2349 # see http://bugs.python.org/issue1646728)
2350 with contextlib.suppress(ValueError, OverflowError, OSError):
2351 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2352 info_dict[date_key] = upload_date.strftime('%Y%m%d')
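# e.g. timestamp=1640995200 produces upload_date='20220101' (UTC)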
2353
2354 live_keys = ('is_live', 'was_live')
2355 live_status = info_dict.get('live_status')
2356 if live_status is None:
2357 for key in live_keys:
2358 if info_dict.get(key) is False:
2359 continue
2360 if info_dict.get(key):
2361 live_status = key
2362 break
2363 if all(info_dict.get(key) is False for key in live_keys):
2364 live_status = 'not_live'
2365 if live_status:
2366 info_dict['live_status'] = live_status
2367 for key in live_keys:
2368 if info_dict.get(key) is None:
2369 info_dict[key] = (live_status == key)
2370
2371 # Auto generate title fields corresponding to the *_number fields when missing
2372 # in order to always have clean titles. This is very common for TV series.
2373 for field in ('chapter', 'season', 'episode'):
2374 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2375 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2376
2377 def process_video_result(self, info_dict, download=True):
2378 assert info_dict.get('_type', 'video') == 'video'
2379 self._num_videos += 1
2380
2381 if 'id' not in info_dict:
2382 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2383 elif not info_dict.get('id'):
2384 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2385
2386 def report_force_conversion(field, field_not, conversion):
2387 self.report_warning(
2388 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2389 % (field, field_not, conversion))
2390
2391 def sanitize_string_field(info, string_field):
2392 field = info.get(string_field)
2393 if field is None or isinstance(field, compat_str):
2394 return
2395 report_force_conversion(string_field, 'a string', 'string')
2396 info[string_field] = compat_str(field)
2397
2398 def sanitize_numeric_fields(info):
2399 for numeric_field in self._NUMERIC_FIELDS:
2400 field = info.get(numeric_field)
2401 if field is None or isinstance(field, (int, float)):
2402 continue
2403 report_force_conversion(numeric_field, 'numeric', 'int')
2404 info[numeric_field] = int_or_none(field)
2405
2406 sanitize_string_field(info_dict, 'id')
2407 sanitize_numeric_fields(info_dict)
2408 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2409 self.report_warning('"duration" field is negative, there is an error in extractor')
2410
2411 if 'playlist' not in info_dict:
2412 # It isn't part of a playlist
2413 info_dict['playlist'] = None
2414 info_dict['playlist_index'] = None
2415
2416 self._sanitize_thumbnails(info_dict)
2417
2418 thumbnail = info_dict.get('thumbnail')
2419 thumbnails = info_dict.get('thumbnails')
2420 if thumbnail:
2421 info_dict['thumbnail'] = sanitize_url(thumbnail)
2422 elif thumbnails:
2423 info_dict['thumbnail'] = thumbnails[-1]['url']
2424
2425 if info_dict.get('display_id') is None and 'id' in info_dict:
2426 info_dict['display_id'] = info_dict['id']
2427
2428 self._fill_common_fields(info_dict)
2429
2430 for cc_kind in ('subtitles', 'automatic_captions'):
2431 cc = info_dict.get(cc_kind)
2432 if cc:
2433 for _, subtitle in cc.items():
2434 for subtitle_format in subtitle:
2435 if subtitle_format.get('url'):
2436 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2437 if subtitle_format.get('ext') is None:
2438 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2439
2440 automatic_captions = info_dict.get('automatic_captions')
2441 subtitles = info_dict.get('subtitles')
2442
2443 info_dict['requested_subtitles'] = self.process_subtitles(
2444 info_dict['id'], subtitles, automatic_captions)
2445
2446 if info_dict.get('formats') is None:
2447 # There's only one format available
2448 formats = [info_dict]
2449 else:
2450 formats = info_dict['formats']
2451
2452 # or None ensures --clean-infojson removes it
2453 info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
2454 if not self.params.get('allow_unplayable_formats'):
2455 formats = [f for f in formats if not f.get('has_drm')]
2456 if info_dict['_has_drm'] and all(
2457 f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2458 self.report_warning(
2459 'This video is DRM protected and only images are available for download. '
2460 'Use --list-formats to see them')
2461
2462 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2463 if not get_from_start:
2464 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2465 if info_dict.get('is_live') and formats:
2466 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2467 if get_from_start and not formats:
2468 self.raise_no_formats(info_dict, msg=(
2469 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2470 'If you want to download from the current time, use --no-live-from-start'))
2471
2472 if not formats:
2473 self.raise_no_formats(info_dict)
2474
2475 def is_wellformed(f):
2476 url = f.get('url')
2477 if not url:
2478 self.report_warning(
2479 '"url" field is missing or empty - skipping format, '
2480 'there is an error in extractor')
2481 return False
2482 if isinstance(url, bytes):
2483 sanitize_string_field(f, 'url')
2484 return True
2485
2486 # Filter out malformed formats for better extraction robustness
2487 formats = list(filter(is_wellformed, formats))
2488
2489 formats_dict = {}
2490
2491 # We check that all the formats have the format and format_id fields
2492 for i, format in enumerate(formats):
2493 sanitize_string_field(format, 'format_id')
2494 sanitize_numeric_fields(format)
2495 format['url'] = sanitize_url(format['url'])
2496 if not format.get('format_id'):
2497 format['format_id'] = compat_str(i)
2498 else:
2499 # Sanitize format_id from characters used in format selector expression
2500 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
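# e.g. 'hls,live' becomes 'hls_live', so that ',', '/', '+', brackets and
# parentheses keep their special meaning in format selector expressions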
2501 format_id = format['format_id']
2502 if format_id not in formats_dict:
2503 formats_dict[format_id] = []
2504 formats_dict[format_id].append(format)
2505
2506 # Make sure all formats have unique format_id
2507 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2508 for format_id, ambiguous_formats in formats_dict.items():
2509 ambiguous_id = len(ambiguous_formats) > 1
2510 for i, format in enumerate(ambiguous_formats):
2511 if ambiguous_id:
2512 format['format_id'] = '%s-%d' % (format_id, i)
2513 if format.get('ext') is None:
2514 format['ext'] = determine_ext(format['url']).lower()
2515 # Ensure there is no conflict between id and ext in format selection
2516 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2517 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2518 format['format_id'] = 'f%s' % format['format_id']
2519
2520 for i, format in enumerate(formats):
2521 if format.get('format') is None:
2522 format['format'] = '{id} - {res}{note}'.format(
2523 id=format['format_id'],
2524 res=self.format_resolution(format),
2525 note=format_field(format, 'format_note', ' (%s)'),
2526 )
2527 if format.get('protocol') is None:
2528 format['protocol'] = determine_protocol(format)
2529 if format.get('resolution') is None:
2530 format['resolution'] = self.format_resolution(format, default=None)
2531 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2532 format['dynamic_range'] = 'SDR'
2533 if (info_dict.get('duration') and format.get('tbr')
2534 and not format.get('filesize') and not format.get('filesize_approx')):
2535 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
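# approximate bytes: duration (s) * tbr (KBit/s) * 1024 bits / 8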
2536
2537 # Add HTTP headers, so that external programs can use them from the
2538 # json output
2539 full_format_info = info_dict.copy()
2540 full_format_info.update(format)
2541 format['http_headers'] = self._calc_headers(full_format_info)
2542 # Remove private housekeeping stuff
2543 if '__x_forwarded_for_ip' in info_dict:
2544 del info_dict['__x_forwarded_for_ip']
2545
2546 if self.params.get('check_formats') is True:
2547 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2548
2549 if not formats or formats[0] is not info_dict:
2550 # Only set the 'formats' field if the original info_dict lists them;
2551 # otherwise we end up with a circular reference: the first (and only)
2552 # element of the 'formats' field in info_dict would be info_dict itself,
2553 # which can't be exported to JSON
2554 info_dict['formats'] = formats
2555
2556 info_dict, _ = self.pre_process(info_dict)
2557
2558 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2559 return info_dict
2560
2561 self.post_extract(info_dict)
2562 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2563
2564 # The pre-processors may have modified the formats
2565 formats = info_dict.get('formats', [info_dict])
2566
2567 list_only = self.params.get('simulate') is None and (
2568 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2569 interactive_format_selection = not list_only and self.format_selector == '-'
2570 if self.params.get('list_thumbnails'):
2571 self.list_thumbnails(info_dict)
2572 if self.params.get('listsubtitles'):
2573 if 'automatic_captions' in info_dict:
2574 self.list_subtitles(
2575 info_dict['id'], automatic_captions, 'automatic captions')
2576 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2577 if self.params.get('listformats') or interactive_format_selection:
2578 self.list_formats(info_dict)
2579 if list_only:
2580 # Without this printing, -F --print-json will not work
2581 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2582 return
2583
2584 format_selector = self.format_selector
2585 if format_selector is None:
2586 req_format = self._default_format_spec(info_dict, download=download)
2587 self.write_debug('Default format spec: %s' % req_format)
2588 format_selector = self.build_format_selector(req_format)
2589
2590 while True:
2591 if interactive_format_selection:
2592 req_format = input(
2593 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2594 try:
2595 format_selector = self.build_format_selector(req_format)
2596 except SyntaxError as err:
2597 self.report_error(err, tb=False, is_error=False)
2598 continue
2599
2600 formats_to_download = list(format_selector({
2601 'formats': formats,
2602 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2603 'incomplete_formats': (
2604 # All formats are video-only or
2605 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2606 # all formats are audio-only
2607 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2608 }))
2609 if interactive_format_selection and not formats_to_download:
2610 self.report_error('Requested format is not available', tb=False, is_error=False)
2611 continue
2612 break
2613
2614 if not formats_to_download:
2615 if not self.params.get('ignore_no_formats_error'):
2616 raise ExtractorError(
2617 'Requested format is not available. Use --list-formats for a list of available formats',
2618 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2619 self.report_warning('Requested format is not available')
2620 # Process what we can, even without any available formats.
2621 formats_to_download = [{}]
2622
2623 best_format = formats_to_download[-1]
2624 if download:
2625 if best_format:
2626 self.to_screen(
2627 f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2628 + ', '.join([f['format_id'] for f in formats_to_download]))
2629 max_downloads_reached = False
2630 for i, fmt in enumerate(formats_to_download):
2631 formats_to_download[i] = new_info = self._copy_infodict(info_dict)
2632 new_info.update(fmt)
2633 try:
2634 self.process_info(new_info)
2635 except MaxDownloadsReached:
2636 max_downloads_reached = True
2637 # Remove copied info
2638 for key, val in tuple(new_info.items()):
2639 if info_dict.get(key) == val:
2640 new_info.pop(key)
2641 if max_downloads_reached:
2642 break
2643
2644 write_archive = {f.get('__write_download_archive', False) for f in formats_to_download}
2645 assert write_archive.issubset({True, False, 'ignore'})
2646 if True in write_archive and False not in write_archive:
2647 self.record_download_archive(info_dict)
2648
2649 info_dict['requested_downloads'] = formats_to_download
2650 info_dict = self.run_all_pps('after_video', info_dict)
2651 if max_downloads_reached:
2652 raise MaxDownloadsReached()
2653
2654 # We update the info dict with the selected best quality format (backwards compatibility)
2655 info_dict.update(best_format)
2656 return info_dict
2657
2658 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2659 """Select the requested subtitles and their format"""
2660 available_subs, normal_sub_langs = {}, []
2661 if normal_subtitles and self.params.get('writesubtitles'):
2662 available_subs.update(normal_subtitles)
2663 normal_sub_langs = tuple(normal_subtitles.keys())
2664 if automatic_captions and self.params.get('writeautomaticsub'):
2665 for lang, cap_info in automatic_captions.items():
2666 if lang not in available_subs:
2667 available_subs[lang] = cap_info
2668
2669 if (not self.params.get('writesubtitles')
2670 and not self.params.get('writeautomaticsub')
2671 or not available_subs):
2672 return None
2673
2674 all_sub_langs = tuple(available_subs.keys())
2675 if self.params.get('allsubtitles', False):
2676 requested_langs = all_sub_langs
2677 elif self.params.get('subtitleslangs', False):
2678 # A list is used so that the order of languages will be the same as
2679 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
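# e.g. subtitleslangs ['en.*', '-en-US'] first collects every en* track,
# then the '-'-prefixed pattern removes en-US from the selection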
2680 requested_langs = []
2681 for lang_re in self.params.get('subtitleslangs'):
2682 discard = lang_re[0] == '-'
2683 if discard:
2684 lang_re = lang_re[1:]
2685 if lang_re == 'all':
2686 if discard:
2687 requested_langs = []
2688 else:
2689 requested_langs.extend(all_sub_langs)
2690 continue
2691 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2692 if discard:
2693 for lang in current_langs:
2694 while lang in requested_langs:
2695 requested_langs.remove(lang)
2696 else:
2697 requested_langs.extend(current_langs)
2698 requested_langs = orderedSet(requested_langs)
2699 elif normal_sub_langs:
2700 requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
2701 else:
2702 requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
2703 if requested_langs:
2704 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2705
2706 formats_query = self.params.get('subtitlesformat', 'best')
2707 formats_preference = formats_query.split('/') if formats_query else []
2708 subs = {}
2709 for lang in requested_langs:
2710 formats = available_subs.get(lang)
2711 if formats is None:
2712 self.report_warning(f'{lang} subtitles not available for {video_id}')
2713 continue
2714 for ext in formats_preference:
2715 if ext == 'best':
2716 f = formats[-1]
2717 break
2718 matches = list(filter(lambda f: f['ext'] == ext, formats))
2719 if matches:
2720 f = matches[-1]
2721 break
2722 else:
2723 f = formats[-1]
2724 self.report_warning(
2725 'No subtitle format found matching "%s" for language %s, '
2726 'using %s' % (formats_query, lang, f['ext']))
2727 subs[lang] = f
2728 return subs
2729
2730 def _forceprint(self, key, info_dict):
2731 if info_dict is None:
2732 return
2733 info_copy = info_dict.copy()
2734 info_copy['formats_table'] = self.render_formats_table(info_dict)
2735 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2736 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2737 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2738
2739 def format_tmpl(tmpl):
2740 mobj = re.match(r'\w+(=?)$', tmpl)
2741 if mobj and mobj.group(1):
2742 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2743 elif mobj:
2744 return f'%({tmpl})s'
2745 return tmpl
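# e.g. a forceprint entry 'title=' expands to 'title = %(title)r',
# bare 'id' becomes '%(id)s', and full templates pass through unchanged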
2746
2747 for tmpl in self.params['forceprint'].get(key, []):
2748 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2749
2750 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2751 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
2752 tmpl = format_tmpl(tmpl)
2753 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
2754 if self._ensure_dir_exists(filename):
2755 with open(filename, 'a', encoding='utf-8') as f:
2756 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
2757
2758 def __forced_printings(self, info_dict, filename, incomplete):
2759 def print_mandatory(field, actual_field=None):
2760 if actual_field is None:
2761 actual_field = field
2762 if (self.params.get('force%s' % field, False)
2763 and (not incomplete or info_dict.get(actual_field) is not None)):
2764 self.to_stdout(info_dict[actual_field])
2765
2766 def print_optional(field):
2767 if (self.params.get('force%s' % field, False)
2768 and info_dict.get(field) is not None):
2769 self.to_stdout(info_dict[field])
2770
2771 info_dict = info_dict.copy()
2772 if filename is not None:
2773 info_dict['filename'] = filename
2774 if info_dict.get('requested_formats') is not None:
2775 # For RTMP URLs, also include the playpath
2776 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2777 elif info_dict.get('url'):
2778 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2779
2780 if (self.params.get('forcejson')
2781 or self.params['forceprint'].get('video')
2782 or self.params['print_to_file'].get('video')):
2783 self.post_extract(info_dict)
2784 self._forceprint('video', info_dict)
2785
2786 print_mandatory('title')
2787 print_mandatory('id')
2788 print_mandatory('url', 'urls')
2789 print_optional('thumbnail')
2790 print_optional('description')
2791 print_optional('filename')
2792 if self.params.get('forceduration') and info_dict.get('duration') is not None:
2793 self.to_stdout(formatSeconds(info_dict['duration']))
2794 print_mandatory('format')
2795
2796 if self.params.get('forcejson'):
2797 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2798
2799 def dl(self, name, info, subtitle=False, test=False):
2800 if not info.get('url'):
2801 self.raise_no_formats(info, True)
2802
2803 if test:
2804 verbose = self.params.get('verbose')
2805 params = {
2806 'test': True,
2807 'quiet': self.params.get('quiet') or not verbose,
2808 'verbose': verbose,
2809 'noprogress': not verbose,
2810 'nopart': True,
2811 'skip_unavailable_fragments': False,
2812 'keep_fragments': False,
2813 'overwrites': True,
2814 '_no_ytdl_file': True,
2815 }
2816 else:
2817 params = self.params
2818 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2819 if not test:
2820 for ph in self._progress_hooks:
2821 fd.add_progress_hook(ph)
2822 urls = '", "'.join(
2823 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2824 for f in info.get('requested_formats', []) or [info])
2825 self.write_debug('Invoking downloader on "%s"' % urls)
2826
2827 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
2828 # But it may contain objects that are not deep-copyable
2829 new_info = self._copy_infodict(info)
2830 if new_info.get('http_headers') is None:
2831 new_info['http_headers'] = self._calc_headers(new_info)
2832 return fd.download(name, new_info, subtitle)
2833
2834 def existing_file(self, filepaths, *, default_overwrite=True):
2835 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2836 if existing_files and not self.params.get('overwrites', default_overwrite):
2837 return existing_files[0]
2838
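# Otherwise overwriting is allowed (or nothing exists yet): delete all
# the existing candidates and return None so the caller (re)downloads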
2839 for file in existing_files:
2840 self.report_file_delete(file)
2841 os.remove(file)
2842 return None
2843
2844 def process_info(self, info_dict):
2845 """Process a single resolved IE result. (Modifies it in-place)"""
2846
2847 assert info_dict.get('_type', 'video') == 'video'
2848 original_infodict = info_dict
2849
2850 if 'format' not in info_dict and 'ext' in info_dict:
2851 info_dict['format'] = info_dict['ext']
2852
2853 # This is mostly just for backward compatibility of process_info
2854 # As a side-effect, this allows for format-specific filters
2855 if self._match_entry(info_dict) is not None:
2856 info_dict['__write_download_archive'] = 'ignore'
2857 return
2858
2859 # Does nothing under normal operation - for backward compatibility of process_info
2860 self.post_extract(info_dict)
2861 self._num_downloads += 1
2862
2863 # info_dict['_filename'] needs to be set for backward compatibility
2864 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2865 temp_filename = self.prepare_filename(info_dict, 'temp')
2866 files_to_move = {}
2867
2868 # Forced printings
2869 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2870
2871 if self.params.get('simulate'):
2872 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2873 return
2874
2875 if full_filename is None:
2876 return
2877 if not self._ensure_dir_exists(encodeFilename(full_filename)):
2878 return
2879 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2880 return
2881
2882 if self._write_description('video', info_dict,
2883 self.prepare_filename(info_dict, 'description')) is None:
2884 return
2885
2886 sub_files = self._write_subtitles(info_dict, temp_filename)
2887 if sub_files is None:
2888 return
2889 files_to_move.update(dict(sub_files))
2890
2891 thumb_files = self._write_thumbnails(
2892 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2893 if thumb_files is None:
2894 return
2895 files_to_move.update(dict(thumb_files))
2896
2897 infofn = self.prepare_filename(info_dict, 'infojson')
2898 _infojson_written = self._write_info_json('video', info_dict, infofn)
2899 if _infojson_written:
2900 info_dict['infojson_filename'] = infofn
2901 # For backward compatibility, even though it was a private field
2902 info_dict['__infojson_filename'] = infofn
2903 elif _infojson_written is None:
2904 return
2905
2906 # Note: Annotations are deprecated
2907 annofn = None
2908 if self.params.get('writeannotations', False):
2909 annofn = self.prepare_filename(info_dict, 'annotation')
2910 if annofn:
2911 if not self._ensure_dir_exists(encodeFilename(annofn)):
2912 return
2913 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2914 self.to_screen('[info] Video annotations are already present')
2915 elif not info_dict.get('annotations'):
2916 self.report_warning('There are no annotations to write.')
2917 else:
2918 try:
2919 self.to_screen('[info] Writing video annotations to: ' + annofn)
2920 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2921 annofile.write(info_dict['annotations'])
2922 except (KeyError, TypeError):
2923 self.report_warning('There are no annotations to write.')
2924 except OSError:
2925 self.report_error('Cannot write annotations file: ' + annofn)
2926 return
2927
2928 # Write internet shortcut files
2929 def _write_link_file(link_type):
2930 url = try_get(info_dict['webpage_url'], iri_to_uri)
2931 if not url:
2932 self.report_warning(
2933 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2934 return True
2935 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2936 if not self._ensure_dir_exists(encodeFilename(linkfn)):
2937 return False
2938 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2939 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2940 return True
2941 try:
2942 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2943 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2944 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2945 template_vars = {'url': url}
2946 if link_type == 'desktop':
2947 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2948 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2949 except OSError:
2950 self.report_error(f'Cannot write internet shortcut {linkfn}')
2951 return False
2952 return True
2953
2954 write_links = {
2955 'url': self.params.get('writeurllink'),
2956 'webloc': self.params.get('writewebloclink'),
2957 'desktop': self.params.get('writedesktoplink'),
2958 }
2959 if self.params.get('writelink'):
2960 link_type = ('webloc' if sys.platform == 'darwin'
2961 else 'desktop' if sys.platform.startswith('linux')
2962 else 'url')
2963 write_links[link_type] = True
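# For reference, LINK_TEMPLATES (from utils) provides the three flavours:
# .url is a Windows INI snippet ('[InternetShortcut]\nURL=...'), .webloc an
# Apple XML plist, and .desktop an XDG desktop entry with Type=Link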
2964
2965 if any(should_write and not _write_link_file(link_type)
2966 for link_type, should_write in write_links.items()):
2967 return
2968
2969 def replace_info_dict(new_info):
2970 nonlocal info_dict
2971 if new_info == info_dict:
2972 return
2973 info_dict.clear()
2974 info_dict.update(new_info)
2975
2976 try:
2977 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2978 replace_info_dict(new_info)
2979 except PostProcessingError as err:
2980 self.report_error('Preprocessing: %s' % str(err))
2981 return
2982
2983 if self.params.get('skip_download'):
2984 info_dict['filepath'] = temp_filename
2985 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2986 info_dict['__files_to_move'] = files_to_move
2987 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
2988 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2989 else:
2990 # Download
2991 info_dict.setdefault('__postprocessors', [])
2992 try:
2993
2994 def existing_video_file(*filepaths):
2995 ext = info_dict.get('ext')
2996 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
2997 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
2998 default_overwrite=False)
2999 if file:
3000 info_dict['ext'] = os.path.splitext(file)[1][1:]
3001 return file
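# Converted and original names are interleaved so that an already
# post-processed file is preferred; e.g. with ext='webm' and final_ext='mp4'
# the candidates are tried in the order: full.mp4, full.webm, temp.mp4, temp.webm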
3002
3003 success = True
3004 if info_dict.get('requested_formats') is not None:
3005
3006 def compatible_formats(formats):
3007 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3008 video_formats = [format for format in formats if format.get('vcodec') != 'none']
3009 audio_formats = [format for format in formats if format.get('acodec') != 'none']
3010 if len(video_formats) > 2 or len(audio_formats) > 2:
3011 return False
3012
3013 # Check extension
3014 exts = {format.get('ext') for format in formats}
3015 COMPATIBLE_EXTS = (
3016 {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
3017 {'webm'},
3018 )
3019 for ext_sets in COMPATIBLE_EXTS:
3020 if ext_sets.issuperset(exts):
3021 return True
3022 # TODO: Check acodec/vcodec
3023 return False
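# e.g. mp4 video + m4a audio is mergeable as-is, while webm video +
# m4a audio fails this check and gets merged into mkv below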
3024
3025 requested_formats = info_dict['requested_formats']
3026 old_ext = info_dict['ext']
3027 if self.params.get('merge_output_format') is None:
3028 if not compatible_formats(requested_formats):
3029 info_dict['ext'] = 'mkv'
3030 self.report_warning(
3031 'Requested formats are incompatible for merge and will be merged into mkv')
3032 if (info_dict['ext'] == 'webm'
3033 and info_dict.get('thumbnails')
3034 # check with type instead of pp_key, __name__, or isinstance
3035 # since we don't want any custom PPs to trigger this
3036 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
3037 info_dict['ext'] = 'mkv'
3038 self.report_warning(
3039 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3040 new_ext = info_dict['ext']
3041
3042 def correct_ext(filename, ext=new_ext):
3043 if filename == '-':
3044 return filename
3045 filename_real_ext = os.path.splitext(filename)[1][1:]
3046 filename_wo_ext = (
3047 os.path.splitext(filename)[0]
3048 if filename_real_ext in (old_ext, new_ext)
3049 else filename)
3050 return f'{filename_wo_ext}.{ext}'
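# e.g. with old_ext='webm', new_ext='mkv': 'video.webm' -> 'video.mkv',
# but an unrelated extension is kept: 'video.part' -> 'video.part.mkv'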
3051
3052 # Ensure filename always has a correct extension for successful merge
3053 full_filename = correct_ext(full_filename)
3054 temp_filename = correct_ext(temp_filename)
3055 dl_filename = existing_video_file(full_filename, temp_filename)
3056 info_dict['__real_download'] = False
3057
3058 downloaded = []
3059 merger = FFmpegMergerPP(self)
3060
3061 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3062 if dl_filename is not None:
3063 self.report_file_already_downloaded(dl_filename)
3064 elif fd:
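# A single downloader handles all the requested formats here; ffmpeg
# downloads and muxes them in one go, so the per-format intermediate
# files (and the later merge step) are only needed for other downloaders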
3065 for f in requested_formats if fd != FFmpegFD else []:
3066 f['filepath'] = fname = prepend_extension(
3067 correct_ext(temp_filename, info_dict['ext']),
3068 'f%s' % f['format_id'], info_dict['ext'])
3069 downloaded.append(fname)
3070 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3071 success, real_download = self.dl(temp_filename, info_dict)
3072 info_dict['__real_download'] = real_download
3073 else:
3074 if self.params.get('allow_unplayable_formats'):
3075 self.report_warning(
3076 'You have requested merging of multiple formats '
3077 'while also allowing unplayable formats to be downloaded. '
3078 'The formats won\'t be merged to prevent data corruption.')
3079 elif not merger.available:
3080 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3081 if not self.params.get('ignoreerrors'):
3082 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3083 return
3084 self.report_warning(f'{msg}. The formats won\'t be merged')
3085
3086 if temp_filename == '-':
3087 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3088 else 'but the formats are incompatible for simultaneous download' if merger.available
3089 else 'but ffmpeg is not installed')
3090 self.report_warning(
3091 f'You have requested downloading multiple formats to stdout {reason}. '
3092 'The formats will be streamed one after the other')
3093 fname = temp_filename
3094 for f in requested_formats:
3095 new_info = dict(info_dict)
3096 del new_info['requested_formats']
3097 new_info.update(f)
3098 if temp_filename != '-':
3099 fname = prepend_extension(
3100 correct_ext(temp_filename, new_info['ext']),
3101 'f%s' % f['format_id'], new_info['ext'])
3102 if not self._ensure_dir_exists(fname):
3103 return
3104 f['filepath'] = fname
3105 downloaded.append(fname)
3106 partial_success, real_download = self.dl(fname, new_info)
3107 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3108 success = success and partial_success
3109
3110 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3111 info_dict['__postprocessors'].append(merger)
3112 info_dict['__files_to_merge'] = downloaded
3113 # Even if nothing new was downloaded, the actual merge only happens now
3114 info_dict['__real_download'] = True
3115 else:
3116 for file in downloaded:
3117 files_to_move[file] = None
3118 else:
3119 # Just a single file
3120 dl_filename = existing_video_file(full_filename, temp_filename)
3121 if dl_filename is None or dl_filename == temp_filename:
3122 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3123 # So we should try to resume the download
3124 success, real_download = self.dl(temp_filename, info_dict)
3125 info_dict['__real_download'] = real_download
3126 else:
3127 self.report_file_already_downloaded(dl_filename)
3128
3129 dl_filename = dl_filename or temp_filename
3130 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3131
3132 except network_exceptions as err:
3133 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3134 return
3135 except OSError as err:
3136 raise UnavailableVideoError(err)
3137 except ContentTooShortError as err:
3138 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3139 return
3140
3141 if success and full_filename != '-':
3142
3143 def fixup():
3144 do_fixup = True
3145 fixup_policy = self.params.get('fixup')
3146 vid = info_dict['id']
3147
3148 if fixup_policy in ('ignore', 'never'):
3149 return
3150 elif fixup_policy == 'warn':
3151 do_fixup = False
3152 elif fixup_policy != 'force':
3153 assert fixup_policy in ('detect_or_warn', None)
3154 if not info_dict.get('__real_download'):
3155 do_fixup = False
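# i.e. 'force' always fixes, 'warn' only ever warns, and the default
# ('detect_or_warn') fixes freshly downloaded files but merely warns
# about pre-existing ones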
3156
3157 def ffmpeg_fixup(cndn, msg, cls):
3158 if not cndn:
3159 return
3160 if not do_fixup:
3161 self.report_warning(f'{vid}: {msg}')
3162 return
3163 pp = cls(self)
3164 if pp.available:
3165 info_dict['__postprocessors'].append(pp)
3166 else:
3167 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3168
3169 stretched_ratio = info_dict.get('stretched_ratio')
3170 ffmpeg_fixup(
3171 stretched_ratio not in (1, None),
3172 f'Non-uniform pixel ratio {stretched_ratio}',
3173 FFmpegFixupStretchedPP)
3174
3175 ffmpeg_fixup(
3176 (info_dict.get('requested_formats') is None
3177 and info_dict.get('container') == 'm4a_dash'
3178 and info_dict.get('ext') == 'm4a'),
3179 'writing DASH m4a. Only some players support this container',
3180 FFmpegFixupM4aPP)
3181
3182 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3183 downloader = downloader.__name__ if downloader else None
3184
3185 if info_dict.get('requested_formats') is None: # Not needed when merging
3186 ffmpeg_fixup(downloader == 'HlsFD',
3187 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3188 FFmpegFixupM3u8PP)
3189 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3190 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3191
3192 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3193 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3194
3195 fixup()
3196 try:
3197 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3198 except PostProcessingError as err:
3199 self.report_error('Postprocessing: %s' % str(err))
3200 return
3201 try:
3202 for ph in self._post_hooks:
3203 ph(info_dict['filepath'])
3204 except Exception as err:
3205 self.report_error('post hooks: %s' % str(err))
3206 return
3207 info_dict['__write_download_archive'] = True
3208
3209 if self.params.get('force_write_download_archive'):
3210 info_dict['__write_download_archive'] = True
3211
3212 # Make sure the info_dict was modified in-place
3213 assert info_dict is original_infodict
3214
3215 max_downloads = self.params.get('max_downloads')
3216 if max_downloads is not None and self._num_downloads >= int(max_downloads):
3217 raise MaxDownloadsReached()
3218
3219 def __download_wrapper(self, func):
3220 @functools.wraps(func)
3221 def wrapper(*args, **kwargs):
3222 try:
3223 res = func(*args, **kwargs)
3224 except UnavailableVideoError as e:
3225 self.report_error(e)
3226 except MaxDownloadsReached as e:
3227 self.to_screen(f'[info] {e}')
3228 raise
3229 except DownloadCancelled as e:
3230 self.to_screen(f'[info] {e}')
3231 if not self.params.get('break_per_url'):
3232 raise
3233 else:
3234 if self.params.get('dump_single_json', False):
3235 self.post_extract(res)
3236 self.to_stdout(json.dumps(self.sanitize_info(res)))
3237 return wrapper
3238
3239 def download(self, url_list):
3240 """Download a given list of URLs."""
3241 url_list = variadic(url_list) # Passing a single URL is a common mistake
3242 outtmpl = self.outtmpl_dict['default']
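# With several URLs and a fixed output template (no '%' fields), every
# download would be written to the same file; '-' (stdout) and
# --max-downloads 1 are exempted below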
3243 if (len(url_list) > 1
3244 and outtmpl != '-'
3245 and '%' not in outtmpl
3246 and self.params.get('max_downloads') != 1):
3247 raise SameFileError(outtmpl)
3248
3249 for url in url_list:
3250 self.__download_wrapper(self.extract_info)(
3251 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3252
3253 return self._download_retcode
3254
3255 def download_with_info_file(self, info_filename):
3256 with contextlib.closing(fileinput.FileInput(
3257 [info_filename], mode='r',
3258 openhook=fileinput.hook_encoded('utf-8'))) as f:
3259 # FileInput doesn't have a read method, so we can't call json.load
3260 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3261 try:
3262 self.__download_wrapper(self.process_ie_result)(info, download=True)
3263 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3264 if not isinstance(e, EntryNotInPlaylist):
3265 self.to_stderr('\r')
3266 webpage_url = info.get('webpage_url')
3267 if webpage_url is not None:
3268 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3269 return self.download([webpage_url])
3270 else:
3271 raise
3272 return self._download_retcode
3273
3274 @staticmethod
3275 def sanitize_info(info_dict, remove_private_keys=False):
3276 ''' Sanitize the infodict for converting to json '''
3277 if info_dict is None:
3278 return info_dict
3279 info_dict.setdefault('epoch', int(time.time()))
3280 info_dict.setdefault('_type', 'video')
3281
3282 if remove_private_keys:
3283 reject = lambda k, v: v is None or k.startswith('__') or k in {
3284 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3285 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
3286 }
3287 else:
3288 reject = lambda k, v: False
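# e.g. with remove_private_keys=True,
# {'id': 'x', '__real_download': True, 'filepath': 'f.mp4', 'width': None}
# is filtered down to just {'id': 'x'}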
3289
3290 def filter_fn(obj):
3291 if isinstance(obj, dict):
3292 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3293 elif isinstance(obj, (list, tuple, set, LazyList)):
3294 return list(map(filter_fn, obj))
3295 elif obj is None or isinstance(obj, (str, int, float, bool)):
3296 return obj
3297 else:
3298 return repr(obj)
3299
3300 return filter_fn(info_dict)
3301
3302 @staticmethod
3303 def filter_requested_info(info_dict, actually_filter=True):
3304 ''' Alias of sanitize_info for backward compatibility '''
3305 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3306
3307 @staticmethod
3308 def post_extract(info_dict):
3309 def actual_post_extract(info_dict):
3310 if info_dict.get('_type') in ('playlist', 'multi_video'):
3311 for video_dict in info_dict.get('entries', {}):
3312 actual_post_extract(video_dict or {})
3313 return
3314
3315 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3316 info_dict.update(post_extractor())
3317
3318 actual_post_extract(info_dict or {})
3319
3320 def run_pp(self, pp, infodict):
3321 files_to_delete = []
3322 if '__files_to_move' not in infodict:
3323 infodict['__files_to_move'] = {}
3324 try:
3325 files_to_delete, infodict = pp.run(infodict)
3326 except PostProcessingError as e:
3327 # Must be True and not 'only_download'
3328 if self.params.get('ignoreerrors') is True:
3329 self.report_error(e)
3330 return infodict
3331 raise
3332
3333 if not files_to_delete:
3334 return infodict
3335 if self.params.get('keepvideo', False):
3336 for f in files_to_delete:
3337 infodict['__files_to_move'].setdefault(f, '')
3338 else:
3339 for old_filename in set(files_to_delete):
3340 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3341 try:
3342 os.remove(encodeFilename(old_filename))
3343 except OSError:
3344 self.report_warning('Unable to remove downloaded original file')
3345 if old_filename in infodict['__files_to_move']:
3346 del infodict['__files_to_move'][old_filename]
3347 return infodict
3348
3349 def run_all_pps(self, key, info, *, additional_pps=None):
3350 self._forceprint(key, info)
3351 for pp in (additional_pps or []) + self._pps[key]:
3352 info = self.run_pp(pp, info)
3353 return info
3354
3355 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3356 info = dict(ie_info)
3357 info['__files_to_move'] = files_to_move or {}
3358 info = self.run_all_pps(key, info)
3359 return info, info.pop('__files_to_move', None)
3360
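# Rough lifecycle: 'pre_process'/'before_dl' PPs run before the download;
# 'post_process' PPs (plus the format-specific ones collected in
# '__postprocessors') run on the downloaded file; 'after_move' PPs run
# once the file is at its final location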
3361 def post_process(self, filename, info, files_to_move=None):
3362 """Run all the postprocessors on the given file."""
3363 info['filepath'] = filename
3364 info['__files_to_move'] = files_to_move or {}
3365 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3366 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3367 del info['__files_to_move']
3368 return self.run_all_pps('after_move', info)
3369
3370 def _make_archive_id(self, info_dict):
3371 video_id = info_dict.get('id')
3372 if not video_id:
3373 return
3374 # Future-proof against any change in case,
3375 # and for backward compatibility with prior versions
3376 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3377 if extractor is None:
3378 url = str_or_none(info_dict.get('url'))
3379 if not url:
3380 return
3381 # Try to find matching extractor for the URL and take its ie_key
3382 for ie_key, ie in self._ies.items():
3383 if ie.suitable(url):
3384 extractor = ie_key
3385 break
3386 else:
3387 return
3388 return f'{extractor.lower()} {video_id}'
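# The resulting entry, e.g. 'youtube dQw4w9WgXcQ', is what gets stored
# in (and matched against) the download archive file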
3389
3390 def in_download_archive(self, info_dict):
3391 fn = self.params.get('download_archive')
3392 if fn is None:
3393 return False
3394
3395 vid_id = self._make_archive_id(info_dict)
3396 if not vid_id:
3397 return False # Incomplete video information
3398
3399 return vid_id in self.archive
3400
3401 def record_download_archive(self, info_dict):
3402 fn = self.params.get('download_archive')
3403 if fn is None:
3404 return
3405 vid_id = self._make_archive_id(info_dict)
3406 assert vid_id
3407 self.write_debug(f'Adding to archive: {vid_id}')
3408 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3409 archive_file.write(vid_id + '\n')
3410 self.archive.add(vid_id)
3411
3412 @staticmethod
3413 def format_resolution(format, default='unknown'):
3414 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3415 return 'audio only'
3416 if format.get('resolution') is not None:
3417 return format['resolution']
3418 if format.get('width') and format.get('height'):
3419 return '%dx%d' % (format['width'], format['height'])
3420 elif format.get('height'):
3421 return '%sp' % format['height']
3422 elif format.get('width'):
3423 return '%dx?' % format['width']
3424 return default
3425
3426 def _list_format_headers(self, *headers):
3427 if self.params.get('listformats_table', True) is not False:
3428 return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3429 return headers
3430
3431 def _format_note(self, fdict):
3432 res = ''
3433 if fdict.get('ext') in ['f4f', 'f4m']:
3434 res += '(unsupported)'
3435 if fdict.get('language'):
3436 if res:
3437 res += ' '
3438 res += '[%s]' % fdict['language']
3439 if fdict.get('format_note') is not None:
3440 if res:
3441 res += ' '
3442 res += fdict['format_note']
3443 if fdict.get('tbr') is not None:
3444 if res:
3445 res += ', '
3446 res += '%4dk' % fdict['tbr']
3447 if fdict.get('container') is not None:
3448 if res:
3449 res += ', '
3450 res += '%s container' % fdict['container']
3451 if (fdict.get('vcodec') is not None
3452 and fdict.get('vcodec') != 'none'):
3453 if res:
3454 res += ', '
3455 res += fdict['vcodec']
3456 if fdict.get('vbr') is not None:
3457 res += '@'
3458 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3459 res += 'video@'
3460 if fdict.get('vbr') is not None:
3461 res += '%4dk' % fdict['vbr']
3462 if fdict.get('fps') is not None:
3463 if res:
3464 res += ', '
3465 res += '%sfps' % fdict['fps']
3466 if fdict.get('acodec') is not None:
3467 if res:
3468 res += ', '
3469 if fdict['acodec'] == 'none':
3470 res += 'video only'
3471 else:
3472 res += '%-5s' % fdict['acodec']
3473 elif fdict.get('abr') is not None:
3474 if res:
3475 res += ', '
3476 res += 'audio'
3477 if fdict.get('abr') is not None:
3478 res += '@%3dk' % fdict['abr']
3479 if fdict.get('asr') is not None:
3480 res += ' (%5dHz)' % fdict['asr']
3481 if fdict.get('filesize') is not None:
3482 if res:
3483 res += ', '
3484 res += format_bytes(fdict['filesize'])
3485 elif fdict.get('filesize_approx') is not None:
3486 if res:
3487 res += ', '
3488 res += '~' + format_bytes(fdict['filesize_approx'])
3489 return res
3490
3491 def render_formats_table(self, info_dict):
3492 if not info_dict.get('formats') and not info_dict.get('url'):
3493 return None
3494
3495 formats = info_dict.get('formats', [info_dict])
3496 if self.params.get('listformats_table', True) is False:
3497 table = [
3498 [
3499 format_field(f, 'format_id'),
3500 format_field(f, 'ext'),
3501 self.format_resolution(f),
3502 self._format_note(f)
3503 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3504 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3505
3506 delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3507 table = [
3508 [
3509 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3510 format_field(f, 'ext'),
3511 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3512 format_field(f, 'fps', '\t%d'),
3513 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3514 delim,
3515 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3516 format_field(f, 'tbr', '\t%dk'),
3517 shorten_protocol_name(f.get('protocol', '')),
3518 delim,
3519 format_field(f, 'vcodec', default='unknown').replace(
3520 'none', 'images' if f.get('acodec') == 'none'
3521 else self._format_screen('audio only', self.Styles.SUPPRESS)),
3522 format_field(f, 'vbr', '\t%dk'),
3523 format_field(f, 'acodec', default='unknown').replace(
3524 'none', '' if f.get('vcodec') == 'none'
3525 else self._format_screen('video only', self.Styles.SUPPRESS)),
3526 format_field(f, 'abr', '\t%dk'),
3527 format_field(f, 'asr', '\t%dHz'),
3528 join_nonempty(
3529 self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3530 format_field(f, 'language', '[%s]'),
3531 join_nonempty(format_field(f, 'format_note'),
3532 format_field(f, 'container', ignore=(None, f.get('ext'))),
3533 delim=', '),
3534 delim=' '),
3535 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3536 header_line = self._list_format_headers(
3537 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3538 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3539
3540 return render_table(
3541 header_line, table, hide_empty=True,
3542 delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3543
3544 def render_thumbnails_table(self, info_dict):
3545 thumbnails = list(info_dict.get('thumbnails') or [])
3546 if not thumbnails:
3547 return None
3548 return render_table(
3549 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3550 [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3551
3552 def render_subtitles_table(self, video_id, subtitles):
3553 def _row(lang, formats):
3554 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3555 if len(set(names)) == 1:
3556 names = [] if names[0] == 'unknown' else names[:1]
3557 return [lang, ', '.join(names), ', '.join(exts)]
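# e.g. _row('en', [{'ext': 'vtt', 'name': 'English'}, {'ext': 'srt', 'name': 'English'}])
# -> ['en', 'English', 'srt, vtt']; a repeated name is shown only once,
# and 'unknown' names are suppressed entirely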
3558
3559 if not subtitles:
3560 return None
3561 return render_table(
3562 self._list_format_headers('Language', 'Name', 'Formats'),
3563 [_row(lang, formats) for lang, formats in subtitles.items()],
3564 hide_empty=True)
3565
3566 def __list_table(self, video_id, name, func, *args):
3567 table = func(*args)
3568 if not table:
3569 self.to_screen(f'{video_id} has no {name}')
3570 return
3571 self.to_screen(f'[info] Available {name} for {video_id}:')
3572 self.to_stdout(table)
3573
3574 def list_formats(self, info_dict):
3575 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3576
3577 def list_thumbnails(self, info_dict):
3578 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3579
3580 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3581 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3582
3583 def urlopen(self, req):
3584 """ Start an HTTP download """
3585 if isinstance(req, str):
3586 req = sanitized_Request(req)
3587 return self._opener.open(req, timeout=self._socket_timeout)
3588
3589 def print_debug_header(self):
3590 if not self.params.get('verbose'):
3591 return
3592
3593 def get_encoding(stream):
3594 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3595 if not supports_terminal_sequences(stream):
3596 from .compat import WINDOWS_VT_MODE # Must be imported locally
3597 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3598 return ret
3599
3600 encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3601 locale.getpreferredencoding(),
3602 sys.getfilesystemencoding(),
3603 get_encoding(self._out_files['screen']), get_encoding(self._out_files['error']),
3604 self.get_encoding())
3605
3606 logger = self.params.get('logger')
3607 if logger:
3608 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3609 write_debug(encoding_str)
3610 else:
3611 write_string(f'[debug] {encoding_str}\n', encoding=None)
3612 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3613
3614 source = detect_variant()
3615 write_debug(join_nonempty(
3616 'yt-dlp version', __version__,
3617 f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3618 '' if source == 'unknown' else f'({source})',
3619 delim=' '))
3620 if not _LAZY_LOADER:
3621 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3622 write_debug('Lazy loading extractors is forcibly disabled')
3623 else:
3624 write_debug('Lazy loading extractors is disabled')
3625 if plugin_extractors or plugin_postprocessors:
3626 write_debug('Plugins: %s' % [
3627 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3628 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3629 if self.params.get('compat_opts'):
3630 write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3631
3632 if source == 'source':
3633 try:
3634 sp = Popen(
3635 ['git', 'rev-parse', '--short', 'HEAD'],
3636 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3637 cwd=os.path.dirname(os.path.abspath(__file__)))
3638 out, err = sp.communicate_or_kill()
3639 out = out.decode().strip()
3640 if re.match('[0-9a-f]+', out):
3641 write_debug('Git HEAD: %s' % out)
3642 except Exception:
3643 pass # sys.exc_clear() is Python 2-only; Python 3 needs no manual cleanup
3645
3646 def python_implementation():
3647 impl_name = platform.python_implementation()
3648 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3649 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3650 return impl_name
3651
3652 write_debug('Python version %s (%s %s) - %s' % (
3653 platform.python_version(),
3654 python_implementation(),
3655 platform.architecture()[0],
3656 platform_name()))
3657
3658 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3659 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3660 if ffmpeg_features:
3661 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
3662
3663 exe_versions['rtmpdump'] = rtmpdump_version()
3664 exe_versions['phantomjs'] = PhantomJSwrapper._version()
3665 exe_str = ', '.join(
3666 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3667 ) or 'none'
3668 write_debug('exe versions: %s' % exe_str)
3669
3670 from .dependencies import available_dependencies
3671
3672 write_debug('Optional libraries: %s' % (', '.join(sorted({
3673 module.__name__.split('.')[0] for module in available_dependencies.values()
3674 })) or 'none'))
3675
3676 self._setup_opener()
3677 proxy_map = {}
3678 for handler in self._opener.handlers:
3679 if hasattr(handler, 'proxies'):
3680 proxy_map.update(handler.proxies)
3681 write_debug(f'Proxy map: {proxy_map}')
3682
3683 # Not implemented
3684 if False and self.params.get('call_home'):
3685 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3686 write_debug('Public IP address: %s' % ipaddr)
3687 latest_version = self.urlopen(
3688 'https://yt-dl.org/latest/version').read().decode('utf-8')
3689 if version_tuple(latest_version) > version_tuple(__version__):
3690 self.report_warning(
3691 'You are using an outdated version (newest version: %s)! '
3692 'See https://yt-dl.org/update if you need help updating.' %
3693 latest_version)
3694
3695 def _setup_opener(self):
3696 if hasattr(self, '_opener'):
3697 return
3698 timeout_val = self.params.get('socket_timeout')
3699 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3700
3701 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3702 opts_cookiefile = self.params.get('cookiefile')
3703 opts_proxy = self.params.get('proxy')
3704
3705 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3706
3707 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3708 if opts_proxy is not None:
3709 if opts_proxy == '':
3710 proxies = {}
3711 else:
3712 proxies = {'http': opts_proxy, 'https': opts_proxy}
3713 else:
3714 proxies = compat_urllib_request.getproxies()
3715 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3716 if 'http' in proxies and 'https' not in proxies:
3717 proxies['https'] = proxies['http']
3718 proxy_handler = PerRequestProxyHandler(proxies)
3719
3720 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3721 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3722 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3723 redirect_handler = YoutubeDLRedirectHandler()
3724 data_handler = urllib.request.DataHandler()
3725
3726 # When passing our own FileHandler instance, build_opener won't add the
3727 # default FileHandler and allows us to disable the file protocol, which
3728 # can be used for malicious purposes (see
3729 # https://github.com/ytdl-org/youtube-dl/issues/8227)
3730 file_handler = compat_urllib_request.FileHandler()
3731
3732 def file_open(*args, **kwargs):
3733 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3734 file_handler.file_open = file_open
3735
3736 opener = compat_urllib_request.build_opener(
3737 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3738
3739 # Delete the default user-agent header, which would otherwise apply in
3740 # cases where our custom HTTP handler doesn't come into play
3741 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3742 opener.addheaders = []
3743 self._opener = opener
3744
3745 def encode(self, s):
3746 if isinstance(s, bytes):
3747 return s # Already encoded
3748
3749 try:
3750 return s.encode(self.get_encoding())
3751 except UnicodeEncodeError as err:
3752 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3753 raise
3754
3755 def get_encoding(self):
3756 encoding = self.params.get('encoding')
3757 if encoding is None:
3758 encoding = preferredencoding()
3759 return encoding
3760
3761 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3762 ''' Write infojson and return True = written, 'exists' = already exists, False = skipped, None = error '''
3763 if overwrite is None:
3764 overwrite = self.params.get('overwrites', True)
3765 if not self.params.get('writeinfojson'):
3766 return False
3767 elif not infofn:
3768 self.write_debug(f'Skipping writing {label} infojson')
3769 return False
3770 elif not self._ensure_dir_exists(infofn):
3771 return None
3772 elif not overwrite and os.path.exists(infofn):
3773 self.to_screen(f'[info] {label.title()} metadata is already present')
3774 return 'exists'
3775
3776 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3777 try:
3778 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3779 return True
3780 except OSError:
3781 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3782 return None
3783
3784 def _write_description(self, label, ie_result, descfn):
3785 ''' Write description and return True = written, False = skipped, None = error '''
3786 if not self.params.get('writedescription'):
3787 return False
3788 elif not descfn:
3789 self.write_debug(f'Skipping writing {label} description')
3790 return False
3791 elif not self._ensure_dir_exists(descfn):
3792 return None
3793 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3794 self.to_screen(f'[info] {label.title()} description is already present')
3795 elif ie_result.get('description') is None:
3796 self.report_warning(f'There\'s no {label} description to write')
3797 return False
3798 else:
3799 try:
3800 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3801 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3802 descfile.write(ie_result['description'])
3803 except OSError:
3804 self.report_error(f'Cannot write {label} description file {descfn}')
3805 return None
3806 return True
3807
3808 def _write_subtitles(self, info_dict, filename):
3809 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3810 ret = []
3811 subtitles = info_dict.get('requested_subtitles')
3812 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3813 # Subtitle download errors are already handled as non-fatal by the relevant IE,
3814 # so this silently continues when used with an IE that doesn't support subtitles
3815 return ret
3816
3817 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3818 if not sub_filename_base:
3819 self.to_screen('[info] Skipping writing video subtitles')
3820 return ret
3821 for sub_lang, sub_info in subtitles.items():
3822 sub_format = sub_info['ext']
3823 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3824 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3825 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3826 if existing_sub:
3827 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3828 sub_info['filepath'] = existing_sub
3829 ret.append((existing_sub, sub_filename_final))
3830 continue
3831
3832 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3833 if sub_info.get('data') is not None:
3834 try:
3835 # Use newline='' to prevent conversion of newline characters
3836 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3837 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3838 subfile.write(sub_info['data'])
3839 sub_info['filepath'] = sub_filename
3840 ret.append((sub_filename, sub_filename_final))
3841 continue
3842 except OSError:
3843 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3844 return None
3845
3846 try:
3847 sub_copy = sub_info.copy()
3848 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3849 self.dl(sub_filename, sub_copy, subtitle=True)
3850 sub_info['filepath'] = sub_filename
3851 ret.append((sub_filename, sub_filename_final))
3852 except (DownloadError, ExtractorError, OSError, ValueError) + network_exceptions as err:
3853 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
3854 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
3855 if not self.params.get('ignoreerrors'):
3856 self.report_error(msg)
3857 raise DownloadError(msg)
3858 self.report_warning(msg)
3859 return ret
3860
3861 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3862 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3863 write_all = self.params.get('write_all_thumbnails', False)
3864 thumbnails, ret = [], []
3865 if write_all or self.params.get('writethumbnail', False):
3866 thumbnails = info_dict.get('thumbnails') or []
3867 multiple = write_all and len(thumbnails) > 1
3868
3869 if thumb_filename_base is None:
3870 thumb_filename_base = filename
3871 if thumbnails and not thumb_filename_base:
3872 self.write_debug(f'Skipping writing {label} thumbnail')
3873 return ret
3874
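# Thumbnails are sorted worst to best, so iterate in reverse: unless
# write_all is set, only the best thumbnail that actually downloads is
# written (hence the 'break' below), falling back to the next-best on failure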
3875 for idx, t in list(enumerate(thumbnails))[::-1]:
3876 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3877 thumb_display_id = f'{label} thumbnail {t["id"]}'
3878 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3879 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3880
3881 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3882 if existing_thumb:
3883 self.to_screen('[info] %s is already present' % (
3884 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3885 t['filepath'] = existing_thumb
3886 ret.append((existing_thumb, thumb_filename_final))
3887 else:
3888 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3889 try:
3890 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
3891 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3892 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3893 shutil.copyfileobj(uf, thumbf)
3894 ret.append((thumb_filename, thumb_filename_final))
3895 t['filepath'] = thumb_filename
3896 except network_exceptions as err:
3897 thumbnails.pop(idx)
3898 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3899 if ret and not write_all:
3900 break
3901 return ret