jfr.im git - yt-dlp.git/blame_incremental

Commit	Line	Data
	1	#!/usr/bin/env python3
	2	# coding: utf-8
	3
	4	from __future__ import absolute_import, unicode_literals
	5
	6	import collections
	7	import contextlib
	8	import datetime
	9	import errno
	10	import fileinput
	11	import functools
	12	import io
	13	import itertools
	14	import json
	15	import locale
	16	import operator
	17	import os
	18	import platform
	19	import re
	20	import shutil
	21	import subprocess
	22	import sys
	23	import tempfile
	24	import time
	25	import tokenize
	26	import traceback
	27	import random
	28	import unicodedata
	29
	30	from enum import Enum
	31	from string import ascii_letters
	32
	33	from .compat import (
	34	compat_basestring,
	35	compat_brotli,
	36	compat_get_terminal_size,
	37	compat_kwargs,
	38	compat_numeric_types,
	39	compat_os_name,
	40	compat_pycrypto_AES,
	41	compat_shlex_quote,
	42	compat_str,
	43	compat_tokenize_tokenize,
	44	compat_urllib_error,
	45	compat_urllib_request,
	46	compat_urllib_request_DataHandler,
	47	windows_enable_vt_mode,
	48	)
	49	from .cookies import load_cookies
	50	from .utils import (
	51	age_restricted,
	52	args_to_str,
	53	ContentTooShortError,
	54	date_from_str,
	55	DateRange,
	56	DEFAULT_OUTTMPL,
	57	determine_ext,
	58	determine_protocol,
	59	DownloadCancelled,
	60	DownloadError,
	61	encode_compat_str,
	62	encodeFilename,
	63	EntryNotInPlaylist,
	64	error_to_compat_str,
	65	ExistingVideoReached,
	66	expand_path,
	67	ExtractorError,
	68	float_or_none,
	69	format_bytes,
	70	format_field,
	71	format_decimal_suffix,
	72	formatSeconds,
	73	GeoRestrictedError,
	74	get_domain,
	75	HEADRequest,
	76	InAdvancePagedList,
	77	int_or_none,
	78	iri_to_uri,
	79	ISO3166Utils,
	80	join_nonempty,
	81	LazyList,
	82	LINK_TEMPLATES,
	83	locked_file,
	84	make_dir,
	85	make_HTTPS_handler,
	86	MaxDownloadsReached,
	87	merge_headers,
	88	network_exceptions,
	89	number_of_digits,
	90	orderedSet,
	91	OUTTMPL_TYPES,
	92	PagedList,
	93	parse_filesize,
	94	PerRequestProxyHandler,
	95	platform_name,
	96	Popen,
	97	POSTPROCESS_WHEN,
	98	PostProcessingError,
	99	preferredencoding,
	100	prepend_extension,
	101	ReExtractInfo,
	102	register_socks_protocols,
	103	RejectedVideoReached,
	104	remove_terminal_sequences,
	105	render_table,
	106	replace_extension,
	107	SameFileError,
	108	sanitize_filename,
	109	sanitize_path,
	110	sanitize_url,
	111	sanitized_Request,
	112	std_headers,
	113	STR_FORMAT_RE_TMPL,
	114	STR_FORMAT_TYPES,
	115	str_or_none,
	116	strftime_or_none,
	117	subtitles_filename,
	118	supports_terminal_sequences,
	119	timetuple_from_msec,
	120	to_high_limit_path,
	121	traverse_obj,
	122	try_get,
	123	UnavailableVideoError,
	124	url_basename,
	125	variadic,
	126	version_tuple,
	127	write_json_file,
	128	write_string,
	129	YoutubeDLCookieProcessor,
	130	YoutubeDLHandler,
	131	YoutubeDLRedirectHandler,
	132	)
	133	from .cache import Cache
	134	from .minicurses import format_text
	135	from .extractor import (
	136	gen_extractor_classes,
	137	get_info_extractor,
	138	_LAZY_LOADER,
	139	_PLUGIN_CLASSES as plugin_extractors
	140	)
	141	from .extractor.openload import PhantomJSwrapper
	142	from .downloader import (
	143	FFmpegFD,
	144	get_suitable_downloader,
	145	shorten_protocol_name
	146	)
	147	from .downloader.rtmp import rtmpdump_version
	148	from .postprocessor import (
	149	get_postprocessor,
	150	EmbedThumbnailPP,
	151	FFmpegFixupDuplicateMoovPP,
	152	FFmpegFixupDurationPP,
	153	FFmpegFixupM3u8PP,
	154	FFmpegFixupM4aPP,
	155	FFmpegFixupStretchedPP,
	156	FFmpegFixupTimestampPP,
	157	FFmpegMergerPP,
	158	FFmpegPostProcessor,
	159	MoveFilesAfterDownloadPP,
	160	_PLUGIN_CLASSES as plugin_postprocessors
	161	)
	162	from .update import detect_variant
	163	from .version import __version__, RELEASE_GIT_HEAD
	164
	165	if compat_os_name == 'nt':
	166	import ctypes
	167
	168
	169	class YoutubeDL(object):
	170	"""YoutubeDL class.
	171
	172	YoutubeDL objects are the ones responsible for downloading the
	173	actual video file and writing it to disk if the user has requested
	174	it, among some other tasks. In most cases there should be one per
	175	program. As, given a video URL, the downloader doesn't know how to
	176	extract all the needed information, task that InfoExtractors do, it
	177	has to pass the URL to one of them.
	178
	179	For this, YoutubeDL objects have a method that allows
	180	InfoExtractors to be registered in a given order. When it is passed
	181	a URL, the YoutubeDL object hands it to the first InfoExtractor it
	182	finds that reports being able to handle it. The InfoExtractor extracts
	183	all the information about the video or videos the URL refers to, and
	184	YoutubeDL processes the extracted information, possibly using a File
	185	Downloader to download the video.
	186
	187	YoutubeDL objects accept a lot of parameters. In order not to saturate
	188	the object constructor with arguments, it receives a dictionary of
	189	options instead. These options are available through the params
	190	attribute for the InfoExtractors to use. The YoutubeDL also
	191	registers itself as the downloader in charge for the InfoExtractors
	192	that are added to it, so this is a "mutual registration".
	193
	194	Available options:
	195
	196	username: Username for authentication purposes.
	197	password: Password for authentication purposes.
	198	videopassword: Password for accessing a video.
	199	ap_mso: Adobe Pass multiple-system operator identifier.
	200	ap_username: Multiple-system operator account username.
	201	ap_password: Multiple-system operator account password.
	202	usenetrc: Use netrc for authentication instead.
	203	verbose: Print additional info to stdout.
	204	quiet: Do not print messages to stdout.
	205	no_warnings: Do not print out anything for warnings.
	206	forceprint: A dict with keys WHEN mapped to a list of templates to
	207	print to stdout. The allowed keys are video or any of the
	208	items in utils.POSTPROCESS_WHEN.
	209	For compatibility, a single list is also accepted
	210	print_to_file: A dict with keys WHEN (same as forceprint) mapped to
	211	a list of tuples with (template, filename)
	212	forceurl: Force printing final URL. (Deprecated)
	213	forcetitle: Force printing title. (Deprecated)
	214	forceid: Force printing ID. (Deprecated)
	215	forcethumbnail: Force printing thumbnail URL. (Deprecated)
	216	forcedescription: Force printing description. (Deprecated)
	217	forcefilename: Force printing final filename. (Deprecated)
	218	forceduration: Force printing duration. (Deprecated)
	219	forcejson: Force printing info_dict as JSON.
	220	dump_single_json: Force printing the info_dict of the whole playlist
	221	(or video) as a single JSON line.
	222	force_write_download_archive: Force writing download archive regardless
	223	of 'skip_download' or 'simulate'.
	224	simulate: Do not download the video files. If unset (or None),
	225	simulate only if listsubtitles, listformats or list_thumbnails is used
	226	format: Video format code. see "FORMAT SELECTION" for more details.
	227	You can also pass a function. The function takes 'ctx' as
	228	argument and returns the formats to download.
	229	See "build_format_selector" for an implementation
	230	allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
	231	ignore_no_formats_error: Ignore "No video formats" error. Useful for
	232	extracting metadata even if the video is not actually
	233	available for download (experimental)
	234	format_sort: A list of fields by which to sort the video formats.
	235	See "Sorting Formats" for more details.
	236	format_sort_force: Force the given format_sort. see "Sorting Formats"
	237	for more details.
	238	prefer_free_formats: Whether to prefer video formats with free containers
	239	over non-free ones of same quality.
	240	allow_multiple_video_streams: Allow multiple video streams to be merged
	241	into a single file
	242	allow_multiple_audio_streams: Allow multiple audio streams to be merged
	243	into a single file
	244	check_formats: Whether to test if the formats are downloadable.
	245	Can be True (check all), False (check none),
	246	'selected' (check selected formats),
	247	or None (check only if requested by extractor)
	248	paths: Dictionary of output paths. The allowed keys are 'home'
	249	'temp' and the keys of OUTTMPL_TYPES (in utils.py)
	250	outtmpl: Dictionary of templates for output names. Allowed keys
	251	are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
	252	For compatibility with youtube-dl, a single string can also be used
	253	outtmpl_na_placeholder: Placeholder for unavailable meta fields.
	254	restrictfilenames: Do not allow "&" and spaces in file names
	255	trim_file_name: Limit length of filename (extension excluded)
	256	windowsfilenames: Force the filenames to be windows compatible
	257	ignoreerrors: Do not stop on download/postprocessing errors.
	258	Can be 'only_download' to ignore only download errors.
	259	Default is 'only_download' for CLI, but False for API
	260	skip_playlist_after_errors: Number of allowed failures until the rest of
	261	the playlist is skipped
	262	force_generic_extractor: Force downloader to use the generic extractor
	263	overwrites: Overwrite all video and metadata files if True,
	264	overwrite only non-video files if None
	265	and don't overwrite any file if False
	266	For compatibility with youtube-dl,
	267	"nooverwrites" may also be used instead
	268	playliststart: Playlist item to start at.
	269	playlistend: Playlist item to end at.
	270	playlist_items: Specific indices of playlist to download.
	271	playlistreverse: Download playlist items in reverse order.
	272	playlistrandom: Download playlist items in random order.
	273	matchtitle: Download only matching titles.
	274	rejecttitle: Reject downloads for matching titles.
	275	logger: Log messages to a logging.Logger instance.
	276	logtostderr: Log messages to stderr instead of stdout.
	277	consoletitle: Display progress in console window's titlebar.
	278	writedescription: Write the video description to a .description file
	279	writeinfojson: Write the video metadata to a .info.json file
	280	clean_infojson: Remove private fields from the infojson
	281	getcomments: Extract video comments. This will not be written to disk
	282	unless writeinfojson is also given
	283	writeannotations: Write the video annotations to a .annotations.xml file
	284	writethumbnail: Write the thumbnail image to a file
	285	allow_playlist_files: Whether to write playlists' description, infojson etc
	286	also to disk when using the 'write*' options
	287	write_all_thumbnails: Write all thumbnail formats to files
	288	writelink: Write an internet shortcut file, depending on the
	289	current platform (.url/.webloc/.desktop)
	290	writeurllink: Write a Windows internet shortcut file (.url)
	291	writewebloclink: Write a macOS internet shortcut file (.webloc)
	292	writedesktoplink: Write a Linux internet shortcut file (.desktop)
	293	writesubtitles: Write the video subtitles to a file
	294	writeautomaticsub: Write the automatically generated subtitles to a file
	295	allsubtitles: Deprecated - Use subtitleslangs = ['all']
	296	Downloads all the subtitles of the video
	297	(requires writesubtitles or writeautomaticsub)
	298	listsubtitles: Lists all available subtitles for the video
	299	subtitlesformat: The format code for subtitles
	300	subtitleslangs: List of languages of the subtitles to download (can be regex).
	301	The list may contain "all" to refer to all the available
	302	subtitles. The language can be prefixed with a "-" to
	303	exclude it from the requested languages. Eg: ['all', '-live_chat']
	304	keepvideo: Keep the video file after post-processing
	305	daterange: A DateRange object, download only if the upload_date is in the range.
	306	skip_download: Skip the actual download of the video file
	307	cachedir: Location of the cache files in the filesystem.
	308	False to disable filesystem cache.
	309	noplaylist: Download single video instead of a playlist if in doubt.
	310	age_limit: An integer representing the user's age in years.
	311	Unsuitable videos for the given age are skipped.
	312	min_views: An integer representing the minimum view count the video
	313	must have in order to not be skipped.
	314	Videos without view count information are always
	315	downloaded. None for no limit.
	316	max_views: An integer representing the maximum view count.
	317	Videos that are more popular than that are not
	318	downloaded.
	319	Videos without view count information are always
	320	downloaded. None for no limit.
	321	download_archive: File name of a file where all downloads are recorded.
	322	Videos already present in the file are not downloaded
	323	again.
	324	break_on_existing: Stop the download process after attempting to download a
	325	file that is in the archive.
	326	break_on_reject: Stop the download process when encountering a video that
	327	has been filtered out.
	328	break_per_url: Whether break_on_reject and break_on_existing
	329	should act on each input URL as opposed to for the entire queue
	330	cookiefile: File name where cookies should be read from and dumped to
	331	cookiesfrombrowser: A tuple containing the name of the browser, the profile
	332	name/path from where cookies are loaded, and the name of the
	333	keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
	334	legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
	335	support RFC 5746 secure renegotiation
	336	nocheckcertificate: Do not verify SSL certificates
	337	prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
	338	At the moment, this is only supported by YouTube.
	339	http_headers: A dictionary of custom headers to be used for all requests
	340	proxy: URL of the proxy server to use
	341	geo_verification_proxy: URL of the proxy to use for IP address verification
	342	on geo-restricted sites.
	343	socket_timeout: Time to wait for unresponsive hosts, in seconds
	344	bidi_workaround: Work around buggy terminals without bidirectional text
	345	support, using fribidi
	346	debug_printtraffic:Print out sent and received HTTP traffic
	347	include_ads: Download ads as well (deprecated)
	348	default_search: Prepend this string if an input url is not valid.
	349	'auto' for elaborate guessing
	350	encoding: Use this encoding instead of the system-specified.
	351	extract_flat: Do not resolve URLs, return the immediate result.
	352	Pass in 'in_playlist' to only show this behavior for
	353	playlist items.
	354	wait_for_video: If given, wait for scheduled streams to become available.
	355	The value should be a tuple containing the range
	356	(min_secs, max_secs) to wait between retries
	357	postprocessors: A list of dictionaries, each with an entry
	358	* key: The name of the postprocessor. See
	359	yt_dlp/postprocessor/__init__.py for a list.
	360	* when: When to run the postprocessor. Allowed values are
	361	the entries of utils.POSTPROCESS_WHEN
	362	Assumed to be 'post_process' if not given
	363	post_hooks: Deprecated - Register a custom postprocessor instead
	364	A list of functions that get called as the final step
	365	for each video file, after all postprocessors have been
	366	called. The filename will be passed as the only argument.
	367	progress_hooks: A list of functions that get called on download
	368	progress, with a dictionary with the entries
	369	* status: One of "downloading", "error", or "finished".
	370	Check this first and ignore unknown values.
	371	* info_dict: The extracted info_dict
	372
	373	If status is one of "downloading", or "finished", the
	374	following properties may also be present:
	375	* filename: The final filename (always present)
	376	* tmpfilename: The filename we're currently writing to
	377	* downloaded_bytes: Bytes on disk
	378	* total_bytes: Size of the whole file, None if unknown
	379	* total_bytes_estimate: Guess of the eventual file size,
	380	None if unavailable.
	381	* elapsed: The number of seconds since download started.
	382	* eta: The estimated time in seconds, None if unknown
	383	* speed: The download speed in bytes/second, None if
	384	unknown
	385	* fragment_index: The counter of the currently
	386	downloaded video fragment.
	387	* fragment_count: The number of fragments (= individual
	388	files that will be merged)
	389
	390	Progress hooks are guaranteed to be called at least once
	391	(with status "finished") if the download is successful.
	392	postprocessor_hooks: A list of functions that get called on postprocessing
	393	progress, with a dictionary with the entries
	394	* status: One of "started", "processing", or "finished".
	395	Check this first and ignore unknown values.
	396	* postprocessor: Name of the postprocessor
	397	* info_dict: The extracted info_dict
	398
	399	Postprocessor hooks are guaranteed to be called at least twice
	400	(with status "started" and "finished") if the processing is successful.
	401	merge_output_format: Extension to use when merging formats.
	402	final_ext: Expected final extension; used to detect when the file was
	403	already downloaded and converted
	404	fixup: Automatically correct known faults of the file.
	405	One of:
	406	- "never": do nothing
	407	- "warn": only emit a warning
	408	- "detect_or_warn": check whether we can do anything
	409	about it, warn otherwise (default)
	410	source_address: Client-side IP address to bind to.
	411	call_home: Boolean, true iff we are allowed to contact the
	412	yt-dlp servers for debugging. (BROKEN)
	413	sleep_interval_requests: Number of seconds to sleep between requests
	414	during extraction
	415	sleep_interval: Number of seconds to sleep before each download when
	416	used alone or a lower bound of a range for randomized
	417	sleep before each download (minimum possible number
	418	of seconds to sleep) when used along with
	419	max_sleep_interval.
	420	max_sleep_interval:Upper bound of a range for randomized sleep before each
	421	download (maximum possible number of seconds to sleep).
	422	Must only be used along with sleep_interval.
	423	Actual sleep time will be a random float from range
	424	[sleep_interval; max_sleep_interval].
	425	sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
	426	listformats: Print an overview of available video formats and exit.
	427	list_thumbnails: Print a table of all thumbnails and exit.
	428	match_filter: A function that gets called with the info_dict of
	429	every video.
	430	If it returns a message, the video is ignored.
	431	If it returns None, the video is downloaded.
	432	match_filter_func in utils.py is one example for this.
	433	no_color: Do not emit color codes in output.
	434	geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
	435	HTTP header
	436	geo_bypass_country:
	437	Two-letter ISO 3166-1 alpha-2 country code that will be used for
	438	explicit geographic restriction bypassing via faking
	439	X-Forwarded-For HTTP header
	440	geo_bypass_ip_block:
	441	IP range in CIDR notation that will be used similarly to
	442	geo_bypass_country
	443
	444	The following options determine which downloader is picked:
	445	external_downloader: A dictionary of protocol keys and the executable of the
	446	external downloader to use for it. The allowed protocols
	447	are default\|http\|ftp\|m3u8\|dash\|rtsp\|rtmp\|mms.
	448	Set the value to 'native' to use the native downloader
	449	hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
	450	or {'m3u8': 'ffmpeg'} instead.
	451	Use the native HLS downloader instead of ffmpeg/avconv
	452	if True, otherwise use ffmpeg/avconv if False, otherwise
	453	use downloader suggested by extractor if None.
	454	compat_opts: Compatibility options. See "Differences in default behavior".
	455	The following options do not work when used through the API:
	456	filename, abort-on-error, multistreams, no-live-chat, format-sort
	457	no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
	458	Refer to __init__.py for their implementation
	459	progress_template: Dictionary of templates for progress outputs.
	460	Allowed keys are 'download', 'postprocess',
	461	'download-title' (console title) and 'postprocess-title'.
	462	The template is mapped on a dictionary with keys 'progress' and 'info'
	463
	464	The following parameters are not used by YoutubeDL itself, they are used by
	465	the downloader (see yt_dlp/downloader/common.py):
	466	nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
	467	max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
	468	continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
	469	external_downloader_args, concurrent_fragment_downloads.
	470
	471	The following options are used by the post processors:
	472	prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
	473	otherwise prefer ffmpeg. (avconv support is deprecated)
	474	ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
	475	to the binary or its containing directory.
	476	postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
	477	and a list of additional command-line arguments for the
	478	postprocessor/executable. The dict can also have "PP+EXE" keys
	479	which are used when the given exe is used by the given PP.
	480	Use 'default' as the name for arguments to be passed to all PP
	481	For compatibility with youtube-dl, a single list of args
	482	can also be used
	483
	484	The following options are used by the extractors:
	485	extractor_retries: Number of times to retry for known errors
	486	dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
	487	hls_split_discontinuity: Split HLS playlists to different formats at
	488	discontinuities such as ad breaks (default: False)
	489	extractor_args: A dictionary of arguments to be passed to the extractors.
	490	See "EXTRACTOR ARGUMENTS" for details.
	491	Eg: {'youtube': {'skip': ['dash', 'hls']}}
	492	mark_watched: Mark videos watched (even with --simulate). Only for YouTube
	493	youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
	494	If True (default), DASH manifests and related
	495	data will be downloaded and processed by extractor.
	496	You can reduce network I/O by disabling it if you don't
	497	care about DASH. (only for youtube)
	498	youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
	499	If True (default), HLS manifests and related
	500	data will be downloaded and processed by extractor.

1

#!/usr/bin/env python3

2

# coding: utf-8

3

4

from __future__ import absolute_import, unicode_literals

import collections

import contextlib

import datetime

import errno

import fileinput

import functools

import io

import itertools

import json

import locale

import operator

import os

import platform

import re

import shutil

import subprocess

import sys

import tempfile

import time

import tokenize

import traceback

import random

import unicodedata

from enum import Enum

31

from string import ascii_letters

32

33

from .compat import (

34

compat_basestring,

35

compat_brotli,

36

compat_get_terminal_size,

37

compat_kwargs,

38

compat_numeric_types,

compat_os_name,

compat_pycrypto_AES,

compat_shlex_quote,

compat_str,

compat_tokenize_tokenize,

44

compat_urllib_error,

45

compat_urllib_request,

46

compat_urllib_request_DataHandler,

47

windows_enable_vt_mode,

48

)

49

from .cookies import load_cookies

from .utils import (

age_restricted,

args_to_str,

ContentTooShortError,

date_from_str,

DateRange,

DEFAULT_OUTTMPL,

determine_ext,

determine_protocol,

DownloadCancelled,

DownloadError,

encode_compat_str,

encodeFilename,

EntryNotInPlaylist,

error_to_compat_str,

ExistingVideoReached,

expand_path,

ExtractorError,

float_or_none,

format_bytes,

format_field,

format_decimal_suffix,

formatSeconds,

GeoRestrictedError,

get_domain,

HEADRequest,

InAdvancePagedList,

int_or_none,

iri_to_uri,

ISO3166Utils,

join_nonempty,

LazyList,

LINK_TEMPLATES,

locked_file,

make_dir,

make_HTTPS_handler,

MaxDownloadsReached,

merge_headers,

network_exceptions,

number_of_digits,

orderedSet,

OUTTMPL_TYPES,

PagedList,

parse_filesize,

PerRequestProxyHandler,

platform_name,

Popen,

POSTPROCESS_WHEN,

PostProcessingError,

preferredencoding,

prepend_extension,

ReExtractInfo,

register_socks_protocols,

103

RejectedVideoReached,

104

remove_terminal_sequences,

render_table,

replace_extension,

SameFileError,

sanitize_filename,

sanitize_path,

sanitize_url,

sanitized_Request,

std_headers,

STR_FORMAT_RE_TMPL,

STR_FORMAT_TYPES,

str_or_none,

strftime_or_none,

subtitles_filename,

supports_terminal_sequences,

timetuple_from_msec,

to_high_limit_path,

traverse_obj,

try_get,

UnavailableVideoError,

url_basename,

variadic,

version_tuple,

write_json_file,

write_string,

YoutubeDLCookieProcessor,

130

YoutubeDLHandler,

131

YoutubeDLRedirectHandler,

132

)

133

from .cache import Cache

134

from .minicurses import format_text

135

from .extractor import (

136

gen_extractor_classes,

137

get_info_extractor,

138

_LAZY_LOADER,

139

_PLUGIN_CLASSES as plugin_extractors

140

)

141

from .extractor.openload import PhantomJSwrapper

142

from .downloader import (

143

FFmpegFD,

144

get_suitable_downloader,

145

shorten_protocol_name

146

)

147

from .downloader.rtmp import rtmpdump_version

148

from .postprocessor import (

149

get_postprocessor,

150

EmbedThumbnailPP,

151

FFmpegFixupDuplicateMoovPP,

152

FFmpegFixupDurationPP,

153

FFmpegFixupM3u8PP,

154

FFmpegFixupM4aPP,

155

FFmpegFixupStretchedPP,

156

FFmpegFixupTimestampPP,

157

FFmpegMergerPP,

158

FFmpegPostProcessor,

159

MoveFilesAfterDownloadPP,

160

_PLUGIN_CLASSES as plugin_postprocessors

161

)

162

from .update import detect_variant

163

from .version import __version__, RELEASE_GIT_HEAD

164

165

if compat_os_name == 'nt':

import ctypes

class YoutubeDL(object):

170

"""YoutubeDL class.

171

172

YoutubeDL objects are the ones responsible for downloading the

173

actual video file and writing it to disk if the user has requested

174

it, among some other tasks. In most cases there should be one per

175

program. As, given a video URL, the downloader doesn't know how to

176

extract all the needed information, task that InfoExtractors do, it

177

has to pass the URL to one of them.

178

179

For this, YoutubeDL objects have a method that allows

180

InfoExtractors to be registered in a given order. When it is passed

181

a URL, the YoutubeDL object hands it to the first InfoExtractor it

182

finds that reports being able to handle it. The InfoExtractor extracts

183

all the information about the video or videos the URL refers to, and

184

YoutubeDL processes the extracted information, possibly using a File

185

Downloader to download the video.

186

187

YoutubeDL objects accept a lot of parameters. In order not to saturate

188

the object constructor with arguments, it receives a dictionary of

189

options instead. These options are available through the params

190

attribute for the InfoExtractors to use. The YoutubeDL also

191

registers itself as the downloader in charge for the InfoExtractors

192

that are added to it, so this is a "mutual registration".

Available options:

username: Username for authentication purposes.

197

password: Password for authentication purposes.

198

videopassword: Password for accessing a video.

199

ap_mso: Adobe Pass multiple-system operator identifier.

200

ap_username: Multiple-system operator account username.

201

ap_password: Multiple-system operator account password.

202

usenetrc: Use netrc for authentication instead.

203

verbose: Print additional info to stdout.

204

quiet: Do not print messages to stdout.

205

no_warnings: Do not print out anything for warnings.

206

forceprint: A dict with keys WHEN mapped to a list of templates to

207

print to stdout. The allowed keys are video or any of the

208

items in utils.POSTPROCESS_WHEN.

209

For compatibility, a single list is also accepted

210

print_to_file: A dict with keys WHEN (same as forceprint) mapped to

211

a list of tuples with (template, filename)

212

forceurl: Force printing final URL. (Deprecated)

213

forcetitle: Force printing title. (Deprecated)

214

forceid: Force printing ID. (Deprecated)

215

forcethumbnail: Force printing thumbnail URL. (Deprecated)

216

forcedescription: Force printing description. (Deprecated)

217

forcefilename: Force printing final filename. (Deprecated)

218

forceduration: Force printing duration. (Deprecated)

219

forcejson: Force printing info_dict as JSON.

220

dump_single_json: Force printing the info_dict of the whole playlist

221

(or video) as a single JSON line.

222

force_write_download_archive: Force writing download archive regardless

223

of 'skip_download' or 'simulate'.

224

simulate: Do not download the video files. If unset (or None),

225

simulate only if listsubtitles, listformats or list_thumbnails is used

226

format: Video format code. see "FORMAT SELECTION" for more details.

227

You can also pass a function. The function takes 'ctx' as

228

argument and returns the formats to download.

229

See "build_format_selector" for an implementation

230

allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.

231

ignore_no_formats_error: Ignore "No video formats" error. Useful for

232

extracting metadata even if the video is not actually

233

available for download (experimental)

234

format_sort: A list of fields by which to sort the video formats.

235

See "Sorting Formats" for more details.

236

format_sort_force: Force the given format_sort. see "Sorting Formats"

237

for more details.

238

prefer_free_formats: Whether to prefer video formats with free containers

239

over non-free ones of same quality.

240

allow_multiple_video_streams: Allow multiple video streams to be merged

241

into a single file

242

allow_multiple_audio_streams: Allow multiple audio streams to be merged

243

into a single file

244

check_formats: Whether to test if the formats are downloadable.

245

Can be True (check all), False (check none),

246

'selected' (check selected formats),

247

or None (check only if requested by extractor)

248

paths: Dictionary of output paths. The allowed keys are 'home'

249

'temp' and the keys of OUTTMPL_TYPES (in utils.py)

250

outtmpl: Dictionary of templates for output names. Allowed keys

251

are 'default' and the keys of OUTTMPL_TYPES (in utils.py).

252

For compatibility with youtube-dl, a single string can also be used

253

outtmpl_na_placeholder: Placeholder for unavailable meta fields.

254

restrictfilenames: Do not allow "&" and spaces in file names

255

trim_file_name: Limit length of filename (extension excluded)

256

windowsfilenames: Force the filenames to be windows compatible

257

ignoreerrors: Do not stop on download/postprocessing errors.

258

Can be 'only_download' to ignore only download errors.

259

Default is 'only_download' for CLI, but False for API

260

skip_playlist_after_errors: Number of allowed failures until the rest of

261

the playlist is skipped

262

force_generic_extractor: Force downloader to use the generic extractor

263

overwrites: Overwrite all video and metadata files if True,

264

overwrite only non-video files if None

265

and don't overwrite any file if False

266

For compatibility with youtube-dl,

267

"nooverwrites" may also be used instead

268

playliststart: Playlist item to start at.

269

playlistend: Playlist item to end at.

270

playlist_items: Specific indices of playlist to download.

271

playlistreverse: Download playlist items in reverse order.

272

playlistrandom: Download playlist items in random order.

273

matchtitle: Download only matching titles.

274

rejecttitle: Reject downloads for matching titles.

275

logger: Log messages to a logging.Logger instance.

276

logtostderr: Log messages to stderr instead of stdout.

277

consoletitle: Display progress in console window's titlebar.

278

writedescription: Write the video description to a .description file

279

writeinfojson: Write the video metadata to a .info.json file

280

clean_infojson: Remove private fields from the infojson

281

getcomments: Extract video comments. This will not be written to disk

282

unless writeinfojson is also given

283

writeannotations: Write the video annotations to a .annotations.xml file

284

writethumbnail: Write the thumbnail image to a file

285

allow_playlist_files: Whether to write playlists' description, infojson etc

286

also to disk when using the 'write*' options

287

write_all_thumbnails: Write all thumbnail formats to files

288

writelink: Write an internet shortcut file, depending on the

289

current platform (.url/.webloc/.desktop)

290

writeurllink: Write a Windows internet shortcut file (.url)

291

writewebloclink: Write a macOS internet shortcut file (.webloc)

292

writedesktoplink: Write a Linux internet shortcut file (.desktop)

293

writesubtitles: Write the video subtitles to a file

294

writeautomaticsub: Write the automatically generated subtitles to a file

295

allsubtitles: Deprecated - Use subtitleslangs = ['all']

296

Downloads all the subtitles of the video

297

(requires writesubtitles or writeautomaticsub)

298

listsubtitles: Lists all available subtitles for the video

299

subtitlesformat: The format code for subtitles

300

subtitleslangs: List of languages of the subtitles to download (can be regex).

301

The list may contain "all" to refer to all the available

302

subtitles. The language can be prefixed with a "-" to

303

exclude it from the requested languages. Eg: ['all', '-live_chat']

304

keepvideo: Keep the video file after post-processing

305

daterange: A DateRange object, download only if the upload_date is in the range.

306

skip_download: Skip the actual download of the video file

307

cachedir: Location of the cache files in the filesystem.

308

False to disable filesystem cache.

309

noplaylist: Download single video instead of a playlist if in doubt.

310

age_limit: An integer representing the user's age in years.

311

Unsuitable videos for the given age are skipped.

312

min_views: An integer representing the minimum view count the video

313

must have in order to not be skipped.

314

Videos without view count information are always

315

downloaded. None for no limit.

316

max_views: An integer representing the maximum view count.

317

Videos that are more popular than that are not

318

downloaded.

319

Videos without view count information are always

320

downloaded. None for no limit.

321

download_archive: File name of a file where all downloads are recorded.

322

Videos already present in the file are not downloaded

323

again.

324

break_on_existing: Stop the download process after attempting to download a

325

file that is in the archive.

326

break_on_reject: Stop the download process when encountering a video that

327

has been filtered out.

328

break_per_url: Whether break_on_reject and break_on_existing

329

should act on each input URL as opposed to for the entire queue

330

cookiefile: File name where cookies should be read from and dumped to

331

cookiesfrombrowser: A tuple containing the name of the browser, the profile

332

name/path from where cookies are loaded, and the name of the

333

keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')

334

legacyserverconnect: Explicitly allow HTTPS connection to servers that do not

335

support RFC 5746 secure renegotiation

336

nocheckcertificate: Do not verify SSL certificates

337

prefer_insecure: Use HTTP instead of HTTPS to retrieve information.

338

At the moment, this is only supported by YouTube.

339

http_headers: A dictionary of custom headers to be used for all requests

340

proxy: URL of the proxy server to use

341

geo_verification_proxy: URL of the proxy to use for IP address verification

342

on geo-restricted sites.

343

socket_timeout: Time to wait for unresponsive hosts, in seconds

344

bidi_workaround: Work around buggy terminals without bidirectional text

345

support, using fribidi

346

debug_printtraffic:Print out sent and received HTTP traffic

347

include_ads: Download ads as well (deprecated)

348

default_search: Prepend this string if an input url is not valid.

349

'auto' for elaborate guessing

350

encoding: Use this encoding instead of the system-specified.

351

extract_flat: Do not resolve URLs, return the immediate result.

352

Pass in 'in_playlist' to only show this behavior for

353

playlist items.

354

wait_for_video: If given, wait for scheduled streams to become available.

355

The value should be a tuple containing the range

356

(min_secs, max_secs) to wait between retries

357

postprocessors: A list of dictionaries, each with an entry

358

* key: The name of the postprocessor. See

359

yt_dlp/postprocessor/__init__.py for a list.

360

* when: When to run the postprocessor. Allowed values are

361

the entries of utils.POSTPROCESS_WHEN

362

Assumed to be 'post_process' if not given

363

post_hooks: Deprecated - Register a custom postprocessor instead

364

A list of functions that get called as the final step

365

for each video file, after all postprocessors have been

366

called. The filename will be passed as the only argument.

367

progress_hooks: A list of functions that get called on download

368

progress, with a dictionary with the entries

369

* status: One of "downloading", "error", or "finished".

370

Check this first and ignore unknown values.

371

* info_dict: The extracted info_dict

372

373

If status is one of "downloading", or "finished", the

374

following properties may also be present:

375

* filename: The final filename (always present)

376

* tmpfilename: The filename we're currently writing to

377

* downloaded_bytes: Bytes on disk

378

* total_bytes: Size of the whole file, None if unknown

379

* total_bytes_estimate: Guess of the eventual file size,

380

None if unavailable.

381

* elapsed: The number of seconds since download started.

382

* eta: The estimated time in seconds, None if unknown

383

* speed: The download speed in bytes/second, None if

384

unknown

385

* fragment_index: The counter of the currently

386

downloaded video fragment.

387

* fragment_count: The number of fragments (= individual

388

files that will be merged)

389

390

Progress hooks are guaranteed to be called at least once

391

(with status "finished") if the download is successful.

392

postprocessor_hooks: A list of functions that get called on postprocessing

393

progress, with a dictionary with the entries

394

* status: One of "started", "processing", or "finished".

395

Check this first and ignore unknown values.

396

* postprocessor: Name of the postprocessor

397

* info_dict: The extracted info_dict

398

399

Postprocessor hooks are guaranteed to be called at least twice

400

(with status "started" and "finished") if the processing is successful.

401

merge_output_format: Extension to use when merging formats.

402

final_ext: Expected final extension; used to detect when the file was

403

already downloaded and converted

404

fixup: Automatically correct known faults of the file.

405

One of:

406

- "never": do nothing

407

- "warn": only emit a warning

408

- "detect_or_warn": check whether we can do anything

409

about it, warn otherwise (default)

410

source_address: Client-side IP address to bind to.

411

call_home: Boolean, true iff we are allowed to contact the

412

yt-dlp servers for debugging. (BROKEN)

413

sleep_interval_requests: Number of seconds to sleep between requests

414

during extraction

415

sleep_interval: Number of seconds to sleep before each download when

416

used alone or a lower bound of a range for randomized

417

sleep before each download (minimum possible number

418

of seconds to sleep) when used along with

419

max_sleep_interval.

420

max_sleep_interval:Upper bound of a range for randomized sleep before each

421

download (maximum possible number of seconds to sleep).

422

Must only be used along with sleep_interval.

423

Actual sleep time will be a random float from range

424

[sleep_interval; max_sleep_interval].

425

sleep_interval_subtitles: Number of seconds to sleep before each subtitle download

426

listformats: Print an overview of available video formats and exit.

427

list_thumbnails: Print a table of all thumbnails and exit.

428

match_filter: A function that gets called with the info_dict of

429

every video.

430

If it returns a message, the video is ignored.

431

If it returns None, the video is downloaded.

432

match_filter_func in utils.py is one example for this.

433

no_color: Do not emit color codes in output.

434

geo_bypass: Bypass geographic restriction via faking X-Forwarded-For

435

HTTP header

436

geo_bypass_country:

437

Two-letter ISO 3166-2 country code that will be used for

438

explicit geographic restriction bypassing via faking

439

X-Forwarded-For HTTP header

440

geo_bypass_ip_block:

441

IP range in CIDR notation that will be used similarly to

442

geo_bypass_country

443

444

The following options determine which downloader is picked:

445

external_downloader: A dictionary of protocol keys and the executable of the

446

external downloader to use for it. The allowed protocols

447

are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.

448

Set the value to 'native' to use the native downloader

449

hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}

450

or {'m3u8': 'ffmpeg'} instead.

451

Use the native HLS downloader instead of ffmpeg/avconv

452

if True, otherwise use ffmpeg/avconv if False, otherwise

453

use downloader suggested by extractor if None.

454

compat_opts: Compatibility options. See "Differences in default behavior".

455

The following options do not work when used through the API:

456

filename, abort-on-error, multistreams, no-live-chat, format-sort

457

no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.

458

Refer __init__.py for their implementation

459

progress_template: Dictionary of templates for progress outputs.

460

Allowed keys are 'download', 'postprocess',

461

'download-title' (console title) and 'postprocess-title'.

462

The template is mapped on a dictionary with keys 'progress' and 'info'

463

464

The following parameters are not used by YoutubeDL itself, they are used by

465

the downloader (see yt_dlp/downloader/common.py):

466

nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,

467

max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,

468

continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,

469

external_downloader_args, concurrent_fragment_downloads.

470

471

The following options are used by the post processors:

472

prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,

473

otherwise prefer ffmpeg. (avconv support is deprecated)

474

ffmpeg_location: Location of the ffmpeg/avconv binary; either the path

475

to the binary or its containing directory.

476

postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)

477

and a list of additional command-line arguments for the

478

postprocessor/executable. The dict can also have "PP+EXE" keys

479

which are used when the given exe is used by the given PP.

480

Use 'default' as the name for arguments to be passed to all PPs.

481

For compatibility with youtube-dl, a single list of args

482

can also be used

483

484

The following options are used by the extractors:

485

extractor_retries: Number of times to retry for known errors

486

dynamic_mpd: Whether to process dynamic DASH manifests (default: True)

487

hls_split_discontinuity: Split HLS playlists to different formats at

488

discontinuities such as ad breaks (default: False)

489

extractor_args: A dictionary of arguments to be passed to the extractors.

490

See "EXTRACTOR ARGUMENTS" for details.

491

Eg: {'youtube': {'skip': ['dash', 'hls']}}

492

mark_watched: Mark videos watched (even with --simulate). Only for YouTube

493

youtube_include_dash_manifest: Deprecated - Use extractor_args instead.

494

If True (default), DASH manifests and related

495

data will be downloaded and processed by extractor.

496

You can reduce network I/O by disabling it if you don't

497

care about DASH. (only for youtube)

498

youtube_include_hls_manifest: Deprecated - Use extractor_args instead.

499

If True (default), HLS manifests and related

500

data will be downloaded and processed by extractor.

501

You can reduce network I/O by disabling it if you don't

502

care about HLS. (only for youtube)

503

"""

504

505

# Names of info-dict fields that are presumably treated as numeric
# (the consumers of this set are outside this block).
_NUMERIC_FIELDS = {
    'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
    'timestamp', 'release_timestamp',
    'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
    'average_rating', 'comment_count', 'age_limit',
    'start_time', 'end_time',
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number', 'release_year',
}

# File extensions grouped by media kind.
_format_selection_exts = {
    'audio': {'m4a', 'mp3', 'ogg', 'aac'},
    'video': {'mp4', 'flv', 'webm', '3gp'},
    'storyboards': {'mhtml'},
}

# Class-level defaults. __init__ rebinds params, _ies, _pps,
# _printed_messages, _first_webpage_request, _download_retcode and
# _num_downloads on every instance.
# NOTE(review): _playlist_urls is a mutable set that the visible part of
# __init__ does not rebind, so it looks shared between instances - confirm.
params = None
_ies = {}
_pps = {k: [] for k in POSTPROCESS_WHEN}
_printed_messages = set()
_first_webpage_request = True
_download_retcode = None
_num_downloads = None
_playlist_level = 0
_playlist_urls = set()
_screen_file = None

531

532

def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    @param params     Dictionary of options (see the class docstring).
                      The dictionary is stored by reference and is
                      normalised in place by this constructor.
    @param auto_init  Whether to load the default extractors and print header (if verbose).
                      Set to 'no_verbose_header' to not print the header
    """
    if params is None:
        params = {}
    # Per-instance state; these rebind the class-level defaults
    self._ies = {}
    self._ies_instances = {}
    self._pps = {k: [] for k in POSTPROCESS_WHEN}
    self._printed_messages = set()
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._postprocessor_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._num_videos = 0
    self.params = params
    self.cache = Cache(self)

    windows_enable_vt_mode()
    # Output streams by purpose. 'console' is None on Windows, otherwise the
    # first of stderr/stdout that supports terminal sequences (or None).
    self._out_files = {
        'error': sys.stderr,
        'print': sys.stderr if self.params.get('logtostderr') else sys.stdout,
        'console': None if compat_os_name == 'nt' else next(
            filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
    }
    # In quiet mode screen output goes to stderr; otherwise it shares the 'print' stream
    self._out_files['screen'] = sys.stderr if self.params.get('quiet') else self._out_files['print']
    # Colors need both: not disabled via 'no_color' and a stream that supports them
    self._allow_colors = {
        type_: not self.params.get('no_color') and supports_terminal_sequences(self._out_files[type_])
        for type_ in ('screen', 'error')
    }

    if sys.version_info < (3, 6):
        self.report_warning(
            'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

    if self.params.get('allow_unplayable_formats'):
        self.report_warning(
            f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
            'This is a developer option intended for debugging. \n'
            ' If you experience any issues while using this option, '
            f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

    def check_deprecated(param, option, suggestion):
        # Warn if the deprecated param is set (not None); returns whether it was set
        if self.params.get(param) is not None:
            self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
            return True
        return False

    # Carry the deprecated proxy option over, unless the new one is already set
    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    # Replay warnings that were passed in through params
    for msg in self.params.get('_warnings', []):
        self.report_warning(msg)
    for msg in self.params.get('_deprecation_warnings', []):
        self.deprecation_warning(msg)

    if 'list-formats' in self.params.get('compat_opts', []):
        self.params['listformats_table'] = False

    if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
        # nooverwrites was unnecessarily changed to overwrites
        # in 0c3d0f51778b153f65c21906031c2e091fcfb641
        # This ensures compatibility with both keys
        self.params['overwrites'] = not self.params['nooverwrites']
    elif self.params.get('overwrites') is None:
        # An explicit None is dropped so that the key is simply absent
        self.params.pop('overwrites', None)
    else:
        self.params['nooverwrites'] = not self.params['overwrites']

    self.params.setdefault('forceprint', {})
    self.params.setdefault('print_to_file', {})

    # Compatibility with older syntax
    if not isinstance(params['forceprint'], dict):
        self.params['forceprint'] = {'video': params['forceprint']}

    if self.params.get('bidi_workaround', False):
        try:
            import pty
            # The helper process writes to the slave end; we read its output from the master end
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._out_files['error'])
            # Try bidiv first and fall back to fribidi if it cannot be started
            try:
                self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
            except OSError:
                self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # A missing executable only disables the workaround; other errors propagate
            if ose.errno == errno.ENOENT:
                self.report_warning(
                    'Could not find fribidi executable, ignoring --bidi-workaround. '
                    'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    # Creating format selector here allows us to catch syntax errors before the extraction
    # None and '-' are kept as-is, callables are used directly, anything else is compiled
    self.format_selector = (
        self.params.get('format') if self.params.get('format') in (None, '-')
        else self.params['format'] if callable(self.params['format'])
        else self.build_format_selector(self.params['format']))

    # Set http_headers defaults according to std_headers
    self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

    self._setup_opener()

    if auto_init:
        if auto_init != 'no_verbose_header':
            self.print_debug_header()
        self.add_default_info_extractors()

    # Register the hooks given through params using the matching add_* method
    hooks = {
        'post_hooks': self.add_post_hook,
        'progress_hooks': self.add_progress_hook,
        'postprocessor_hooks': self.add_postprocessor_hook,
    }
    for opt, fn in hooks.items():
        for ph in self.params.get(opt, []):
            fn(ph)

    for pp_def_raw in self.params.get('postprocessors', []):
        # Work on a copy so that popping 'when'/'key' does not modify the caller's dict
        pp_def = dict(pp_def_raw)
        when = pp_def.pop('when', 'post_process')
        self.add_post_processor(
            get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
            when=when)

    register_socks_protocols()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified"""
        if fn is None:
            return False
        self.write_debug(f'Loading archive file {fn!r}')
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file is not an error; anything else is re-raised
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    self.archive = set()
    # The boolean result is not used here
    preload_download_archive(self.params.get('download_archive'))

705

706

def warn_if_short_id(self, argv):
    """Warn when an argument looks like an 11-character ID starting with a dash.

    Such arguments (e.g. a short YouTube ID starting with "-") match
    ``-`` followed by exactly ten of ``[0-9A-Za-z_-]``. The warning shows
    the command line with those arguments moved behind a ``--`` separator.
    Nothing is reported when no argument matches.
    """
    short_id_re = re.compile(r'^-[0-9A-Za-z_-]{10}$')
    flagged = [pos for pos, arg in enumerate(argv) if short_id_re.match(arg)]
    if not flagged:
        return

    kept = [arg for pos, arg in enumerate(argv) if pos not in flagged]
    moved = [argv[pos] for pos in flagged]
    suggestion = ['yt-dlp'] + kept + ['--'] + moved
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s' %
        args_to_str(suggestion))

721

722

def add_info_extractor(self, ie):
    """Register an InfoExtractor (class or instance) under its ie_key().

    The object is always stored in self._ies. If it is an instance rather
    than a class, it is also stored in self._ies_instances and this object
    is set as its downloader.
    """
    key = ie.ie_key()
    self._ies[key] = ie
    if isinstance(ie, type):
        # Only a class was given: there is no instance to bind yet
        return
    self._ies_instances[key] = ie
    ie.set_downloader(self)

729

730

def _get_info_extractor_class(self, ie_key):
    """Return the extractor registered under ie_key, loading it on first use.

    When nothing is registered in self._ies for the key, the extractor is
    looked up with the module-level get_info_extractor() and registered
    through add_info_extractor() before being returned.
    """
    registered = self._ies.get(ie_key)
    if registered is not None:
        return registered
    loaded = get_info_extractor(ie_key)
    self.add_info_extractor(loaded)
    return loaded

736

737

def get_info_extractor(self, ie_key):
    """
    Return an extractor instance for ie_key.

    An instance already present in self._ies_instances is reused. Otherwise
    the extractor is looked up with the module-level get_info_extractor(),
    instantiated without arguments, registered via add_info_extractor()
    and returned.
    """
    instance = self._ies_instances.get(ie_key)
    if instance is not None:
        return instance
    instance = get_info_extractor(ie_key)()
    self.add_info_extractor(instance)
    return instance

748

749

def add_default_info_extractors(self):
    """
    Register every extractor class yielded by gen_extractor_classes(),
    in the order they are produced.
    """
    for extractor_cls in gen_extractor_classes():
        self.add_info_extractor(extractor_cls)

755

756

def add_post_processor(self, pp, when='post_process'):
    """Append pp to the postprocessor chain for stage `when` and bind it to this object.

    `when` must be an existing key of self._pps; an unknown stage raises KeyError.
    """
    stage_chain = self._pps[when]
    stage_chain.append(pp)
    pp.set_downloader(self)

760

761

def add_post_hook(self, ph):
    """Append ph to the list of registered post hooks."""
    registered = self._post_hooks
    registered.append(ph)

764

765

def add_progress_hook(self, ph):
    """Append ph to the list of registered download progress hooks."""
    registered = self._progress_hooks
    registered.append(ph)

768

769

def add_postprocessor_hook(self, ph):
    """Register a postprocessing progress hook.

    The hook is recorded on this object and also handed to every
    postprocessor already present in self._pps.
    """
    self._postprocessor_hooks.append(ph)
    for postprocessor in itertools.chain.from_iterable(self._pps.values()):
        postprocessor.add_progress_hook(ph)

775

776

def _bidi_workaround(self, message):
    """Pass message through the bidi helper process, if one was set up.

    Without self._output_channel the message is returned unchanged.
    Otherwise the message plus a newline is written (UTF-8) to the helper's
    stdin, one line per line of the message is read back from the output
    channel, and the final newline is stripped from the result.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)
    expected_lines = message.count('\n') + 1
    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + '\n').encode('utf-8'))
    helper_stdin.flush()
    received = []
    for _ in range(expected_lines):
        received.append(self._output_channel.readline().decode('utf-8'))
    # Drop the single trailing newline
    return ''.join(received)[:-1]

788

789

def _write_string(self, message, out=None, only_once=False):
    """Write message to `out` using the 'encoding' param.

    With only_once set, a message that was already written with only_once
    is skipped; otherwise it is remembered in self._printed_messages.
    """
    if only_once and message in self._printed_messages:
        return
    if only_once:
        self._printed_messages.add(message)
    write_string(message, out=out, encoding=self.params.get('encoding'))

795

796

def to_stdout(self, message, skip_eol=False, quiet=None):
    """Write message to the 'print' output file.

    A newline is appended unless skip_eol is set. Passing `quiet` only
    triggers a deprecation warning; the message is written regardless.
    """
    if quiet is not None:
        self.deprecation_warning('"ydl.to_stdout" no longer accepts the argument quiet. Use "ydl.to_screen" instead')
    line_end = '' if skip_eol else '\n'
    text = '%s%s' % (self._bidi_workaround(message), line_end)
    self._write_string(text, self._out_files['print'])

803

804

def to_screen(self, message, skip_eol=False, quiet=None):
    """Show message on the 'screen' output file unless quiet mode applies.

    If a 'logger' param is set, the message goes to logger.debug() and
    nothing else happens. Otherwise the message is dropped when quiet
    (the `quiet` argument, or the 'quiet' param if the argument is None)
    is set and 'verbose' is not. A newline is appended unless skip_eol.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return

    suppress = self.params.get('quiet') if quiet is None else quiet
    if suppress and not self.params.get('verbose'):
        return

    line_end = '' if skip_eol else '\n'
    text = '%s%s' % (self._bidi_workaround(message), line_end)
    self._write_string(text, self._out_files['screen'])

814

815

def to_stderr(self, message, only_once=False):
    """Report message on the 'error' output file, or via logger.error().

    When a 'logger' param is set the message is only passed to its
    error() method (only_once is not applied in that case). Otherwise the
    message plus a newline is written to the 'error' output file.
    """
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    text = '%s\n' % self._bidi_workaround(message)
    self._write_string(text, self._out_files['error'], only_once=only_once)

822

823

def _send_console_code(self, code):
    """Write a terminal control sequence to the 'console' output file.

    Nothing is written on Windows ('nt') or when no console file is set.
    """
    console = None if compat_os_name == 'nt' else self._out_files['console']
    if console:
        self._write_string(code, console)

827

828

def to_console_title(self, message):
    """Set the console window title to message if 'consoletitle' is enabled.

    Terminal sequences are removed from the message first. On Windows the
    title is set through kernel32 (only if a console window exists);
    elsewhere an OSC 0 escape sequence is sent to the console.
    """
    if not self.params.get('consoletitle', False):
        return

    title = remove_terminal_sequences(message)
    if compat_os_name != 'nt':
        self._send_console_code(f'\033]0;{title}\007')
        return

    if ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `title` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(title))

839

840

def save_console_title(self):
    """Push the current console title on the terminal's title stack.

    Only done when 'consoletitle' is enabled and 'simulate' is not.
    """
    wanted = self.params.get('consoletitle') and not self.params.get('simulate')
    if wanted:
        self._send_console_code('\033[22;0t')  # Save the title on stack

844

845

def restore_console_title(self):
    """Pop the console title from the terminal's title stack.

    Only done when 'consoletitle' is enabled and 'simulate' is not.
    """
    wanted = self.params.get('consoletitle') and not self.params.get('simulate')
    if wanted:
        self._send_console_code('\033[23;0t')  # Restore the title from stack

849

850

def __enter__(self):

851

self.save_console_title()

852

return self

853

854

def __exit__(self, *args):

855

self.restore_console_title()

856

857

if self.params.get('cookiefile') is not None:

858

self.cookiejar.save(ignore_discard=True, ignore_expires=True)

859

860

def trouble(self, message=None, tb=None, is_error=True):
    """Determine action to take when a download problem appears.

    The message (if any) is printed to stderr first. Then, depending on
    whether the downloader has been configured to ignore errors, this
    method either raises DownloadError or records a failing return code.

    @param message   Text to print; nothing is printed if None
    @param tb        If given, is additional traceback information
    @param is_error  Whether to raise error according to ignoreerrors
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            active = sys.exc_info()
            if active[0]:  # .trouble has been called from an except block
                chunks = []
                # Exceptions carrying their own exc_info get that traceback printed first
                if hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
                    chunks.extend(traceback.format_exception(*active[1].exc_info))
                chunks.append(encode_compat_str(traceback.format_exc()))
                tb = ''.join(chunks)
            else:
                # No active exception: show where we were called from instead
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if not is_error:
        return

    if self.params.get('ignoreerrors'):
        self._download_retcode = 1
        return

    active = sys.exc_info()
    if active[0] and hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
        cause = active[1].exc_info
    else:
        cause = active
    raise DownloadError(message, cause)

class Styles(Enum):
    """Named text styles; each value is the style string used for formatting."""

    # Structural elements
    HEADERS = 'yellow'
    EMPHASIS = 'light blue'
    ID = 'green'
    DELIM = 'blue'

    # Severity markers
    ERROR = 'red'
    # Same value as HEADERS, so Enum treats this member as an alias of it
    WARNING = 'yellow'

    # De-emphasized text
    SUPPRESS = 'light black'

902

903

def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):

904

if test_encoding:

905

original_text = text

906

# handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711

907

encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'

908

text = text.encode(encoding, 'ignore').decode(encoding)

909

if fallback is not None and text != original_text:

910

text = fallback

911

if isinstance(f, self.Styles):

912

f = f.value

913

return format_text(text, f) if allow_colors else text if fallback is None else fallback

914

915

def _format_screen(self, *args, **kwargs):

916

return self._format_text(

917

self._out_files['screen'], self._allow_colors['screen'], *args, **kwargs)

918

919

def _format_err(self, *args, **kwargs):

920

return self._format_text(

921

self._out_files['error'], self._allow_colors['error'], *args, **kwargs)

922

923

def report_warning(self, message, only_once=False):
    """Report a warning.

    With a 'logger' param set, the bare message goes to its `warning`
    method. Otherwise it is printed to stderr prefixed with 'WARNING:'
    (styled through `_format_err`), unless 'no_warnings' is set.

    @param only_once  Forwarded to `to_stderr`
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

934

935

def deprecation_warning(self, message):
    """Report that a deprecated feature was used.

    With a 'logger' param set, the message is sent to its `warning`
    method prefixed with 'DeprecationWarning: '. Otherwise it is printed
    to stderr with a styled 'DeprecationWarning:' prefix, and
    `only_once=True` is passed to `to_stderr`.

    @param message  Description of the deprecation
    """
    logger = self.params.get('logger')
    if logger is not None:
        # The f-prefix is required; without it the literal text "{message}" was logged
        logger.warning(f'DeprecationWarning: {message}')
    else:
        self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)

940

941

def report_error(self, message, *args, **kwargs):
    """Report an error through `trouble`.

    The message is prefixed with 'ERROR:' (styled through `_format_err`);
    any further arguments are passed on to `trouble` unchanged.
    """
    prefix = self._format_err("ERROR:", self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', *args, **kwargs)

947

948

def write_debug(self, message, only_once=False):
    """Emit a '[debug] '-prefixed message when 'verbose' is enabled.

    The message goes to the logger's `debug` method if a 'logger' param
    is set, otherwise to stderr.

    @param only_once  Forwarded to `to_stderr`
    """
    if not self.params.get('verbose', False):
        return

    line = '[debug] %s' % message
    logger = self.params.get('logger')
    if logger:
        logger.debug(line)
        return
    self.to_stderr(line, only_once)

957

958

def report_file_already_downloaded(self, file_name):
    """Tell the user that `file_name` has already been fully downloaded.

    If printing the name raises UnicodeEncodeError, a generic message
    without the file name is printed instead.
    """
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')

964

965

def report_file_delete(self, file_name):
    """Tell the user that the existing file `file_name` will be deleted.

    If printing the name raises UnicodeEncodeError, a generic message
    without the file name is printed instead.
    """
    try:
        notice = 'Deleting existing file %s' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen('Deleting existing file')

971

972

def raise_no_formats(self, info, forced=False, *, msg=None):
    """Raise ExtractorError (or only warn) because `info` has no usable formats.

    @param info    Info dict; '__has_drm', 'id' and 'extractor' are read from it
    @param forced  Raise even if 'ignore_no_formats_error' is set
    @param msg     Custom message; a default one is chosen when empty

    The error is raised unless 'ignore_no_formats_error' is set and
    `forced` is false, in which case the message is reported as a warning.
    """
    has_drm = info.get('__has_drm')
    ignored = self.params.get('ignore_no_formats_error')
    expected = bool(msg)  # a caller-supplied message marks the error as expected

    if not msg:
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'

    if ignored and not forced:
        self.report_warning(msg)
        return

    raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                         expected=has_drm or ignored or expected)

981

982

def parse_outtmpl(self):
    """Return the 'outtmpl' param as a dict with defaults filled in.

    A non-dict 'outtmpl' value is treated as the 'default' template.
    Every key of DEFAULT_OUTTMPL whose value is missing or None is set to
    its default. A warning is reported for each template given as bytes.
    """
    templates = self.params.get('outtmpl', {})
    if not isinstance(templates, dict):
        templates = {'default': templates}

    # Remove spaces in the default template
    if self.params.get('restrictfilenames'):
        def sanitize(tmpl):
            return tmpl.replace(' - ', ' ').replace(' ', '-')
    else:
        def sanitize(tmpl):
            return tmpl

    missing = {
        name: sanitize(tmpl)
        for name, tmpl in DEFAULT_OUTTMPL.items()
        if templates.get(name) is None
    }
    templates.update(missing)

    for tmpl in templates.values():
        if isinstance(tmpl, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
    return templates

1000

1001

def get_output_path(self, dir_type='', filename=None):
    """Build the output path for `filename` from the configured 'paths'.

    The result joins the expanded 'home' path, the expanded path
    configured for `dir_type` (if `dir_type` is non-empty) and
    `filename`, and is then passed through `sanitize_path`.

    @param dir_type  Key into the 'paths' param; '' to use only 'home'
    @param filename  File name to append; treated as '' when falsy
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)

    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    path = os.path.join(home_dir, type_dir, filename or '')

    # The former Python 2 on Windows re-encoding workaround (#4787) was removed:
    # this module uses Python-3-only syntax, so that branch could never execute.
    return sanitize_path(path, force=self.params.get('windowsfilenames'))

1015

1016

@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Run `expand_path` over `outtmpl` while keeping '%%' and '$$' intact."""
    # expand_path translates '%%' into '%' and '$$' into '$'
    # correspondingly that is not what we want since we need to keep
    # '%%' intact for template dict substitution step. Working around
    # with boundary-alike separator hack.
    marker = ''.join(random.choice(ascii_letters) for _ in range(32))
    protected = outtmpl.replace('%%', f'%{marker}%')
    protected = protected.replace('$$', f'${marker}$')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    expanded = expand_path(protected)
    return expanded.replace(marker, '')

1030

1031

@staticmethod
def escape_outtmpl(outtmpl):
    """Escape any remaining strings like %s, %abc% etc."""
    pattern = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')

    def add_percent(mobj):
        # Matches without a key get an extra '%' in front; keyed ones are kept as they are
        lead = '' if mobj.group('has_key') else '%'
        return lead + mobj.group(0)

    return re.sub(pattern, add_percent, outtmpl)

@classmethod
def validate_outtmpl(cls, outtmpl):
    """Check that `outtmpl` can be used for %-substitution.

    @return None or Exception object
    """
    pattern = STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]')

    def as_plain_string(mobj):
        # Swap the custom conversion character for a plain 's'
        return f'{mobj.group(0)[:-1]}s'

    outtmpl = re.sub(pattern, as_plain_string, cls._outtmpl_expandpath(outtmpl))
    try:
        # Trial substitution; every key resolves to 0
        cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None

@staticmethod

def _copy_infodict(info_dict):

1054

info_dict = dict(info_dict)

1055

info_dict.pop('__postprocessors', None)

1056

return info_dict

1057

1058

def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
    @param outtmpl     The output template to process
    @param info_dict   The info dict; 'epoch' is set on it if missing, all other
                       changes are made on a copy
    @param sanitize    Whether to sanitize the output as a filename.
                       For backward compatibility, a function can also be passed
    @return            Tuple of (rewritten template, dict with a value for each key of that template)
    """

    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    info_dict = self._copy_infodict(info_dict)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
    info_dict['video_autonumber'] = self._num_videos
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
        'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
    # The decimal point must be escaped; an unescaped '.' accepted any character
    # there, letting malformed keys match and then crash while being evaluated
    MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:\.\d+)?')
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{field})
        (?P<maths>(?:{math_op}{math_field})*)
        (?:>(?P<strf_format>.+?))?
        (?P<alternate>(?<!\\),[^|&)]+)?
        (?:&(?P<replacement>.*?))?
        (?:\|(?P<default>.*?))?
        $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

    def _traverse_infodict(k):
        # Look up a dotted path in the (copied) info dict; a leading '.' is ignored
        k = k.split('.')
        if k[0] == '':
            k.pop(0)
        return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Evaluate one parsed field expression (groupdict of INTERNAL_FORMAT_RE)
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            operation = None
            # Alternately consume an operator and then its operand
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if operation else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if operation is None:
                    operation = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                offset = float_or_none(item)
                if offset is None:
                    # Not a number literal, so treat the operand as a field name
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = operation(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                operation = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        return value

    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
        return sanitize_filename(str(value), restricted=restricted,
                                 is_id=re.search(r'(^|[_.])id(\.|$)', key))

    sanitizer = sanitize if callable(sanitize) else filename_sanitizer
    sanitize = bool(sanitize)

    def _dumpjson_default(obj):
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        return repr(obj)

    def create_key(outer_mobj):
        # Replacement callback for EXTERNAL_FORMAT_RE: computes the value of one
        # template field, stores it in TMPL_DICT and returns the rewritten field
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        initial_field = mobj.group('fields') if mobj else ''
        value, replacement, default = None, None, na
        # Try the field, then each ','-separated alternate until one has a value
        while mobj:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else default
            value = get_value(mobj)
            replacement = mobj['replacement']
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
            else:
                break

        fmt = outer_mobj.group('format')
        if fmt == 's' and value is not None and key in field_size_compat_map:
            fmt = '0{:d}d'.format(field_size_compat_map[key])

        value = default if value is None else value if replacement is None else replacement

        flags = outer_mobj.group('conversion') or ''
        str_fmt = f'{fmt[:-1]}s'
        if fmt[-1] == 'l':  # list
            delim = '\n' if '#' in flags else ', '
            value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
        elif fmt[-1] == 'j':  # json
            value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
        elif fmt[-1] == 'q':  # quoted
            value = map(str, variadic(value) if '#' in flags else [value])
            value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
        elif fmt[-1] == 'B':  # bytes
            value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif fmt[-1] == 'U':  # unicode normalized
            value, fmt = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                value), str_fmt
        elif fmt[-1] == 'D':  # decimal suffix
            num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
            value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                          factor=1024 if '#' in flags else 1000)
        elif fmt[-1] == 'S':  # filename sanitization
            value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
        elif fmt[-1] == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif fmt[-1] not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), str_fmt
            if fmt[-1] in 'csr':
                value = sanitizer(initial_field, value)

        # The stored key combines the original key and format, separated by NUL
        key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

1231

1232

def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Substitute `info_dict` into `outtmpl` and return the resulting string.

    Extra arguments are forwarded to `prepare_outtmpl`.
    """
    prepared_tmpl, values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    escaped = self.escape_outtmpl(prepared_tmpl)
    return escaped % values

1235

1236

def _prepare_filename(self, info_dict, tmpl_type='default'):

1237

try:

1238

outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))

1239

filename = self.evaluate_outtmpl(outtmpl, info_dict, True)

if not filename:

return None

if tmpl_type in ('default', 'temp'):

1244

final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')

1245

if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):

1246

filename = replace_extension(filename, ext, final_ext)

1247

else:

1248

force_ext = OUTTMPL_TYPES[tmpl_type]

1249

if force_ext:

1250

filename = replace_extension(filename, force_ext, info_dict.get('ext'))

1251

1252

# https://github.com/blackjack4494/youtube-dlc/issues/85

1253

trim_file_name = self.params.get('trim_file_name', False)

1254

if trim_file_name:

1255

no_ext, *ext = filename.rsplit('.', 2)

1256

filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

1257

1258

return filename

1259

except ValueError as err:

1260

self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')

1261

return None

1262

1263

def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename.

    @param info_dict  Info dict the template is evaluated against
    @param dir_type   Template/path type; '' selects the 'default' template
    @param warn       Whether to warn when the 'paths' param has no effect
    @return           The full output path; '-' or an empty/None file name is
                      returned as is ('' for types other than '' and 'temp')
    """

    filename = self._prepare_filename(info_dict, dir_type or 'default')
    if not filename and dir_type not in ('', 'temp'):
        return ''

    if warn and self.params.get('paths'):
        if filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        # filename may be None/empty here; os.path.isabs() raises TypeError on None
        elif filename and os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)

    if filename == '-' or not filename:
        return filename

    return self.get_output_path(dir_type, filename)

1281

1282

def _match_entry(self, info_dict, incomplete=False, silent=False):
    """Decide whether the entry described by `info_dict` should be skipped.

    @param incomplete  Passed on to the user supplied 'match_filter'
    @param silent      If true, the rejection reason is not printed
    @return            None if the file should be downloaded, otherwise the
                       reason for skipping it

    Raises ExistingVideoReached / RejectedVideoReached when the entry is
    rejected and the matching 'break_on_*' param is set.
    """

    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def check_filter():
        # Title patterns are only applied when a title is present
        if 'title' in info_dict:
            title = info_dict['title']
            wanted = self.params.get('matchtitle', False)
            if wanted and not re.search(wanted, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + wanted + '"'
            unwanted = self.params.get('rejecttitle', False)
            if unwanted and re.search(unwanted, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + unwanted + '"'

        upload_date = info_dict.get('upload_date')
        if upload_date is not None:
            allowed_dates = self.params.get('daterange', DateRange())
            if upload_date not in allowed_dates:
                return '%s upload date is not in range %s' % (date_from_str(upload_date).isoformat(), allowed_dates)

        views = info_dict.get('view_count')
        if views is not None:
            fewest = self.params.get('min_views')
            if fewest is not None and views < fewest:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, views, fewest)
            most = self.params.get('max_views')
            if most is not None and views > most:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, views, most)

        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        user_filter = self.params.get('match_filter')
        if user_filter is None:
            return None
        try:
            return user_filter(info_dict, incomplete=incomplete)
        except TypeError:
            # For backward compatibility
            return None if incomplete else user_filter(info_dict)

    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return None

    if not silent:
        self.to_screen('[download] ' + reason)
    if self.params.get(break_opt, False):
        raise break_err()
    return reason

@staticmethod

def add_extra_info(info_dict, extra_info):

1341

'''Set the keys from extra_info in info dict if they are missing'''

1342

for key, value in extra_info.items():

1343

info_dict.setdefault(key, value)

1344

1345

def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract the information of the URL with the first suitable extractor.

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
    process -- whether to resolve all unresolved references (URLs, playlist items),
        must be True for download to work.
    force_generic_extractor -- force using the generic extractor

    Returns the result of the extraction. None is returned when the URL is
    already recorded in the download archive, or when no extractor is
    suitable (which is also reported as an error).
    """

    if extra_info is None:
        extra_info = {}

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    # A given key restricts the search to that single extractor
    candidates = {ie_key: self._get_info_extractor_class(ie_key)} if ie_key else self._ies

    for key, extractor in candidates.items():
        if not extractor.suitable(url):
            continue

        if not extractor.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = extractor.get_temp_id(url)
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
            self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
            if self.params.get('break_on_existing', False):
                raise ExistingVideoReached()
            return None
        return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)

    # Only reached when no extractor accepted the URL
    self.report_error('no suitable InfoExtractor for URL %s' % url)

1390

1391

def __handle_extraction_exceptions(func):

1392

@functools.wraps(func)

1393

def wrapper(self, *args, **kwargs):

1394

while True:

1395

try:

1396

return func(self, *args, **kwargs)

1397

except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):

1398

raise

1399

except ReExtractInfo as e:

1400

if e.expected:

1401

self.to_screen(f'{e}; Re-extracting data')

1402

else:

1403

self.to_stderr('\r')

1404

self.report_warning(f'{e}; Re-extracting data')

1405

continue

1406

except GeoRestrictedError as e:

1407

msg = e.msg

1408

if e.countries:

1409

msg += '\nThis video is available in %s.' % ', '.join(

1410

map(ISO3166Utils.short2full, e.countries))

1411

msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'

1412

self.report_error(msg)

1413

except ExtractorError as e: # An error we somewhat expected

1414

self.report_error(str(e), e.format_traceback())

1415

except Exception as e:

1416

if self.params.get('ignoreerrors'):

1417

self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))

else:

raise

break

return wrapper

def _wait_for_video(self, ie_result):

1424

if (not self.params.get('wait_for_video')

1425

or ie_result.get('_type', 'video') != 'video'

1426

or ie_result.get('formats') or ie_result.get('url')):

1427

return

1428

1429

format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]

last_msg = ''

def progress(msg):

nonlocal last_msg

self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)

1435

last_msg = msg

1436

1437

min_wait, max_wait = self.params.get('wait_for_video')

1438

diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())

1439

if diff is None and ie_result.get('live_status') == 'is_upcoming':

1440

diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)

1441

self.report_warning('Release time of video is not known')

1442

elif (diff or 0) <= 0:

1443

self.report_warning('Video should already be available according to extracted info')

1444

diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))

1445

self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

1446

1447

wait_till = time.time() + diff

1448

try:

1449

while True:

1450

diff = wait_till - time.time()

1451

if diff <= 0:

1452

progress('')

1453

raise ReExtractInfo('[wait] Wait period ended', expected=True)

1454

progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')

1455

time.sleep(1)

1456

except KeyboardInterrupt:

1457

progress('')

1458

raise ReExtractInfo('[wait] Interrupted by user', expected=True)

1459

except BaseException as e:

1460

if not isinstance(e, ReExtractInfo):

self.to_screen('')

raise

@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run extractor `ie` on `url` and optionally process the result.

    @return  None if the extractor returned None; the raw result if
             `process` is false; otherwise the result of `process_ie_result`
    """
    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return None

    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}

    original_url = extra_info.get('original_url')
    if original_url:
        ie_result.setdefault('original_url', extra_info['original_url'])
    self.add_default_extra_info(ie_result, ie, url)

    if not process:
        return ie_result
    self._wait_for_video(ie_result)
    return self.process_ie_result(ie_result, download, extra_info)

def add_default_extra_info(self, ie_result, ie, url):
    """Fill in URL and extractor related fields that `ie_result` is missing.

    @param ie_result  Result dict, updated in place through `add_extra_info`
    @param ie         Extractor providing IE_NAME and ie_key(), or None
    @param url        URL used for 'webpage_url'/'original_url', or None
    """
    if url is not None:
        url_fields = {'webpage_url': url, 'original_url': url}
        self.add_extra_info(ie_result, url_fields)

    page_url = ie_result.get('webpage_url')
    if page_url:
        derived_fields = {
            'webpage_url_basename': url_basename(page_url),
            'webpage_url_domain': get_domain(page_url),
        }
        self.add_extra_info(ie_result, derived_fields)

    if ie is not None:
        extractor_fields = {'extractor': ie.IE_NAME, 'extractor_key': ie.ie_key()}
        self.add_extra_info(ie_result, extractor_fields)

1501

1502

def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    @param ie_result   Result dict; its '_type' (default 'video') selects the handling
    @param download    Whether to download the videos
    @param extra_info  Extra values that are added to the results

    Returns None for a playlist whose 'webpage_url' is already being processed.
    Raises Exception for an unknown '_type'.
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        if ie_result.get('original_url'):
            extra_info.setdefault('original_url', ie_result['original_url'])

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: report the entry from a copy without resolving the URL
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            if ie and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            info_copy, _ = self.pre_process(info_copy)
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, compat_str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        # Values of the outer result override the extracted ones, except
        # for the identity fields removed below
        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # Parenthesized so that the id is used when there is no title;
            # '%' binds tighter than 'or', which made the fallback unreachable
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._fill_common_fields(ie_result, False)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Forget the seen URLs once the outermost playlist is done
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Copy the extractor/URL fields of the list result onto the entry
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)

1631

1632

def _ensure_dir_exists(self, path):
    """Delegate to `make_dir` for `path`, handing it `self.report_error`.

    Returns whatever `make_dir` returns; callers in this file treat a
    falsy result as "directory not available".
    """
    outcome = make_dir(path, self.report_error)
    return outcome

1634

1635

@staticmethod

1636

def _playlist_infodict(ie_result, **kwargs):

1637

return {

1638

**ie_result,

1639

'playlist': ie_result.get('title') or ie_result.get('id'),

1640

'playlist_id': ie_result.get('id'),

1641

'playlist_title': ie_result.get('title'),

1642

'playlist_uploader': ie_result.get('uploader'),

1643

'playlist_uploader_id': ie_result.get('uploader_id'),

'playlist_index': 0,

**kwargs,

}

def __process_playlist(self, ie_result, download):
    """Process every selected entry of a playlist result.

    Selects entries according to the `playliststart`, `playlistend` and
    `playlist_items` params, optionally writes the playlist-level files,
    then runs each entry through `process_ie_result`.

    Returns the updated `ie_result` (after the 'playlist' post-processors),
    or None when writing one of the playlist files reported failure.
    Raises EntryNotInPlaylist when the result has no `entries`, or when
    a requested entry of an incomplete playlist cannot be found.
    """
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if 'entries' not in ie_result:
        raise EntryNotInPlaylist('There are no entries')

    # Unique placeholder for positions that were never extracted
    MissingEntry = object()
    incomplete_entries = bool(ie_result.get('requested_entries'))
    if incomplete_entries:
        def fill_missing_entries(entries, indices):
            # Put each entry back at its 1-based playlist position
            ret = [MissingEntry] * max(indices)
            for i, entry in zip(indices, entries):
                ret[i - 1] = entry
            return ret
        ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

    playlist_results = []

    playliststart = self.params.get('playliststart', 1)
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(spec):
            # "1-3,7" -> 1, 2, 3, 7
            for string_segment in spec.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    yield from range(int(start), int(end) + 1)
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']
    if isinstance(ie_entries, list):
        playlist_count = len(ie_entries)
        msg = f'Collected {playlist_count} videos; downloading %d of them'
        ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count

        def get_entry(i):
            return ie_entries[i - 1]
    else:
        msg = 'Downloading %d videos'
        if not isinstance(ie_entries, (PagedList, LazyList)):
            ie_entries = LazyList(ie_entries)
        elif isinstance(ie_entries, InAdvancePagedList):
            if ie_entries._pagesize == 1:
                # NOTE(review): assigned but not read again in this branch
                playlist_count = ie_entries._pagecount

        def get_entry(i):
            return YoutubeDL.__handle_extraction_exceptions(
                lambda self, i: ie_entries[i - 1]
            )(self, i)

    # entry_indices[k] is the real 1-based playlist index of entries[k].
    # Deriving the index from list position is wrong when an item is
    # skipped below (i == 0), because every later entry is shifted by one.
    entries, entry_indices, broken = [], [], False
    items = playlistitems if playlistitems is not None else itertools.count(playliststart)
    for i in items:
        if i == 0:
            continue
        if playlistitems is None and playlistend is not None and playlistend < i:
            break
        entry = None
        try:
            entry = get_entry(i)
            if entry is MissingEntry:
                raise EntryNotInPlaylist()
        except (IndexError, EntryNotInPlaylist):
            if incomplete_entries:
                raise EntryNotInPlaylist(f'Entry {i} cannot be found')
            elif not playlistitems:
                break
        entries.append(entry)
        entry_indices.append(i)
        try:
            if entry is not None:
                self._match_entry(entry, incomplete=True, silent=True)
        except (ExistingVideoReached, RejectedVideoReached):
            broken = True
            break
    ie_result['entries'] = entries

    # Save playlist_index before re-ordering
    entries = [
        (index, entry)
        for index, entry in zip(entry_indices, entries)
        if entry is not None]
    n_entries = len(entries)

    if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
        ie_result['playlist_count'] = n_entries

    if not playlistitems and (playliststart != 1 or playlistend):
        playlistitems = list(range(playliststart, playliststart + n_entries))
    ie_result['requested_entries'] = playlistitems

    _infojson_written = False
    write_playlist_files = self.params.get('allow_playlist_files', True)
    if write_playlist_files and self.params.get('list_thumbnails'):
        self.list_thumbnails(ie_result)
    if write_playlist_files and not self.params.get('simulate'):
        ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
        _infojson_written = self._write_info_json(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
        if _infojson_written is None:
            return
        if self._write_description('playlist', ie_result,
                                   self.prepare_filename(ie_copy, 'pl_description')) is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]
    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, entry_tuple in enumerate(entries, 1):
        playlist_index, entry = entry_tuple
        if 'playlist-index' in self.params.get('compat_opts', []):
            # Compat mode: index follows the (possibly re-ordered) position
            playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
            'playlist_count': ie_result.get('playlist_count'),
            'playlist_index': playlist_index,
            'playlist_autonumber': i,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'webpage_url_domain': get_domain(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
            break
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results

    # Write the updated info to json
    # (ie_copy is bound whenever _infojson_written is truthy)
    if _infojson_written and self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
        return

    ie_result = self.run_all_pps('playlist', ie_result)
    self.to_screen(f'[download] Finished downloading playlist: {playlist}')
    return ie_result

1823

1824

@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Run one playlist entry through `process_ie_result`.

    The call is wrapped by `__handle_extraction_exceptions`, so error
    handling is whatever that decorator provides.
    """
    processed = self.process_ie_result(entry, download=download, extra_info=extra_info)
    return processed

1828

1829

def _build_format_filter(self, filter_spec):

1830

" Returns a function to filter the formats according to the filter_spec "

OPERATORS = {

'<': operator.lt,

'<=': operator.le,

'>': operator.gt,

'>=': operator.ge,

'=': operator.eq,

'!=': operator.ne,

}

operator_rex = re.compile(r'''(?x)\s*

1841

(?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*

1842

(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*

1843

(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*

1844

''' % '|'.join(map(re.escape, OPERATORS.keys())))

1845

m = operator_rex.fullmatch(filter_spec)

1846

if m:

1847

try:

1848

comparison_value = int(m.group('value'))

1849

except ValueError:

1850

comparison_value = parse_filesize(m.group('value'))

1851

if comparison_value is None:

1852

comparison_value = parse_filesize(m.group('value') + 'B')

1853

if comparison_value is None:

1854

raise ValueError(

1855

'Invalid value %r in format specification %r' % (

1856

m.group('value'), filter_spec))

1857

op = OPERATORS[m.group('op')]

if not m:

STR_OPERATORS = {

'=': operator.eq,

'^=': lambda attr, value: attr.startswith(value),

1863

'$=': lambda attr, value: attr.endswith(value),

1864

'*=': lambda attr, value: value in attr,

1865

'~=': lambda attr, value: value.search(attr) is not None

1866

}

1867

str_operator_rex = re.compile(r'''(?x)\s*

1868

(?P<key>[a-zA-Z0-9._-]+)\s*

1869

(?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?

1870

(?P<quote>["'])?

1871

(?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))

1872

(?(quote)(?P=quote))\s*

1873

''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))

1874

m = str_operator_rex.fullmatch(filter_spec)

1875

if m:

1876

if m.group('op') == '~=':

1877

comparison_value = re.compile(m.group('value'))

1878

else:

1879

comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))

1880

str_op = STR_OPERATORS[m.group('op')]

1881

if m.group('negation'):

1882

op = lambda attr, value: not str_op(attr, value)

else:

op = str_op

if not m:

raise SyntaxError('Invalid filter specification %r' % filter_spec)

1888

1889

def _filter(f):

1890

actual_value = f.get(m.group('key'))

1891

if actual_value is None:

1892

return m.group('none_inclusive')

1893

return op(actual_value, comparison_value)

1894

return _filter

1895

1896

def _check_formats(self, formats):
    """Yield only those `formats` for which a test download succeeds.

    Each format is downloaded with `self.dl(..., test=True)` into a
    throw-away file under the 'temp' output path; the file is removed
    afterwards.  Formats whose temp directory cannot be ensured are
    skipped silently; failed ones are reported on screen.
    """
    for fmt in formats:
        self.to_screen('[info] Testing format %s' % fmt['format_id'])
        temp_dir = self.get_output_path('temp')
        if not self._ensure_dir_exists(f'{temp_dir}/'):
            continue

        # Only the name is needed; self.dl opens the file itself
        probe = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=temp_dir or None)
        probe.close()
        probe_path = probe.name
        try:
            success, _ = self.dl(probe_path, fmt, test=True)
        except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
            success = False
        finally:
            if os.path.exists(probe_path):
                try:
                    os.remove(probe_path)
                except OSError:
                    self.report_warning('Unable to delete temporary file "%s"' % probe_path)

        if not success:
            self.to_screen('[info] Unable to download format %s. Skipping...' % fmt['format_id'])
            continue
        yield fmt

1918

1919

def _default_format_spec(self, info_dict, download=True):

1920

1921

def can_merge():

1922

merger = FFmpegMergerPP(self)

1923

return merger.available and merger.can_merge()

1924

1925

prefer_best = (

1926

not self.params.get('simulate')

and download

and (

not can_merge()

or info_dict.get('is_live', False)

1931

or self.outtmpl_dict['default'] == '-'))

1932

compat = (

1933

prefer_best

1934

or self.params.get('allow_multiple_audio_streams', False)

1935

or 'format-spec' in self.params.get('compat_opts', []))

1936

1937

return (

1938

'best/bestvideo+bestaudio' if prefer_best

1939

else 'bestvideo*+bestaudio/best' if not compat

1940

else 'bestvideo+bestaudio/best')

1941

1942

def build_format_selector(self, format_spec):
    """Compile `format_spec` into a selector function.

    The specification is tokenized, parsed into a tree of FormatSelector
    tuples (SINGLE atoms, GROUP `()`, PICKFIRST `/`, MERGE `+`, and lists
    for `,`), and turned into a function that takes a context dict with
    `formats` and `incomplete_formats` and produces the chosen formats.

    Raises SyntaxError for a malformed specification.
    """
    def syntax_error(note, start):
        # Builds (does not raise) the error, with a caret under the offending column
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    check_formats = self.params.get('check_formats') == 'selected'

    def _parse_filter(tokens):
        # Join everything up to the closing ']' into one filter string
        filter_parts = []
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
                        break
            elif type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        selectors = []
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            get_no_more = {'video': False, 'audio': False}
            # Build a new list instead of popping from the list being
            # iterated: popping in place skips the element after each
            # removal, letting disallowed duplicate streams through
            kept_formats = []
            for fmt_info in formats_info:
                if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                    continue
                for aud_vid in ['audio', 'video']:
                    if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                        if get_no_more[aud_vid]:
                            break
                        get_no_more[aud_vid] = True
                else:
                    kept_formats.append(fmt_info)
            formats_info = kept_formats

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(filtered('format')),
            'format_id': '+'.join(filtered('format_id')),
            'ext': output_ext,
            'protocol': '+'.join(map(determine_protocol, formats_info)),
            'language': '+'.join(orderedSet(filtered('language'))) or None,
            'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
            'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
            'tbr': sum(filtered('tbr', 'vbr', 'abr')),
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'dynamic_range': the_only_video.get('dynamic_range'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
                'asr': the_only_audio.get('asr'),
            })

        return new_dict

    def _check_formats(formats):
        # Formats are only test-downloaded when check_formats == 'selected'
        if not check_formats:
            yield from formats
            return
        yield from self._check_formats(formats)

    def _build_selector_function(selector):
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    yield from f(ctx)
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                    yield _merge(pair)

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    yield from _check_formats(ctx['formats'][::-1])
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(_check_formats(ctx['formats']))
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, format_reverse, format_idx = False, True, 1
                # best/worst [video/audio] [*] [.N]; the group names are
                # the ones read from the match object just below
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    _filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else lambda f: True)  # b*, w*
                    filter_f = lambda f: _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                else:
                    if format_spec in self._format_selection_exts['audio']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                    elif format_spec in self._format_selection_exts['video']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                    elif format_spec in self._format_selection_exts['storyboards']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                    else:
                        filter_f = lambda f: f.get('format_id') == format_spec  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if format_fallback and ctx['incomplete_formats'] and not matches:
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        matches = formats
                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                    try:
                        yield matches[format_idx - 1]
                    except IndexError:
                        return

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply the [..] filters on a copy so the caller's ctx is untouched
            ctx_copy = dict(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Iterator over the token list that can step back by one token
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)

2266

2267

def _calc_headers(self, info_dict):
    """Compute the HTTP headers to use for `info_dict`.

    Starts from `merge_headers` applied to the global `http_headers`
    param and the per-video `http_headers`, then adds a `Cookie` header
    if `_calc_cookies` yields one and an `X-Forwarded-For` header from
    `__x_forwarded_for_ip` unless that header is already present.
    """
    per_video_headers = info_dict.get('http_headers') or {}
    headers = merge_headers(self.params['http_headers'], per_video_headers)

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip and 'X-Forwarded-For' not in headers:
        headers['X-Forwarded-For'] = forwarded_ip

    return headers

def _calc_cookies(self, info_dict):
    """Return the `Cookie` header the cookie jar produces for `info_dict['url']`.

    A throw-away request object is built only so that the jar can attach
    its cookie header to it.
    """
    probe_request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    cookie_header = probe_request.get_header('Cookie')
    return cookie_header

2285

2286

def _sort_thumbnails(self, thumbnails):

2287

thumbnails.sort(key=lambda t: (

2288

t.get('preference') if t.get('preference') is not None else -1,

2289

t.get('width') if t.get('width') is not None else -1,

2290

t.get('height') if t.get('height') is not None else -1,

2291

t.get('id') if t.get('id') is not None else '',

2292

t.get('url')))

2293

2294

def _sanitize_thumbnails(self, info_dict):
    """Normalize `info_dict['thumbnails']` in place.

    If only a single `thumbnail` URL is present it is promoted to a
    one-element `thumbnails` list.  The list is sorted, every entry gets
    an `id` (its index if missing), a `resolution` when both dimensions
    are known, and a sanitized `url`.  When the `check_formats` param is
    exactly True, the list is replaced by a lazy list that keeps only
    thumbnails answering a HEAD request.
    """
    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        single = info_dict.get('thumbnail')
        if single:
            thumbnails = [{'url': single}]
            info_dict['thumbnails'] = thumbnails
    if not thumbnails:
        return

    def check_thumbnails(candidates):
        for thumb in candidates:
            self.to_screen(f'[info] Testing thumbnail {thumb["id"]}')
            try:
                self.urlopen(HEADRequest(thumb['url']))
            except network_exceptions as err:
                self.to_screen(f'[info] Unable to connect to thumbnail {thumb["id"]} URL {thumb["url"]!r} - {err}. Skipping...')
            else:
                yield thumb

    self._sort_thumbnails(thumbnails)
    for index, thumb in enumerate(thumbnails):
        if thumb.get('id') is None:
            thumb['id'] = '%d' % index
        width, height = thumb.get('width'), thumb.get('height')
        if width and height:
            thumb['resolution'] = '%dx%d' % (width, height)
        thumb['url'] = sanitize_url(thumb['url'])

    if self.params.get('check_formats') is True:
        # Checked in reverse of the sorted order; reverse=True is passed on to LazyList
        thumbnails = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
    info_dict['thumbnails'] = thumbnails

2325

2326

def _fill_common_fields(self, info_dict, is_video=True):

2327

# TODO: move sanitization here

2328

if is_video:

2329

# playlists are allowed to lack "title"

2330

info_dict['fulltitle'] = info_dict.get('title')

2331

if 'title' not in info_dict:

2332

raise ExtractorError('Missing "title" field in extractor result',

2333

video_id=info_dict['id'], ie=info_dict['extractor'])

2334

elif not info_dict.get('title'):

2335

self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')

2336

info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'

2337

2338

if info_dict.get('duration') is not None:

2339

info_dict['duration_string'] = formatSeconds(info_dict['duration'])

2340

2341

for ts_key, date_key in (

2342

('timestamp', 'upload_date'),

2343

('release_timestamp', 'release_date'),

2344

('modified_timestamp', 'modified_date'),

2345

):

2346

if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:

2347

# Working around out-of-range timestamp values (e.g. negative ones on Windows,

2348

# see http://bugs.python.org/issue1646728)

2349

try:

2350

upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])

2351

info_dict[date_key] = upload_date.strftime('%Y%m%d')

2352

except (ValueError, OverflowError, OSError):

2353

pass

2354

2355

live_keys = ('is_live', 'was_live')

2356

live_status = info_dict.get('live_status')

2357

if live_status is None:

2358

for key in live_keys:

2359

if info_dict.get(key) is False:

2360

continue

2361

if info_dict.get(key):

2362

live_status = key

2363

break

2364

if all(info_dict.get(key) is False for key in live_keys):

2365

live_status = 'not_live'

2366

if live_status:

2367

info_dict['live_status'] = live_status

2368

for key in live_keys:

2369

if info_dict.get(key) is None:

2370

info_dict[key] = (live_status == key)

2371

2372

# Auto generate title fields corresponding to the *_number fields when missing

2373

# in order to always have clean titles. This is very common for TV series.

2374

for field in ('chapter', 'season', 'episode'):

2375

if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):

2376

info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

2377

2378

def process_video_result(self, info_dict, download=True):

2379

assert info_dict.get('_type', 'video') == 'video'

2380

self._num_videos += 1

2381

2382

if 'id' not in info_dict:

2383

raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])

2384

elif not info_dict.get('id'):

2385

raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

2386

2387

def report_force_conversion(field, field_not, conversion):

2388

self.report_warning(

2389

'"%s" field is not %s - forcing %s conversion, there is an error in extractor'

2390

% (field, field_not, conversion))

2391

2392

def sanitize_string_field(info, string_field):

2393

field = info.get(string_field)

2394

if field is None or isinstance(field, compat_str):

2395

return

2396

report_force_conversion(string_field, 'a string', 'string')

2397

info[string_field] = compat_str(field)

2398

2399

def sanitize_numeric_fields(info):

2400

for numeric_field in self._NUMERIC_FIELDS:

2401

field = info.get(numeric_field)

2402

if field is None or isinstance(field, compat_numeric_types):

2403

continue

2404

report_force_conversion(numeric_field, 'numeric', 'int')

2405

info[numeric_field] = int_or_none(field)

2406

2407

sanitize_string_field(info_dict, 'id')

2408

sanitize_numeric_fields(info_dict)

2409

if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):

2410

self.report_warning('"duration" field is negative, there is an error in extractor')

2411

2412

if 'playlist' not in info_dict:

2413

# It isn't part of a playlist

2414

info_dict['playlist'] = None

2415

info_dict['playlist_index'] = None

2416

2417

self._sanitize_thumbnails(info_dict)

2418

2419

thumbnail = info_dict.get('thumbnail')

2420

thumbnails = info_dict.get('thumbnails')

2421

if thumbnail:

2422

info_dict['thumbnail'] = sanitize_url(thumbnail)

2423

elif thumbnails:

2424

info_dict['thumbnail'] = thumbnails[-1]['url']

2425

2426

if info_dict.get('display_id') is None and 'id' in info_dict:

2427

info_dict['display_id'] = info_dict['id']

2428

2429

self._fill_common_fields(info_dict)

2430

2431

for cc_kind in ('subtitles', 'automatic_captions'):

2432

cc = info_dict.get(cc_kind)

2433

if cc:

2434

for _, subtitle in cc.items():

2435

for subtitle_format in subtitle:

2436

if subtitle_format.get('url'):

2437

subtitle_format['url'] = sanitize_url(subtitle_format['url'])

2438

if subtitle_format.get('ext') is None:

2439

subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

2440

2441

automatic_captions = info_dict.get('automatic_captions')

2442

subtitles = info_dict.get('subtitles')

2443

2444

info_dict['requested_subtitles'] = self.process_subtitles(

2445

info_dict['id'], subtitles, automatic_captions)

2446

2447

if info_dict.get('formats') is None:

2448

# There's only one format available

2449

formats = [info_dict]

2450

else:

2451

formats = info_dict['formats']

2452

2453

info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)

2454

if not self.params.get('allow_unplayable_formats'):

2455

formats = [f for f in formats if not f.get('has_drm')]

2456

2457

get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))

2458

if not get_from_start:

2459

info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')

2460

if info_dict.get('is_live') and formats:

2461

formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]

2462

if get_from_start and not formats:

2463

self.raise_no_formats(info_dict, msg='--live-from-start is passed, but there are no formats that can be downloaded from the start. '

2464

'If you want to download from the current time, pass --no-live-from-start')

2465

2466

if not formats:

2467

self.raise_no_formats(info_dict)

2468

2469

def is_wellformed(f):

url = f.get('url')

if not url:

self.report_warning(

'"url" field is missing or empty - skipping format, '

2474

'there is an error in extractor')

2475

return False

2476

if isinstance(url, bytes):

2477

sanitize_string_field(f, 'url')

2478

return True

2479

2480

# Filter out malformed formats for better extraction robustness

2481

formats = list(filter(is_wellformed, formats))

formats_dict = {}

# We check that all the formats have the format and format_id fields

2486

for i, format in enumerate(formats):

2487

sanitize_string_field(format, 'format_id')

2488

sanitize_numeric_fields(format)

2489

format['url'] = sanitize_url(format['url'])

2490

if not format.get('format_id'):

2491

format['format_id'] = compat_str(i)

2492

else:

2493

# Sanitize format_id from characters used in format selector expression

2494

format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])

2495

format_id = format['format_id']

2496

if format_id not in formats_dict:

2497

formats_dict[format_id] = []

2498

formats_dict[format_id].append(format)

2499

2500

# Make sure all formats have unique format_id

2501

common_exts = set(itertools.chain(*self._format_selection_exts.values()))

2502

for format_id, ambiguous_formats in formats_dict.items():

2503

ambigious_id = len(ambiguous_formats) > 1

2504

for i, format in enumerate(ambiguous_formats):

2505

if ambigious_id:

2506

format['format_id'] = '%s-%d' % (format_id, i)

2507

if format.get('ext') is None:

2508

format['ext'] = determine_ext(format['url']).lower()

2509

# Ensure there is no conflict between id and ext in format selection

2510

# See https://github.com/yt-dlp/yt-dlp/issues/1282

2511

if format['format_id'] != format['ext'] and format['format_id'] in common_exts:

2512

format['format_id'] = 'f%s' % format['format_id']

2513

2514

for i, format in enumerate(formats):

2515

if format.get('format') is None:

2516

format['format'] = '{id} - {res}{note}'.format(

2517

id=format['format_id'],

2518

res=self.format_resolution(format),

2519

note=format_field(format, 'format_note', ' (%s)'),

2520

)

2521

if format.get('protocol') is None:

2522

format['protocol'] = determine_protocol(format)

2523

if format.get('resolution') is None:

2524

format['resolution'] = self.format_resolution(format, default=None)

2525

if format.get('dynamic_range') is None and format.get('vcodec') != 'none':

2526

format['dynamic_range'] = 'SDR'

2527

if (info_dict.get('duration') and format.get('tbr')

2528

and not format.get('filesize') and not format.get('filesize_approx')):

2529

format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)

2530

2531

# Add HTTP headers, so that external programs can use them from the

2532

# json output

2533

full_format_info = info_dict.copy()

2534

full_format_info.update(format)

2535

format['http_headers'] = self._calc_headers(full_format_info)

2536

# Remove private housekeeping stuff

2537

if '__x_forwarded_for_ip' in info_dict:

2538

del info_dict['__x_forwarded_for_ip']

2539

2540

if self.params.get('check_formats') is True:

2541

formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

2542

2543

if not formats or formats[0] is not info_dict:

2544

# only set the 'formats' fields if the original info_dict list them

2545

# otherwise we end up with a circular reference, the first (and unique)

2546

# element in the 'formats' field in info_dict is info_dict itself,

2547

# which can't be exported to json

2548

info_dict['formats'] = formats

2549

2550

info_dict, _ = self.pre_process(info_dict)

2551

2552

if self._match_entry(info_dict) is not None:

2553

return info_dict

2554

2555

self.post_extract(info_dict)

2556

info_dict, _ = self.pre_process(info_dict, 'after_filter')

2557

2558

# The pre-processors may have modified the formats

2559

formats = info_dict.get('formats', [info_dict])

2560

2561

list_only = self.params.get('simulate') is None and (

2562

self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))

2563

interactive_format_selection = not list_only and self.format_selector == '-'

2564

if self.params.get('list_thumbnails'):

2565

self.list_thumbnails(info_dict)

2566

if self.params.get('listsubtitles'):

2567

if 'automatic_captions' in info_dict:

2568

self.list_subtitles(

2569

info_dict['id'], automatic_captions, 'automatic captions')

2570

self.list_subtitles(info_dict['id'], subtitles, 'subtitles')

2571

if self.params.get('listformats') or interactive_format_selection:

2572

self.list_formats(info_dict)

2573

if list_only:

2574

# Without this printing, -F --print-json will not work

2575

self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)

2576

return

2577

2578

format_selector = self.format_selector

2579

if format_selector is None:

2580

req_format = self._default_format_spec(info_dict, download=download)

2581

self.write_debug('Default format spec: %s' % req_format)

2582

format_selector = self.build_format_selector(req_format)

2583

2584

while True:

2585

if interactive_format_selection:

2586

req_format = input(

2587

self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))

2588

try:

2589

format_selector = self.build_format_selector(req_format)

2590

except SyntaxError as err:

2591

self.report_error(err, tb=False, is_error=False)

2592

continue

2593

2594

# While in format selection we may need to have an access to the original

2595

# format set in order to calculate some metrics or do some processing.

2596

# For now we need to be able to guess whether original formats provided

2597

# by extractor are incomplete or not (i.e. whether extractor provides only

2598

# video-only or audio-only formats) for proper formats selection for

2599

# extractors with such incomplete formats (see

2600

# https://github.com/ytdl-org/youtube-dl/pull/5556).

2601

# Since formats may be filtered during format selection and may not match

2602

# the original formats the results may be incorrect. Thus original formats

2603

# or pre-calculated metrics should be passed to format selection routines

2604

# as well.

2605

# We will pass a context object containing all necessary additional data

2606

# instead of just formats.

2607

# This fixes incorrect format selection issue (see

2608

# https://github.com/ytdl-org/youtube-dl/issues/10083).

2609

incomplete_formats = (

2610

# All formats are video-only or

2611

all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)

2612

# all formats are audio-only

2613

or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

ctx = {

'formats': formats,

'incomplete_formats': incomplete_formats,

2618

}

2619

2620

formats_to_download = list(format_selector(ctx))

2621

if interactive_format_selection and not formats_to_download:

2622

self.report_error('Requested format is not available', tb=False, is_error=False)

continue

break

if not formats_to_download:

2627

if not self.params.get('ignore_no_formats_error'):

2628

raise ExtractorError('Requested format is not available', expected=True,

2629

video_id=info_dict['id'], ie=info_dict['extractor'])

2630

self.report_warning('Requested format is not available')

2631

# Process what we can, even without any available formats.

2632

formats_to_download = [{}]

2633

2634

best_format = formats_to_download[-1]

if download:

if best_format:

self.to_screen(

f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '

2639

+ ', '.join([f['format_id'] for f in formats_to_download]))

2640

max_downloads_reached = False

2641

for i, fmt in enumerate(formats_to_download):

2642

formats_to_download[i] = new_info = self._copy_infodict(info_dict)

2643

new_info.update(fmt)

2644

try:

2645

self.process_info(new_info)

2646

except MaxDownloadsReached:

2647

max_downloads_reached = True

2648

# Remove copied info

2649

for key, val in tuple(new_info.items()):

2650

if info_dict.get(key) == val:

2651

new_info.pop(key)

2652

if max_downloads_reached:

2653

break

2654

2655

write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)

2656

assert write_archive.issubset({True, False, 'ignore'})

2657

if True in write_archive and False not in write_archive:

2658

self.record_download_archive(info_dict)

2659

2660

info_dict['requested_downloads'] = formats_to_download

2661

info_dict = self.run_all_pps('after_video', info_dict)

2662

if max_downloads_reached:

2663

raise MaxDownloadsReached()

2664

2665

# We update the info dict with the selected best quality format (backwards compatibility)

2666

info_dict.update(best_format)

2667

return info_dict

2668

2669

def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Choose the subtitle languages to fetch and one format per language.

    Regular subtitles are considered when the 'writesubtitles' param is set
    and automatic captions when 'writeautomaticsub' is set; for a language
    present in both, the regular subtitles win.

    Returns None when neither param is set or nothing is available,
    otherwise a dict mapping each requested language to the chosen
    subtitle-format dict.
    """
    want_normal = self.params.get('writesubtitles')
    want_auto = self.params.get('writeautomaticsub')

    available_subs = {}
    if normal_subtitles and want_normal:
        available_subs.update(normal_subtitles)
    if automatic_captions and want_auto:
        for lang, cap_info in automatic_captions.items():
            # Never replace a regular subtitle track with an automatic one
            available_subs.setdefault(lang, cap_info)

    if not (want_normal or want_auto) or not available_subs:
        return None

    all_sub_langs = available_subs.keys()
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        # A list is used so that the languages keep the order in which they
        # were given in subtitleslangs.
        # See https://github.com/yt-dlp/yt-dlp/issues/1041
        requested_langs = []
        for lang_re in self.params.get('subtitleslangs'):
            # A leading '-' means "remove the matching languages"
            discard = lang_re[0] == '-'
            pattern = lang_re[1:] if discard else lang_re
            if pattern == 'all':
                if discard:
                    requested_langs = []
                else:
                    requested_langs.extend(all_sub_langs)
                continue
            matcher = re.compile(pattern + '$')
            matched = [lang for lang in all_sub_langs if matcher.match(lang)]
            if discard:
                requested_langs = [lang for lang in requested_langs if lang not in matched]
            else:
                requested_langs.extend(matched)
        requested_langs = orderedSet(requested_langs)
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        # Fall back to the first available language
        requested_langs = [next(iter(all_sub_langs))]
    if requested_langs:
        self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        if formats is None:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue

        chosen, found = None, False
        for ext in formats_preference:
            if ext == 'best':
                chosen, found = formats[-1], True
            else:
                candidates = [sub for sub in formats if sub['ext'] == ext]
                if candidates:
                    chosen, found = candidates[-1], True
            if found:
                break
        if not found:
            # No preference matched: use the last listed format and say so
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs

def _forceprint(self, key, info_dict):

2741

if info_dict is None:

2742

return

2743

info_copy = info_dict.copy()

2744

info_copy['formats_table'] = self.render_formats_table(info_dict)

2745

info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)

2746

info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))

2747

info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))

2748

2749

def format_tmpl(tmpl):

2750

mobj = re.match(r'\w+(=?)$', tmpl)

2751

if mobj and mobj.group(1):

2752

return f'{tmpl[:-1]} = %({tmpl[:-1]})r'

elif mobj:

return f'%({tmpl})s'

return tmpl

for tmpl in self.params['forceprint'].get(key, []):

2758

self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))

2759

2760

for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):

2761

filename = self.evaluate_outtmpl(file_tmpl, info_dict)

2762

tmpl = format_tmpl(tmpl)

2763

self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')

2764

if self._ensure_dir_exists(filename):

2765

with io.open(filename, 'a', encoding='utf-8') as f:

2766

f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')

2767

2768

def __forced_printings(self, info_dict, filename, incomplete):

2769

def print_mandatory(field, actual_field=None):

2770

if actual_field is None:

2771

actual_field = field

2772

if (self.params.get('force%s' % field, False)

2773

and (not incomplete or info_dict.get(actual_field) is not None)):

2774

self.to_stdout(info_dict[actual_field])

2775

2776

def print_optional(field):

2777

if (self.params.get('force%s' % field, False)

2778

and info_dict.get(field) is not None):

2779

self.to_stdout(info_dict[field])

2780

2781

info_dict = info_dict.copy()

2782

if filename is not None:

2783

info_dict['filename'] = filename

2784

if info_dict.get('requested_formats') is not None:

2785

# For RTMP URLs, also include the playpath

2786

info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])

2787

elif info_dict.get('url'):

2788

info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

2789

2790

if (self.params.get('forcejson')

2791

or self.params['forceprint'].get('video')

2792

or self.params['print_to_file'].get('video')):

2793

self.post_extract(info_dict)

2794

self._forceprint('video', info_dict)

2795

2796

print_mandatory('title')

2797

print_mandatory('id')

2798

print_mandatory('url', 'urls')

2799

print_optional('thumbnail')

2800

print_optional('description')

2801

print_optional('filename')

2802

if self.params.get('forceduration') and info_dict.get('duration') is not None:

2803

self.to_stdout(formatSeconds(info_dict['duration']))

2804

print_mandatory('format')

2805

2806

if self.params.get('forcejson'):

2807

self.to_stdout(json.dumps(self.sanitize_info(info_dict)))

2808

2809

def dl(self, name, info, subtitle=False, test=False):
    """Run a suitable downloader for info, writing to name.

    With test=True a dedicated parameter set is passed to the downloader
    instead of self.params, and no progress hooks are attached.
    Returns whatever the downloader's download() returns.
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    params = self.params
    if test:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_cls(self, params)
    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)

        def printable_url(fmt):
            # Do not dump the payload of data: URLs into the debug output
            if fmt['url'].startswith('data:'):
                return fmt['url'].split(',')[0] + ',<data>'
            return fmt['url']

        urls = '", "'.join(
            printable_url(fmt) for fmt in info.get('requested_formats', []) or [info])
        self.write_debug('Invoking downloader on "%s"' % urls)

    # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
    # But it may contain objects that are not deep-copyable
    new_info = self._copy_infodict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)

2843

2844

def existing_file(self, filepaths, *, default_overwrite=True):
    """Return the first existing path when overwriting is disabled.

    filepaths is de-duplicated in order. If any of the paths exist and the
    'overwrites' param (falling back to default_overwrite) is false, the
    first existing path is returned. Otherwise every existing path is
    reported, removed from disk, and None is returned.
    """
    present = [path for path in orderedSet(filepaths) if os.path.exists(path)]
    if present and not self.params.get('overwrites', default_overwrite):
        return present[0]

    for path in present:
        self.report_file_delete(path)
        os.remove(path)
    return None

def process_info(self, info_dict):
    """Process a single resolved IE result. (Modifies it in-place)

    Writes the requested side files (description, subtitles, thumbnails,
    info json, annotations, internet shortcuts), runs the 'before_dl'
    pre-processors, downloads the video (merging multiple requested
    formats when possible) and then runs fixups, post-processors and
    post hooks.

    The outcome is recorded in info_dict['__write_download_archive'];
    the method itself always returns None, returning early whenever a
    step fails or the entry is filtered out.

    Raises UnavailableVideoError when an OSError/IOError occurs while
    downloading, and MaxDownloadsReached once the 'max_downloads' limit
    has been reached.
    """

    assert info_dict.get('_type', 'video') == 'video'
    original_infodict = info_dict

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    # This is mostly just for backward compatibility of process_info
    # As a side-effect, this allows for format-specific filters
    if self._match_entry(info_dict) is not None:
        info_dict['__write_download_archive'] = 'ignore'
        return

    # Does nothing under normal operation - for backward compatibility of process_info
    self.post_extract(info_dict)

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    self._num_downloads += 1

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    if self.params.get('simulate'):
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
        return

    if full_filename is None:
        return
    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self._write_description('video', info_dict,
                               self.prepare_filename(info_dict, 'description')) is None:
        return

    sub_files = self._write_subtitles(info_dict, temp_filename)
    if sub_files is None:
        return
    files_to_move.update(dict(sub_files))

    thumb_files = self._write_thumbnails(
        'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
    if thumb_files is None:
        return
    files_to_move.update(dict(thumb_files))

    infofn = self.prepare_filename(info_dict, 'infojson')
    _infojson_written = self._write_info_json('video', info_dict, infofn)
    if _infojson_written:
        info_dict['infojson_filename'] = infofn
        # For backward compatibility, even though it was a private field
        info_dict['__infojson_filename'] = infofn
    elif _infojson_written is None:
        return

    # Note: Annotations are deprecated
    annofn = None
    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
    if annofn:
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    # Write internet shortcut files
    def _write_link_file(link_type):
        # Returns False only on a hard failure (directory or file could not
        # be written); True means "written or deliberately skipped"
        url = try_get(info_dict['webpage_url'], iri_to_uri)
        if not url:
            self.report_warning(
                f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
            return True
        linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
        if not self._ensure_dir_exists(encodeFilename(linkfn)):
            return False
        # Keep an existing shortcut only when overwriting is disabled
        # (same rule as for the annotations file above)
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
            return True
        try:
            self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
            with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                         newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                template_vars = {'url': url}
                if link_type == 'desktop':
                    # File name without the ".desktop" suffix
                    template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
        except (OSError, IOError):
            self.report_error(f'Cannot write internet shortcut {linkfn}')
            return False
        return True

    write_links = {
        'url': self.params.get('writeurllink'),
        'webloc': self.params.get('writewebloclink'),
        'desktop': self.params.get('writedesktoplink'),
    }
    if self.params.get('writelink'):
        # Generic option: pick the shortcut type from the current platform
        link_type = ('webloc' if sys.platform == 'darwin'
                     else 'desktop' if sys.platform.startswith('linux')
                     else 'url')
        write_links[link_type] = True

    if any(should_write and not _write_link_file(link_type)
           for link_type, should_write in write_links.items()):
        return

    def replace_info_dict(new_info):
        # Copy new_info into the caller's dict so that info_dict keeps its
        # identity (checked by the assertion at the end of this method)
        nonlocal info_dict
        if new_info == info_dict:
            return
        info_dict.clear()
        info_dict.update(new_info)

    try:
        new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
        replace_info_dict(new_info)
    except PostProcessingError as err:
        self.report_error('Preprocessing: %s' % str(err))
        return

    if self.params.get('skip_download'):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_video_file(*filepaths):
                # For each candidate, the name it would have after
                # conversion to 'final_ext' is checked before the name itself
                ext = info_dict.get('ext')
                converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
                file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
                                          default_overwrite=False)
                if file:
                    info_dict['ext'] = os.path.splitext(file)[1][1:]
                return file

            success = True
            if info_dict.get('requested_formats') is not None:

                def compatible_formats(formats):
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(format.get('ext') for format in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv')
                    if (info_dict['ext'] == 'webm'
                            and info_dict.get('thumbnails')
                            # check with type instead of pp_key, __name__, or isinstance
                            # since we don't want any custom PPs to trigger this
                            and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    # Strip the current extension only if it is one we set ourselves
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return '%s.%s' % (filename_wo_ext, ext)

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_video_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                downloaded = []
                merger = FFmpegMergerPP(self)

                fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif fd:
                    # A single downloader handles all the requested formats at once
                    for f in requested_formats if fd != FFmpegFD else []:
                        f['filepath'] = fname = prepend_extension(
                            correct_ext(temp_filename, info_dict['ext']),
                            'f%s' % f['format_id'], info_dict['ext'])
                        downloaded.append(fname)
                    info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
                        if not self.params.get('ignoreerrors'):
                            self.report_error(f'{msg}. Aborting due to --abort-on-error')
                            return
                        self.report_warning(f'{msg}. The formats won\'t be merged')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                        fname = temp_filename
                    # Download each requested format separately
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            f['filepath'] = fname
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success

                if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
                    info_dict['__postprocessors'].append(merger)
                    info_dict['__files_to_merge'] = downloaded
                    # Even if there were no downloads, it is being merged only now
                    info_dict['__real_download'] = True
                else:
                    for file in downloaded:
                        files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_video_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and full_filename != '-':

            def fixup():
                # Queue the ffmpeg fixup post-processors that apply to this
                # download, or only warn, depending on the 'fixup' param
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = False
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    if not cndn:
                        return
                    if not do_fixup:
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(
                    stretched_ratio not in (1, None),
                    f'Non-uniform pixel ratio {stretched_ratio}',
                    FFmpegFixupStretchedPP)

                ffmpeg_fixup(
                    (info_dict.get('requested_formats') is None
                     and info_dict.get('container') == 'm4a_dash'
                     and info_dict.get('ext') == 'm4a'),
                    'writing DASH m4a. Only some players support this container',
                    FFmpegFixupM4aPP)

                downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                downloader = downloader.__name__ if downloader else None

                if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
                    ffmpeg_fixup(downloader == 'HlsFD',
                                 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                 FFmpegFixupM3u8PP)
                    ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
                                 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            info_dict['__write_download_archive'] = True

    if self.params.get('force_write_download_archive'):
        info_dict['__write_download_archive'] = True

    # Make sure the info_dict was modified in-place
    assert info_dict is original_infodict

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None and self._num_downloads >= int(max_downloads):
        raise MaxDownloadsReached()

3229

3230

def __download_wrapper(self, func):
    """Return `func` wrapped with the download-level exception policy.

    The wrapper reports UnavailableVideoError through report_error() and
    swallows it, prints and re-raises MaxDownloadsReached, and prints
    DownloadCancelled, re-raising it unless the 'break_per_url' param is set.
    When `func` succeeds and 'dump_single_json' is set, the result is
    post-extracted, sanitized and written to stdout as JSON.
    The wrapper itself always returns None.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except UnavailableVideoError as exc:
            self.report_error(exc)
            return
        except MaxDownloadsReached as exc:
            self.to_screen(f'[info] {exc}')
            raise
        except DownloadCancelled as exc:
            self.to_screen(f'[info] {exc}')
            if not self.params.get('break_per_url'):
                raise
            return

        # Only reached when func() completed without raising
        if not self.params.get('dump_single_json', False):
            return
        self.post_extract(result)
        self.to_stdout(json.dumps(self.sanitize_info(result)))

    return wrapper

3249

3250

def download(self, url_list):
    """Download every URL in `url_list` and return self._download_retcode.

    Raises SameFileError when more than one URL is given while the default
    output template is a fixed name (not '-' and without '%'), unless the
    'max_downloads' param is 1.
    """
    # Callers often pass a lone URL instead of a list; variadic() normalizes the argument
    url_list = variadic(url_list)
    outtmpl = self.outtmpl_dict['default']

    if len(url_list) > 1 and outtmpl != '-' and '%' not in outtmpl:
        if self.params.get('max_downloads') != 1:
            raise SameFileError(outtmpl)

    for url in url_list:
        guarded_extract = self.__download_wrapper(self.extract_info)
        guarded_extract(
            url, force_generic_extractor=self.params.get('force_generic_extractor', False))

    return self._download_retcode

3265

3266

def download_with_info_file(self, info_filename):
    """Process the info JSON stored in `info_filename` and return self._download_retcode.

    If processing fails with DownloadError, EntryNotInPlaylist or ReExtractInfo
    and the info has a 'webpage_url', a regular download of that URL is tried
    instead; without a 'webpage_url' the error is re-raised.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r', openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput offers no read(), so json.load() cannot be used on it directly
        loaded = json.loads('\n'.join(lines))
        info = self.sanitize_info(loaded, self.params.get('clean_infojson', True))

    try:
        self.__download_wrapper(self.process_ie_result)(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
        if not isinstance(e, EntryNotInPlaylist):
            self.to_stderr('\r')
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
        return self.download([webpage_url])
    return self._download_retcode

3284

3285

@staticmethod

3286

def sanitize_info(info_dict, remove_private_keys=False):

3287

''' Sanitize the infodict for converting to json '''

3288

if info_dict is None:

3289

return info_dict

3290

info_dict.setdefault('epoch', int(time.time()))

3291

info_dict.setdefault('_type', 'video')

3292

3293

if remove_private_keys:

3294

reject = lambda k, v: v is None or (k.startswith('_') and k != '_type') or k in {

3295

'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',

3296

'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',

3297

}

3298

else:

3299

reject = lambda k, v: False

3300

3301

def filter_fn(obj):

3302

if isinstance(obj, dict):

3303

return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}

3304

elif isinstance(obj, (list, tuple, set, LazyList)):

3305

return list(map(filter_fn, obj))

3306

elif obj is None or isinstance(obj, (str, int, float, bool)):

return obj

else:

return repr(obj)

return filter_fn(info_dict)

3312

3313

@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    """Backward-compatible alias of sanitize_info().

    `actually_filter` is forwarded as sanitize_info's `remove_private_keys`.
    """
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)

3317

3318

@staticmethod

3319

def post_extract(info_dict):

3320

def actual_post_extract(info_dict):

3321

if info_dict.get('_type') in ('playlist', 'multi_video'):

3322

for video_dict in info_dict.get('entries', {}):

3323

actual_post_extract(video_dict or {})

3324

return

3325

3326

post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})

3327

info_dict.update(post_extractor())

3328

3329

actual_post_extract(info_dict or {})

3330

3331

def run_pp(self, pp, infodict):
    """Run the postprocessor `pp` on `infodict` and return the resulting info dict.

    `pp.run()` returns (files_to_delete, infodict). With the 'keepvideo'
    param those files are registered in infodict['__files_to_move'] (mapped
    to ''); otherwise they are removed from disk and dropped from
    '__files_to_move'. A PostProcessingError is re-raised unless the
    'ignoreerrors' param is exactly True, in which case it is reported and
    the info dict is returned as it was.
    """
    infodict.setdefault('__files_to_move', {})
    try:
        files_to_delete, infodict = pp.run(infodict)
    except PostProcessingError as e:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is not True:
            raise
        self.report_error(e)
        return infodict

    if not files_to_delete:
        return infodict

    if self.params.get('keepvideo', False):
        for path in files_to_delete:
            infodict['__files_to_move'].setdefault(path, '')
        return infodict

    for old_filename in set(files_to_delete):
        self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
        try:
            os.remove(encodeFilename(old_filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded original file')
        infodict['__files_to_move'].pop(old_filename, None)
    return infodict

3359

3360

def run_all_pps(self, key, info, *, additional_pps=None):
    """Run `additional_pps` followed by the postprocessors registered under `key`.

    self._forceprint(key, info) is called first; each postprocessor is then
    applied through run_pp() and the final info dict is returned.
    """
    self._forceprint(key, info)
    pipeline = (additional_pps or []) + self._pps[key]
    for postprocessor in pipeline:
        info = self.run_pp(postprocessor, info)
    return info

3365

3366

def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered under `key` on a shallow copy of `ie_info`.

    Returns a tuple (processed info, files_to_move); the '__files_to_move'
    entry is removed from the returned info dict.
    """
    working_copy = dict(ie_info)
    working_copy['__files_to_move'] = files_to_move or {}
    processed = self.run_all_pps(key, working_copy)
    pending_moves = processed.pop('__files_to_move', None)
    return processed, pending_moves

3371

3372

def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Sets info['filepath'] to `filename`, runs the 'post_process' stage
    (preceded by any info['__postprocessors']), then MoveFilesAfterDownloadPP,
    and finally returns the result of the 'after_move' stage.
    """
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move or {}

    extra_pps = info.get('__postprocessors')
    info = self.run_all_pps('post_process', info, additional_pps=extra_pps)

    mover = MoveFilesAfterDownloadPP(self)
    info = self.run_pp(mover, info)
    del info['__files_to_move']

    return self.run_all_pps('after_move', info)

3380

3381

def _make_archive_id(self, info_dict):
    """Build the download-archive identifier '<extractor key, lowercased> <video id>'.

    Returns None when the info has no 'id', or when no extractor key is
    stored and none of self._ies is suitable for the info's 'url'.
    """
    video_id = info_dict.get('id')
    if not video_id:
        return

    # Lowercasing below guards against future changes in key case and keeps
    # compatibility with archives written by prior versions
    extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        url = str_or_none(info_dict.get('url'))
        if not url:
            return
        # Fall back to the key of the first extractor that accepts the URL
        for candidate_key, candidate in self._ies.items():
            if candidate.suitable(url):
                extractor = candidate_key
                break
        else:
            return

    return f'{extractor.lower()} {video_id}'

3400

3401

def in_download_archive(self, info_dict):
    """Return True if the video described by `info_dict` is listed in self.archive.

    Always False when the 'download_archive' param is unset or when no
    archive id can be built (incomplete video information).
    """
    if self.params.get('download_archive') is None:
        return False

    vid_id = self._make_archive_id(info_dict)
    return bool(vid_id) and vid_id in self.archive

3411

3412

def record_download_archive(self, info_dict):
    """Append the archive id of `info_dict` to the download archive file and to self.archive.

    Does nothing when the 'download_archive' param is unset.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return

    vid_id = self._make_archive_id(info_dict)
    assert vid_id

    self.write_debug(f'Adding to archive: {vid_id}')
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + '\n')
    self.archive.add(vid_id)

3422

3423

@staticmethod

3424

def format_resolution(format, default='unknown'):

3425

if format.get('vcodec') == 'none' and format.get('acodec') != 'none':

3426

return 'audio only'

3427

if format.get('resolution') is not None:

3428

return format['resolution']

3429

if format.get('width') and format.get('height'):

3430

return '%dx%d' % (format['width'], format['height'])

3431

elif format.get('height'):

3432

return '%sp' % format['height']

3433

elif format.get('width'):

3434

return '%dx?' % format['width']

3435

return default

3436

3437

def _list_format_headers(self, *headers):
    """Return the table headers, styled with Styles.HEADERS unless 'listformats_table' is False.

    With the param set to False the `headers` tuple is returned untouched.
    """
    if self.params.get('listformats_table', True) is False:
        return headers
    return [self._format_screen(title, self.Styles.HEADERS) for title in headers]

3441

3442

def _format_note(self, fdict):
    """Build the free-form 'note' string describing the format dict `fdict`.

    Fields are appended in a fixed order (unsupported marker, language,
    format_note, tbr, container, video codec/bitrate, fps, audio
    codec/bitrate/sample rate, file size), each joined to the text collected
    so far with the separator used below.
    """
    res = ''

    def append(text, sep):
        # Add `text`, preceded by `sep` only when something was already written
        nonlocal res
        if res:
            res += sep
        res += text

    if fdict.get('ext') in ['f4f', 'f4m']:
        res += '(unsupported)'
    if fdict.get('language'):
        append('[%s]' % fdict['language'], ' ')
    if fdict.get('format_note') is not None:
        append(fdict['format_note'], ' ')
    if fdict.get('tbr') is not None:
        append('%4dk' % fdict['tbr'], ', ')
    if fdict.get('container') is not None:
        append('%s container' % fdict['container'], ', ')

    vcodec, vbr, abr = fdict.get('vcodec'), fdict.get('vbr'), fdict.get('abr')
    if vcodec is not None and vcodec != 'none':
        append(vcodec, ', ')
        if vbr is not None:
            res += '@'
    elif vbr is not None and abr is not None:
        res += 'video@'
    if vbr is not None:
        res += '%4dk' % vbr

    if fdict.get('fps') is not None:
        append('%sfps' % fdict['fps'], ', ')

    acodec = fdict.get('acodec')
    if acodec is not None:
        append('video only' if acodec == 'none' else '%-5s' % acodec, ', ')
    elif abr is not None:
        append('audio', ', ')
    if abr is not None:
        res += '@%3dk' % abr
    if fdict.get('asr') is not None:
        res += ' (%5dHz)' % fdict['asr']

    if fdict.get('filesize') is not None:
        append(format_bytes(fdict['filesize']), ', ')
    elif fdict.get('filesize_approx') is not None:
        append('~' + format_bytes(fdict['filesize_approx']), ', ')
    return res

3501

3502

def render_formats_table(self, info_dict):
    """Render the table of available formats for `info_dict`.

    Returns None when the info has neither 'formats' nor 'url'. Formats with
    a 'preference' below -1000 are left out. When the 'listformats_table'
    param is False the plain four-column layout is used, otherwise the
    detailed styled layout.
    """
    if not (info_dict.get('formats') or info_dict.get('url')):
        return None

    formats = info_dict.get('formats', [info_dict])

    def is_listed(f):
        return f.get('preference') is None or f['preference'] >= -1000

    if self.params.get('listformats_table', True) is False:
        plain_rows = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f),
            ] for f in formats if is_listed(f)]
        return render_table(['format code', 'extension', 'resolution', 'note'], plain_rows, extra_gap=1)

    delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)

    def build_row(f):
        # One table row; the column order must match `header_line` below
        return [
            self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d'),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            delim,
            format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
            format_field(f, 'tbr', '\t%dk'),
            shorten_protocol_name(f.get('protocol', '')),
            delim,
            format_field(f, 'vcodec', default='unknown').replace(
                'none', 'images' if f.get('acodec') == 'none'
                        else self._format_screen('audio only', self.Styles.SUPPRESS)),
            format_field(f, 'vbr', '\t%dk'),
            format_field(f, 'acodec', default='unknown').replace(
                'none', '' if f.get('vcodec') == 'none'
                        else self._format_screen('video only', self.Styles.SUPPRESS)),
            format_field(f, 'abr', '\t%dk'),
            format_field(f, 'asr', '\t%dHz'),
            join_nonempty(
                self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
                format_field(f, 'language', '[%s]'),
                join_nonempty(format_field(f, 'format_note'),
                              format_field(f, 'container', ignore=(None, f.get('ext'))),
                              delim=', '),
                delim=' '),
        ]

    table = [build_row(f) for f in formats if is_listed(f)]
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
        delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    return render_table(
        header_line, table, hide_empty=True,
        delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))

3554

3555

def render_thumbnails_table(self, info_dict):
    """Render a table (ID, Width, Height, URL) of the info's thumbnails, or None if there are none."""
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        return None

    headers = self._list_format_headers('ID', 'Width', 'Height', 'URL')
    rows = [
        [thumb.get('id'), thumb.get('width', 'unknown'), thumb.get('height', 'unknown'), thumb['url']]
        for thumb in thumbnails]
    return render_table(headers, rows)

3562

3563

def render_subtitles_table(self, video_id, subtitles):
    """Render a table (Language, Name, Formats) for `subtitles`, or None if it is empty.

    `subtitles` maps a language to a list of format dicts; each format needs
    'ext' and may carry 'name'. Formats are listed in reverse order. When all
    names of a language are equal the name is shown once, or not at all if
    it is missing.
    """
    if not subtitles:
        return None

    def _row(lang, formats):
        exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
        if len(set(names)) == 1:
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    headers = self._list_format_headers('Language', 'Name', 'Formats')
    rows = [_row(lang, formats) for lang, formats in subtitles.items()]
    return render_table(headers, rows, hide_empty=True)

3576

3577

def __list_table(self, video_id, name, func, *args):
    """Print the table produced by func(*args), or a "has no <name>" notice when it is empty."""
    table = func(*args)
    if table:
        self.to_screen(f'[info] Available {name} for {video_id}:')
        self.to_stdout(table)
    else:
        self.to_screen(f'{video_id} has no {name}')

3584

3585

def list_formats(self, info_dict):
    """Print the formats table for the video described by `info_dict`."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'formats', self.render_formats_table, info_dict)

3587

3588

def list_thumbnails(self, info_dict):
    """Print the thumbnails table for the video described by `info_dict`."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'thumbnails', self.render_thumbnails_table, info_dict)

3590

3591

def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the subtitles table for `video_id`; `name` is the label used in the messages."""
    renderer = self.render_subtitles_table
    self.__list_table(video_id, name, renderer, video_id, subtitles)

3593

3594

def urlopen(self, req):
    """Start an HTTP download.

    `req` is either a request object or a URL string; a string is first
    turned into a request with sanitized_Request(). The request is opened
    through self._opener using self._socket_timeout.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)

3599

3600

def print_debug_header(self):
    """Write the verbose-mode diagnostic header.

    Does nothing unless the 'verbose' param is set. Otherwise writes, through
    the 'logger' param if given or else through write_string/_write_string:
    the encodings in use, the yt-dlp version and variant, lazy-loader and
    plugin state, compat options, the git HEAD (source checkouts only), the
    Python version, external program versions, optional libraries and the
    proxy map.
    """
    if not self.params.get('verbose'):
        return

    def get_encoding(stream):
        ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
        if not supports_terminal_sequences(stream):
            from .compat import WINDOWS_VT_MODE
            ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
        return ret

    encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        get_encoding(self._out_files['screen']), get_encoding(self._out_files['error']),
        self.get_encoding())

    logger = self.params.get('logger')
    if logger:
        write_debug = lambda msg: logger.debug(f'[debug] {msg}')
        write_debug(encoding_str)
    else:
        # The encoding line itself is written with encoding=None
        write_string(f'[debug] {encoding_str}\n', encoding=None)
        write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    write_debug(join_nonempty(
        'yt-dlp version', __version__,
        f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        delim=' '))
    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if plugin_extractors or plugin_postprocessors:
        write_debug('Plugins: %s' % [
            '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
    if self.params.get('compat_opts'):
        write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))

    if source == 'source':
        try:
            sp = Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, _ = sp.communicate_or_kill()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                write_debug('Git HEAD: %s' % out)
        except Exception:
            # Deliberately best-effort: the git HEAD is optional information and
            # any failure to obtain it (git missing, not a checkout, ...) is ignored.
            # (The former sys.exc_clear() call only exists on Python 2.)
            pass

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    write_debug('Python version %s (%s %s) - %s' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
    if ffmpeg_features:
        # Sort so the debug line is identical between runs (set order is not stable)
        exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .downloader.websocket import has_websockets
    from .postprocessor.embedthumbnail import has_mutagen
    from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE

    lib_str = join_nonempty(
        compat_brotli and compat_brotli.__name__,
        compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
        SECRETSTORAGE_AVAILABLE and 'secretstorage',
        has_mutagen and 'mutagen',
        SQLITE_AVAILABLE and 'sqlite',
        has_websockets and 'websockets',
        delim=', ') or 'none'
    write_debug('Optional libraries: %s' % lib_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_debug(f'Proxy map: {proxy_map}')

    # Not implemented: the `False and` guard keeps this branch permanently disabled
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        write_debug('Public IP address: %s' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)

3714

3715

def _setup_opener(self):
    """Create the URL opener and store it, with its settings, on the instance.

    Sets self._socket_timeout (20 unless the 'socket_timeout' param is
    given), self.cookiejar and self._opener. The opener is built from the
    proxy, HTTPS, cookie, YoutubeDL, redirect, data and file handlers; the
    file handler is neutered so that file:// URLs are rejected.
    """
    timeout_val = self.params.get('socket_timeout')
    if timeout_val is None:
        self._socket_timeout = 20
    else:
        self._socket_timeout = float(timeout_val)

    opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if opts_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # Supplying our own FileHandler instance stops build_opener from adding the
    # default one, which lets us disable the file protocol; it can be used for
    # malicious purposes (see https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
    file_handler.file_open = file_open

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Drop the default user-agent header; it would otherwise be sent whenever
    # our custom HTTP handler is not involved
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

3762

3763

def encode(self, s):
    """Encode `s` with self.get_encoding(); bytes are returned unchanged.

    A UnicodeEncodeError is re-raised with a hint about the --encoding
    option appended to its reason.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    target_encoding = self.get_encoding()
    try:
        return s.encode(target_encoding)
    except UnicodeEncodeError as err:
        err.reason += '. Check your system encoding configuration or use the --encoding option.'
        raise

3772

3773

def get_encoding(self):
    """Return the 'encoding' param, falling back to preferredencoding() when it is None."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured

3778

3779

def _write_info_json(self, label, ie_result, infofn, overwrite=None):
    """Write the info JSON for `ie_result` to `infofn`.

    Returns True when the file was written or is already present and must
    not be overwritten, False when writing is skipped ('writeinfojson' unset
    or no filename), and None on error. `overwrite` defaults to the
    'overwrites' param.
    """
    if overwrite is None:
        overwrite = self.params.get('overwrites', True)

    if not self.params.get('writeinfojson'):
        return False
    if not infofn:
        self.write_debug(f'Skipping writing {label} infojson')
        return False
    if not self._ensure_dir_exists(infofn):
        return None

    if not overwrite and os.path.exists(infofn):
        self.to_screen(f'[info] {label.title()} metadata is already present')
        return True

    self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
    try:
        sanitized = self.sanitize_info(ie_result, self.params.get('clean_infojson', True))
        write_json_file(sanitized, infofn)
    except (OSError, IOError):
        self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
        return None
    return True

def _write_description(self, label, ie_result, descfn):
    """Write ie_result['description'] to `descfn`.

    Returns True when the file was written or is already present and must
    not be overwritten, False when writing is skipped ('writedescription'
    unset, no filename, or no description available), and None on error.
    """
    if not self.params.get('writedescription'):
        return False
    if not descfn:
        self.write_debug(f'Skipping writing {label} description')
        return False
    if not self._ensure_dir_exists(descfn):
        return None

    if not self.params.get('overwrites', True) and os.path.exists(descfn):
        self.to_screen(f'[info] {label.title()} description is already present')
        return True
    if ie_result.get('description') is None:
        self.report_warning(f'There\'s no {label} description to write')
        return False

    try:
        self.to_screen(f'[info] Writing {label} description to: {descfn}')
        with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
            descfile.write(ie_result['description'])
    except (OSError, IOError):
        self.report_error(f'Cannot write {label} description file {descfn}')
        return None
    return True

def _write_subtitles(self, info_dict, filename):
    """Write the requested subtitles next to `filename`.

    Returns a list of (sub_filename, final_sub_filename) tuples, or None if
    writing inline subtitle data failed. Each processed entry of
    info_dict['requested_subtitles'] gets its 'filepath' set. A failed
    subtitle download raises DownloadError unless the 'ignoreerrors' param
    is exactly True, in which case only a warning is reported.
    """
    ret = []
    subtitles = info_dict.get('requested_subtitles')
    if not (subtitles and (self.params.get('writesubtitles') or self.params.get('writeautomaticsub'))):
        # Subtitle extraction problems are already handled by the relevant IE,
        # so an IE without subtitle support is silently passed over here
        return ret

    sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
    if not sub_filename_base:
        self.to_screen('[info] Skipping writing video subtitles')
        return ret

    video_ext = info_dict.get('ext')
    for sub_lang, sub_info in subtitles.items():
        sub_format = sub_info['ext']
        sub_filename = subtitles_filename(filename, sub_lang, sub_format, video_ext)
        sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, video_ext)

        existing_sub = self.existing_file((sub_filename_final, sub_filename))
        if existing_sub:
            self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
            sub_info['filepath'] = existing_sub
            ret.append((existing_sub, sub_filename_final))
            continue

        self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
        if sub_info.get('data') is not None:
            try:
                # newline='' keeps the newline characters of the data untouched
                # See https://github.com/ytdl-org/youtube-dl/issues/10268
                with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                    subfile.write(sub_info['data'])
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
            except (OSError, IOError):
                self.report_error(f'Cannot write video subtitles file {sub_filename}')
                return None
            continue

        try:
            sub_copy = sub_info.copy()
            sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
            self.dl(sub_filename, sub_copy, subtitle=True)
            sub_info['filepath'] = sub_filename
            ret.append((sub_filename, sub_filename_final))
        except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
            failure = f'Unable to download video subtitles for {sub_lang!r}: {err}'
            if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
                raise DownloadError(failure, err)
            self.report_warning(failure)
    return ret

3874

3875

def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
    """Write thumbnails to file and return a list of (thumb_filename, final_thumb_filename).

    Thumbnails are only considered when the 'write_all_thumbnails' or
    'writethumbnail' param is set. They are tried from the last entry of
    info_dict['thumbnails'] to the first; unless all thumbnails are requested,
    processing stops after the first one that is present or downloaded.
    A thumbnail that fails to download is removed from the list and a warning
    is reported. Each written or pre-existing thumbnail gets its 'filepath' set.
    `thumb_filename_base` defaults to `filename`.
    """
    write_all = self.params.get('write_all_thumbnails', False)
    thumbnails, ret = [], []
    if write_all or self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails') or []
    multiple = write_all and len(thumbnails) > 1

    if thumb_filename_base is None:
        thumb_filename_base = filename
    if thumbnails and not thumb_filename_base:
        self.write_debug(f'Skipping writing {label} thumbnail')
        return ret

    # Iterate over a reversed snapshot so that failed entries can be popped from `thumbnails`
    for idx, t in list(enumerate(thumbnails))[::-1]:
        thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
        thumb_display_id = f'{label} thumbnail {t["id"]}'
        thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
        thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

        existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
        if existing_thumb:
            self.to_screen('[info] %s is already present' % (
                thumb_display_id if multiple else f'{label} thumbnail').capitalize())
            t['filepath'] = existing_thumb
            ret.append((existing_thumb, thumb_filename_final))
        else:
            self.to_screen(f'[info] Downloading {thumb_display_id} ...')
            try:
                request = sanitized_Request(t['url'], headers=t.get('http_headers', {}))
                # Close the response once copied so the connection is not left open
                with contextlib.closing(self.urlopen(request)) as uf:
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                ret.append((thumb_filename, thumb_filename_final))
                t['filepath'] = thumb_filename
            except network_exceptions as err:
                thumbnails.pop(idx)
                self.report_warning(f'Unable to download {thumb_display_id}: {err}')
        if ret and not write_all:
            break
    return ret