]> jfr.im git - yt-dlp.git/blame_incremental - yt_dlp/YoutubeDL.py
Improved progress reporting (See desc) (#1125)
[yt-dlp.git] / yt_dlp / YoutubeDL.py
... / ...
CommitLineData
1#!/usr/bin/env python3
2# coding: utf-8
3
4from __future__ import absolute_import, unicode_literals
5
6import collections
7import contextlib
8import copy
9import datetime
10import errno
11import fileinput
12import io
13import itertools
14import json
15import locale
16import operator
17import os
18import platform
19import re
20import shutil
21import subprocess
22import sys
23import tempfile
24import time
25import tokenize
26import traceback
27import random
28import unicodedata
29
30from string import ascii_letters
31
32from .compat import (
33 compat_basestring,
34 compat_get_terminal_size,
35 compat_kwargs,
36 compat_numeric_types,
37 compat_os_name,
38 compat_pycrypto_AES,
39 compat_shlex_quote,
40 compat_str,
41 compat_tokenize_tokenize,
42 compat_urllib_error,
43 compat_urllib_request,
44 compat_urllib_request_DataHandler,
45 windows_enable_vt_mode,
46)
47from .cookies import load_cookies
48from .utils import (
49 age_restricted,
50 args_to_str,
51 ContentTooShortError,
52 date_from_str,
53 DateRange,
54 DEFAULT_OUTTMPL,
55 determine_ext,
56 determine_protocol,
57 DOT_DESKTOP_LINK_TEMPLATE,
58 DOT_URL_LINK_TEMPLATE,
59 DOT_WEBLOC_LINK_TEMPLATE,
60 DownloadError,
61 encode_compat_str,
62 encodeFilename,
63 EntryNotInPlaylist,
64 error_to_compat_str,
65 ExistingVideoReached,
66 expand_path,
67 ExtractorError,
68 float_or_none,
69 format_bytes,
70 format_field,
71 formatSeconds,
72 GeoRestrictedError,
73 HEADRequest,
74 int_or_none,
75 iri_to_uri,
76 ISO3166Utils,
77 LazyList,
78 locked_file,
79 make_dir,
80 make_HTTPS_handler,
81 MaxDownloadsReached,
82 network_exceptions,
83 orderedSet,
84 OUTTMPL_TYPES,
85 PagedList,
86 parse_filesize,
87 PerRequestProxyHandler,
88 platform_name,
89 PostProcessingError,
90 preferredencoding,
91 prepend_extension,
92 process_communicate_or_kill,
93 register_socks_protocols,
94 RejectedVideoReached,
95 render_table,
96 replace_extension,
97 SameFileError,
98 sanitize_filename,
99 sanitize_path,
100 sanitize_url,
101 sanitized_Request,
102 std_headers,
103 STR_FORMAT_RE_TMPL,
104 STR_FORMAT_TYPES,
105 str_or_none,
106 strftime_or_none,
107 subtitles_filename,
108 supports_terminal_sequences,
109 TERMINAL_SEQUENCES,
110 ThrottledDownload,
111 to_high_limit_path,
112 traverse_obj,
113 try_get,
114 UnavailableVideoError,
115 url_basename,
116 variadic,
117 version_tuple,
118 write_json_file,
119 write_string,
120 YoutubeDLCookieProcessor,
121 YoutubeDLHandler,
122 YoutubeDLRedirectHandler,
123)
124from .cache import Cache
125from .extractor import (
126 gen_extractor_classes,
127 get_info_extractor,
128 _LAZY_LOADER,
129 _PLUGIN_CLASSES as plugin_extractors
130)
131from .extractor.openload import PhantomJSwrapper
132from .downloader import (
133 FFmpegFD,
134 get_suitable_downloader,
135 shorten_protocol_name
136)
137from .downloader.rtmp import rtmpdump_version
138from .postprocessor import (
139 get_postprocessor,
140 FFmpegFixupDurationPP,
141 FFmpegFixupM3u8PP,
142 FFmpegFixupM4aPP,
143 FFmpegFixupStretchedPP,
144 FFmpegFixupTimestampPP,
145 FFmpegMergerPP,
146 FFmpegPostProcessor,
147 MoveFilesAfterDownloadPP,
148 _PLUGIN_CLASSES as plugin_postprocessors
149)
150from .update import detect_variant
151from .version import __version__
152
153if compat_os_name == 'nt':
154 import ctypes
155
156
157class YoutubeDL(object):
158 """YoutubeDL class.
159
160 YoutubeDL objects are the ones responsible of downloading the
161 actual video file and writing it to disk if the user has requested
162 it, among some other tasks. In most cases there should be one per
163 program. As, given a video URL, the downloader doesn't know how to
164 extract all the needed information, task that InfoExtractors do, it
165 has to pass the URL to one of them.
166
167 For this, YoutubeDL objects have a method that allows
168 InfoExtractors to be registered in a given order. When it is passed
169 a URL, the YoutubeDL object handles it to the first InfoExtractor it
170 finds that reports being able to handle it. The InfoExtractor extracts
171 all the information about the video or videos the URL refers to, and
172 YoutubeDL process the extracted information, possibly using a File
173 Downloader to download the video.
174
175 YoutubeDL objects accept a lot of parameters. In order not to saturate
176 the object constructor with arguments, it receives a dictionary of
177 options instead. These options are available through the params
178 attribute for the InfoExtractors to use. The YoutubeDL also
179 registers itself as the downloader in charge for the InfoExtractors
180 that are added to it, so this is a "mutual registration".
181
182 Available options:
183
184 username: Username for authentication purposes.
185 password: Password for authentication purposes.
186 videopassword: Password for accessing a video.
187 ap_mso: Adobe Pass multiple-system operator identifier.
188 ap_username: Multiple-system operator account username.
189 ap_password: Multiple-system operator account password.
190 usenetrc: Use netrc for authentication instead.
191 verbose: Print additional info to stdout.
192 quiet: Do not print messages to stdout.
193 no_warnings: Do not print out anything for warnings.
194 forceprint: A list of templates to force print
195 forceurl: Force printing final URL. (Deprecated)
196 forcetitle: Force printing title. (Deprecated)
197 forceid: Force printing ID. (Deprecated)
198 forcethumbnail: Force printing thumbnail URL. (Deprecated)
199 forcedescription: Force printing description. (Deprecated)
200 forcefilename: Force printing final filename. (Deprecated)
201 forceduration: Force printing duration. (Deprecated)
202 forcejson: Force printing info_dict as JSON.
203 dump_single_json: Force printing the info_dict of the whole playlist
204 (or video) as a single JSON line.
205 force_write_download_archive: Force writing download archive regardless
206 of 'skip_download' or 'simulate'.
207 simulate: Do not download the video files. If unset (or None),
208 simulate only if listsubtitles, listformats or list_thumbnails is used
209 format: Video format code. see "FORMAT SELECTION" for more details.
210 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
212 extracting metadata even if the video is not actually
213 available for download (experimental)
214 format_sort: How to sort the video formats. see "Sorting Formats"
215 for more details.
216 format_sort_force: Force the given format_sort. see "Sorting Formats"
217 for more details.
218 allow_multiple_video_streams: Allow multiple video streams to be merged
219 into a single file
220 allow_multiple_audio_streams: Allow multiple audio streams to be merged
221 into a single file
222 check_formats Whether to test if the formats are downloadable.
223 Can be True (check all), False (check none)
224 or None (check only if requested by extractor)
225 paths: Dictionary of output paths. The allowed keys are 'home'
226 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
227 outtmpl: Dictionary of templates for output names. Allowed keys
228 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
229 For compatibility with youtube-dl, a single string can also be used
230 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
231 restrictfilenames: Do not allow "&" and spaces in file names
232 trim_file_name: Limit length of filename (extension excluded)
233 windowsfilenames: Force the filenames to be windows compatible
234 ignoreerrors: Do not stop on download/postprocessing errors.
235 Can be 'only_download' to ignore only download errors.
236 Default is 'only_download' for CLI, but False for API
237 skip_playlist_after_errors: Number of allowed failures until the rest of
238 the playlist is skipped
239 force_generic_extractor: Force downloader to use the generic extractor
240 overwrites: Overwrite all video and metadata files if True,
241 overwrite only non-video files if None
242 and don't overwrite any file if False
243 For compatibility with youtube-dl,
244 "nooverwrites" may also be used instead
245 playliststart: Playlist item to start at.
246 playlistend: Playlist item to end at.
247 playlist_items: Specific indices of playlist to download.
248 playlistreverse: Download playlist items in reverse order.
249 playlistrandom: Download playlist items in random order.
250 matchtitle: Download only matching titles.
251 rejecttitle: Reject downloads for matching titles.
252 logger: Log messages to a logging.Logger instance.
253 logtostderr: Log messages to stderr instead of stdout.
254 consoletitle: Display progress in console window's titlebar.
255 writedescription: Write the video description to a .description file
256 writeinfojson: Write the video description to a .info.json file
257 clean_infojson: Remove private fields from the infojson
258 getcomments: Extract video comments. This will not be written to disk
259 unless writeinfojson is also given
260 writeannotations: Write the video annotations to a .annotations.xml file
261 writethumbnail: Write the thumbnail image to a file
262 allow_playlist_files: Whether to write playlists' description, infojson etc
263 also to disk when using the 'write*' options
264 write_all_thumbnails: Write all thumbnail formats to files
265 writelink: Write an internet shortcut file, depending on the
266 current platform (.url/.webloc/.desktop)
267 writeurllink: Write a Windows internet shortcut file (.url)
268 writewebloclink: Write a macOS internet shortcut file (.webloc)
269 writedesktoplink: Write a Linux internet shortcut file (.desktop)
270 writesubtitles: Write the video subtitles to a file
271 writeautomaticsub: Write the automatically generated subtitles to a file
272 allsubtitles: Deprecated - Use subtitleslangs = ['all']
273 Downloads all the subtitles of the video
274 (requires writesubtitles or writeautomaticsub)
275 listsubtitles: Lists all available subtitles for the video
276 subtitlesformat: The format code for subtitles
277 subtitleslangs: List of languages of the subtitles to download (can be regex).
278 The list may contain "all" to refer to all the available
279 subtitles. The language can be prefixed with a "-" to
280 exclude it from the requested languages. Eg: ['all', '-live_chat']
281 keepvideo: Keep the video file after post-processing
282 daterange: A DateRange object, download only if the upload_date is in the range.
283 skip_download: Skip the actual download of the video file
284 cachedir: Location of the cache files in the filesystem.
285 False to disable filesystem cache.
286 noplaylist: Download single video instead of a playlist if in doubt.
287 age_limit: An integer representing the user's age in years.
288 Unsuitable videos for the given age are skipped.
289 min_views: An integer representing the minimum view count the video
290 must have in order to not be skipped.
291 Videos without view count information are always
292 downloaded. None for no limit.
293 max_views: An integer representing the maximum view count.
294 Videos that are more popular than that are not
295 downloaded.
296 Videos without view count information are always
297 downloaded. None for no limit.
298 download_archive: File name of a file where all downloads are recorded.
299 Videos already present in the file are not downloaded
300 again.
301 break_on_existing: Stop the download process after attempting to download a
302 file that is in the archive.
303 break_on_reject: Stop the download process when encountering a video that
304 has been filtered out.
305 cookiefile: File name where cookies should be read from and dumped to
306 cookiesfrombrowser: A tuple containing the name of the browser and the profile
307 name/path from where cookies are loaded.
                       Eg: ('chrome', ) or ('vivaldi', 'default')
309 nocheckcertificate:Do not verify SSL certificates
310 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
311 At the moment, this is only supported by YouTube.
312 proxy: URL of the proxy server to use
313 geo_verification_proxy: URL of the proxy to use for IP address verification
314 on geo-restricted sites.
315 socket_timeout: Time to wait for unresponsive hosts, in seconds
316 bidi_workaround: Work around buggy terminals without bidirectional text
                       support, using fribidi
318 debug_printtraffic:Print out sent and received HTTP traffic
319 include_ads: Download ads as well
320 default_search: Prepend this string if an input url is not valid.
321 'auto' for elaborate guessing
322 encoding: Use this encoding instead of the system-specified.
323 extract_flat: Do not resolve URLs, return the immediate result.
324 Pass in 'in_playlist' to only show this behavior for
325 playlist items.
326 postprocessors: A list of dictionaries, each with an entry
327 * key: The name of the postprocessor. See
328 yt_dlp/postprocessor/__init__.py for a list.
329 * when: When to run the postprocessor. Can be one of
330 pre_process|before_dl|post_process|after_move.
331 Assumed to be 'post_process' if not given
332 post_hooks: A list of functions that get called as the final step
333 for each video file, after all postprocessors have been
334 called. The filename will be passed as the only argument.
335 progress_hooks: A list of functions that get called on download
336 progress, with a dictionary with the entries
337 * status: One of "downloading", "error", or "finished".
338 Check this first and ignore unknown values.
339 * info_dict: The extracted info_dict
340
341 If status is one of "downloading", or "finished", the
342 following properties may also be present:
343 * filename: The final filename (always present)
344 * tmpfilename: The filename we're currently writing to
345 * downloaded_bytes: Bytes on disk
346 * total_bytes: Size of the whole file, None if unknown
347 * total_bytes_estimate: Guess of the eventual file size,
348 None if unavailable.
349 * elapsed: The number of seconds since download started.
350 * eta: The estimated time in seconds, None if unknown
351 * speed: The download speed in bytes/second, None if
352 unknown
353 * fragment_index: The counter of the currently
354 downloaded video fragment.
355 * fragment_count: The number of fragments (= individual
356 files that will be merged)
357
358 Progress hooks are guaranteed to be called at least once
359 (with status "finished") if the download is successful.
360 postprocessor_hooks: A list of functions that get called on postprocessing
361 progress, with a dictionary with the entries
362 * status: One of "started", "processing", or "finished".
363 Check this first and ignore unknown values.
364 * postprocessor: Name of the postprocessor
365 * info_dict: The extracted info_dict
366
367 Progress hooks are guaranteed to be called at least twice
368 (with status "started" and "finished") if the processing is successful.
369 merge_output_format: Extension to use when merging formats.
370 final_ext: Expected final extension; used to detect when the file was
371 already downloaded and converted. "merge_output_format" is
372 replaced by this extension when given
373 fixup: Automatically correct known faults of the file.
374 One of:
375 - "never": do nothing
376 - "warn": only emit a warning
377 - "detect_or_warn": check whether we can do anything
378 about it, warn otherwise (default)
379 source_address: Client-side IP address to bind to.
380 call_home: Boolean, true iff we are allowed to contact the
381 yt-dlp servers for debugging. (BROKEN)
382 sleep_interval_requests: Number of seconds to sleep between requests
383 during extraction
384 sleep_interval: Number of seconds to sleep before each download when
385 used alone or a lower bound of a range for randomized
386 sleep before each download (minimum possible number
387 of seconds to sleep) when used along with
388 max_sleep_interval.
389 max_sleep_interval:Upper bound of a range for randomized sleep before each
390 download (maximum possible number of seconds to sleep).
391 Must only be used along with sleep_interval.
392 Actual sleep time will be a random float from range
393 [sleep_interval; max_sleep_interval].
394 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
395 listformats: Print an overview of available video formats and exit.
396 list_thumbnails: Print a table of all thumbnails and exit.
397 match_filter: A function that gets called with the info_dict of
398 every video.
399 If it returns a message, the video is ignored.
400 If it returns None, the video is downloaded.
401 match_filter_func in utils.py is one example for this.
402 no_color: Do not emit color codes in output.
403 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
404 HTTP header
405 geo_bypass_country:
406 Two-letter ISO 3166-2 country code that will be used for
407 explicit geographic restriction bypassing via faking
408 X-Forwarded-For HTTP header
409 geo_bypass_ip_block:
410 IP range in CIDR notation that will be used similarly to
411 geo_bypass_country
412
413 The following options determine which downloader is picked:
414 external_downloader: A dictionary of protocol keys and the executable of the
415 external downloader to use for it. The allowed protocols
416 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
417 Set the value to 'native' to use the native downloader
418 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
419 or {'m3u8': 'ffmpeg'} instead.
420 Use the native HLS downloader instead of ffmpeg/avconv
421 if True, otherwise use ffmpeg/avconv if False, otherwise
422 use downloader suggested by extractor if None.
423 compat_opts: Compatibility options. See "Differences in default behavior".
424 The following options do not work when used through the API:
425 filename, abort-on-error, multistreams, no-live-chat,
426 no-clean-infojson, no-playlist-metafiles, no-keep-subs.
427 Refer __init__.py for their implementation
428 progress_template: Dictionary of templates for progress outputs.
429 Allowed keys are 'download', 'postprocess',
430 'download-title' (console title) and 'postprocess-title'.
431 The template is mapped on a dictionary with keys 'progress' and 'info'
432
433 The following parameters are not used by YoutubeDL itself, they are used by
434 the downloader (see yt_dlp/downloader/common.py):
435 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
436 max_filesize, test, noresizebuffer, retries, continuedl, noprogress,
437 xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
438
439 The following options are used by the post processors:
440 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
441 otherwise prefer ffmpeg. (avconv support is deprecated)
442 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
443 to the binary or its containing directory.
444 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
445 and a list of additional command-line arguments for the
446 postprocessor/executable. The dict can also have "PP+EXE" keys
447 which are used when the given exe is used by the given PP.
448 Use 'default' as the name for arguments to passed to all PP
449 For compatibility with youtube-dl, a single list of args
450 can also be used
451
452 The following options are used by the extractors:
453 extractor_retries: Number of times to retry for known errors
454 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
455 hls_split_discontinuity: Split HLS playlists to different formats at
456 discontinuities such as ad breaks (default: False)
457 extractor_args: A dictionary of arguments to be passed to the extractors.
458 See "EXTRACTOR ARGUMENTS" for details.
459 Eg: {'youtube': {'skip': ['dash', 'hls']}}
460 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
461 If True (default), DASH manifests and related
462 data will be downloaded and processed by extractor.
463 You can reduce network I/O by disabling it if you don't
464 care about DASH. (only for youtube)
465 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
466 If True (default), HLS manifests and related
467 data will be downloaded and processed by extractor.
468 You can reduce network I/O by disabling it if you don't
469 care about HLS. (only for youtube)
470 """
471
    # info_dict fields whose values must be numeric (int/float); used when
    # sanitizing extracted metadata and when formatting output templates.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    ))

    # Class-level placeholders; real per-instance values are assigned in __init__.
    params = None  # the options dictionary
    _ies = {}  # registered InfoExtractor classes/instances, keyed by ie_key
    _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}  # postprocessor chains per stage
    _printed_messages = set()  # messages already emitted (for only_once deduplication)
    _first_webpage_request = True
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0  # current nesting depth of playlist extraction
    _playlist_urls = set()  # guards against recursing into the same playlist
    _screen_file = None
492
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        @param params     Dictionary of options (see the class docstring).
                          Mutated in place (e.g. 'no_color', 'restrictfilenames').
        @param auto_init  Whether to print the debug header and register the
                          default info extractors immediately.
        """
        if params is None:
            params = {}
        # Per-instance state; shadows the class-level placeholders.
        self._ies = {}
        self._ies_instances = {}
        self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Screen output goes to stderr when 'logtostderr' is set (bool indexes the pair).
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = params
        self.cache = Cache(self)

        # Enable ANSI escape processing on Windows consoles, then disable
        # color output if the error stream cannot render escape sequences.
        windows_enable_vt_mode()
        self.params['no_color'] = self.params.get('no_color') or not supports_terminal_sequences(self._err_file)

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._color_text("unplayable formats", "blue")} to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                '         If you experience any issues while using this option, '
                f'{self._color_text("DO NOT", "red")} open a bug report')

        def check_deprecated(param, option, suggestion):
            # Warn about a deprecated option; returns True if it was actually set.
            if self.params.get(param) is not None:
                self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
                return True
            return False

        # Map the deprecated cn_verification_proxy onto geo_verification_proxy
        # unless the latter was given explicitly.
        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        # Warnings queued by the option parser before the logger existed.
        for msg in self.params.get('warnings', []):
            self.report_warning(msg)

        # Keep 'overwrites' and the legacy 'nooverwrites' keys consistent.
        if self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        elif self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        # Spawn a bidiv/fribidi subprocess through a pty so that output for
        # right-to-left scripts is reordered before reaching the terminal.
        if params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # Fall back to fribidi when bidiv is not installed.
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            None if self.params.get('format') is None
            else self.build_format_selector(self.params['format']))

        self._setup_opener()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            if fn is None:
                return False
            self.write_debug('Loading archive file %r\n' % fn)
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                # A missing archive file is fine; anything else is a real error.
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate and register the configured postprocessors.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            pp_class = get_postprocessor(pp_def.pop('key'))
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp, when=when)

        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
635
636 def warn_if_short_id(self, argv):
637 # short YouTube ID starting with dash?
638 idxs = [
639 i for i, a in enumerate(argv)
640 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
641 if idxs:
642 correct_argv = (
643 ['yt-dlp']
644 + [a for i, a in enumerate(argv) if i not in idxs]
645 + ['--'] + [argv[i] for i in idxs]
646 )
647 self.report_warning(
648 'Long argument string detected. '
649 'Use -- to separate parameters and URLs, like this:\n%s\n' %
650 args_to_str(correct_argv))
651
652 def add_info_extractor(self, ie):
653 """Add an InfoExtractor object to the end of the list."""
654 ie_key = ie.ie_key()
655 self._ies[ie_key] = ie
656 if not isinstance(ie, type):
657 self._ies_instances[ie_key] = ie
658 ie.set_downloader(self)
659
660 def _get_info_extractor_class(self, ie_key):
661 ie = self._ies.get(ie_key)
662 if ie is None:
663 ie = get_info_extractor(ie_key)
664 self.add_info_extractor(ie)
665 return ie
666
667 def get_info_extractor(self, ie_key):
668 """
669 Get an instance of an IE with name ie_key, it will try to get one from
670 the _ies list, if there's no instance it will create a new one and add
671 it to the extractor list.
672 """
673 ie = self._ies_instances.get(ie_key)
674 if ie is None:
675 ie = get_info_extractor(ie_key)()
676 self.add_info_extractor(ie)
677 return ie
678
679 def add_default_info_extractors(self):
680 """
681 Add the InfoExtractors returned by gen_extractors to the end of the list
682 """
683 for ie in gen_extractor_classes():
684 self.add_info_extractor(ie)
685
686 def add_post_processor(self, pp, when='post_process'):
687 """Add a PostProcessor object to the end of the chain."""
688 self._pps[when].append(pp)
689 pp.set_downloader(self)
690
691 def add_post_hook(self, ph):
692 """Add the post hook"""
693 self._post_hooks.append(ph)
694
695 def add_progress_hook(self, ph):
696 """Add the download progress hook"""
697 self._progress_hooks.append(ph)
698
699 def add_postprocessor_hook(self, ph):
700 """Add the postprocessing progress hook"""
701 self._postprocessor_hooks.append(ph)
702
703 def _bidi_workaround(self, message):
704 if not hasattr(self, '_output_channel'):
705 return message
706
707 assert hasattr(self, '_output_process')
708 assert isinstance(message, compat_str)
709 line_count = message.count('\n') + 1
710 self._output_process.stdin.write((message + '\n').encode('utf-8'))
711 self._output_process.stdin.flush()
712 res = ''.join(self._output_channel.readline().decode('utf-8')
713 for _ in range(line_count))
714 return res[:-len('\n')]
715
716 def _write_string(self, message, out=None, only_once=False):
717 if only_once:
718 if message in self._printed_messages:
719 return
720 self._printed_messages.add(message)
721 write_string(message, out=out, encoding=self.params.get('encoding'))
722
723 def to_stdout(self, message, skip_eol=False, quiet=False):
724 """Print message to stdout"""
725 if self.params.get('logger'):
726 self.params['logger'].debug(message)
727 elif not quiet or self.params.get('verbose'):
728 self._write_string(
729 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
730 self._err_file if quiet else self._screen_file)
731
732 def to_stderr(self, message, only_once=False):
733 """Print message to stderr"""
734 assert isinstance(message, compat_str)
735 if self.params.get('logger'):
736 self.params['logger'].error(message)
737 else:
738 self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
739
    def to_console_title(self, message):
        # Set the console/terminal window title; no-op unless the
        # 'consoletitle' option is enabled.
        if not self.params.get('consoletitle', False):
            return
        if compat_os_name == 'nt':
            # Only act when an actual console window exists (e.g. not when
            # output is redirected).
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # OSC 0 escape sequence: sets the terminal's icon name and title.
            self._write_string('\033]0;%s\007' % message, self._screen_file)
750
751 def save_console_title(self):
752 if not self.params.get('consoletitle', False):
753 return
754 if self.params.get('simulate'):
755 return
756 if compat_os_name != 'nt' and 'TERM' in os.environ:
757 # Save the title on stack
758 self._write_string('\033[22;0t', self._screen_file)
759
760 def restore_console_title(self):
761 if not self.params.get('consoletitle', False):
762 return
763 if self.params.get('simulate'):
764 return
765 if compat_os_name != 'nt' and 'TERM' in os.environ:
766 # Restore the title from stack
767 self._write_string('\033[23;0t', self._screen_file)
768
769 def __enter__(self):
770 self.save_console_title()
771 return self
772
773 def __exit__(self, *args):
774 self.restore_console_title()
775
776 if self.params.get('cookiefile') is not None:
777 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
778
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Some wrapped exceptions (e.g. DownloadError) carry the
                    # original exc_info of their cause; include it first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack instead.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not self.params.get('ignoreerrors'):
            # Prefer the wrapped exception's original exc_info when available,
            # so DownloadError reports the root cause.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # Errors are being ignored: record failure via the process return code.
        self._download_retcode = 1
809
810 def to_screen(self, message, skip_eol=False):
811 """Print message to stdout if not in quiet mode"""
812 self.to_stdout(
813 message, skip_eol, quiet=self.params.get('quiet', False))
814
815 def _color_text(self, text, color):
816 if self.params.get('no_color'):
817 return text
818 return f'{TERMINAL_SEQUENCES[color.upper()]}{text}{TERMINAL_SEQUENCES["RESET_STYLE"]}'
819
820 def report_warning(self, message, only_once=False):
821 '''
822 Print the message to stderr, it will be prefixed with 'WARNING:'
823 If stderr is a tty file the 'WARNING:' will be colored
824 '''
825 if self.params.get('logger') is not None:
826 self.params['logger'].warning(message)
827 else:
828 if self.params.get('no_warnings'):
829 return
830 self.to_stderr(f'{self._color_text("WARNING:", "yellow")} {message}', only_once)
831
832 def report_error(self, message, tb=None):
833 '''
834 Do the same as trouble, but prefixes the message with 'ERROR:', colored
835 in red if stderr is a tty file.
836 '''
837 self.trouble(f'{self._color_text("ERROR:", "red")} {message}', tb)
838
839 def write_debug(self, message, only_once=False):
840 '''Log debug message or Print message to stderr'''
841 if not self.params.get('verbose', False):
842 return
843 message = '[debug] %s' % message
844 if self.params.get('logger'):
845 self.params['logger'].debug(message)
846 else:
847 self.to_stderr(message, only_once)
848
849 def report_file_already_downloaded(self, file_name):
850 """Report file has already been fully downloaded."""
851 try:
852 self.to_screen('[download] %s has already been downloaded' % file_name)
853 except UnicodeEncodeError:
854 self.to_screen('[download] The file has already been downloaded')
855
856 def report_file_delete(self, file_name):
857 """Report that existing file will be deleted."""
858 try:
859 self.to_screen('Deleting existing file %s' % file_name)
860 except UnicodeEncodeError:
861 self.to_screen('Deleting existing file')
862
863 def raise_no_formats(self, info, forced=False):
864 has_drm = info.get('__has_drm')
865 msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
866 expected = self.params.get('ignore_no_formats_error')
867 if forced or not expected:
868 raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
869 expected=has_drm or expected)
870 else:
871 self.report_warning(msg)
872
873 def parse_outtmpl(self):
874 outtmpl_dict = self.params.get('outtmpl', {})
875 if not isinstance(outtmpl_dict, dict):
876 outtmpl_dict = {'default': outtmpl_dict}
877 outtmpl_dict.update({
878 k: v for k, v in DEFAULT_OUTTMPL.items()
879 if outtmpl_dict.get(k) is None})
880 for key, val in outtmpl_dict.items():
881 if isinstance(val, bytes):
882 self.report_warning(
883 'Parameter outtmpl is bytes, but should be a unicode string. '
884 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
885 return outtmpl_dict
886
887 def get_output_path(self, dir_type='', filename=None):
888 paths = self.params.get('paths', {})
889 assert isinstance(paths, dict)
890 path = os.path.join(
891 expand_path(paths.get('home', '').strip()),
892 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
893 filename or '')
894
895 # Temporary fix for #4787
896 # 'Treat' all problem characters by passing filename through preferredencoding
897 # to workaround encoding issues with subprocess on python2 @ Windows
898 if sys.version_info < (3, 0) and sys.platform == 'win32':
899 path = encodeFilename(path, True).decode(preferredencoding())
900 return sanitize_path(path, force=self.params.get('windowsfilenames'))
901
902 @staticmethod
903 def _outtmpl_expandpath(outtmpl):
904 # expand_path translates '%%' into '%' and '$$' into '$'
905 # correspondingly that is not what we want since we need to keep
906 # '%%' intact for template dict substitution step. Working around
907 # with boundary-alike separator hack.
908 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
909 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
910
911 # outtmpl should be expand_path'ed before template dict substitution
912 # because meta fields may contain env variables we don't want to
913 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
914 # title "Hello $PATH", we don't want `$PATH` to be expanded.
915 return expand_path(outtmpl).replace(sep, '')
916
917 @staticmethod
918 def escape_outtmpl(outtmpl):
919 ''' Escape any remaining strings like %s, %abc% etc. '''
920 return re.sub(
921 STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
922 lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
923 outtmpl)
924
925 @classmethod
926 def validate_outtmpl(cls, outtmpl):
927 ''' @return None or Exception object '''
928 outtmpl = re.sub(
929 STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
930 lambda mobj: f'{mobj.group(0)[:-1]}s',
931 cls._outtmpl_expandpath(outtmpl))
932 try:
933 cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
934 return None
935 except ValueError as err:
936 return err
937
    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """
        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = dict(info_dict)  # Do not sanitize so as not to consume LazyList
        # Internal bookkeeping keys must never leak into templates
        for key in ('__original_infodict', '__postprocessors'):
            info_dict.pop(key, None)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
            'playlist_autonumber': len(str(info_dict.get('n_entries') or '')),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        # NOTE(review): the '.' in the decimal part below is unescaped, so it
        # matches any character — presumably should be r'\.'; confirm upstream
        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<alternate>(?<!\\),[^|)]+)?
            (?:\|(?P<default>.*?))?
            $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            # Dotted path lookup into info_dict; a leading '.' is tolerated
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Resolve one parsed field spec (groupdict of INTERNAL_FORMAT_RE)
            # to its value: traversal, negation, +/- arithmetic, strftime
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                # Alternate between reading an operator and reading an operand
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    # Operand may be a literal number or another field reference
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def _dumpjson_default(obj):
            # Allow sets and LazyLists inside %(...)j conversions
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

        def create_key(outer_mobj):
            # Substitution callback: rewrite one %(key)fmt occurrence and
            # register its computed value in TMPL_DICT
            if not outer_mobj.group('has_key'):
                return f'%{outer_mobj.group(0)}'
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
            value, default = None, na
            # Walk the comma-separated alternates until one yields a value
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = '0{:d}d'.format(field_size_compat_map[key])

            value = default if value is None else value

            # Custom single-letter conversions are rewritten to plain '%...s'
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', '
                value, fmt = delim.join(variadic(value)), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value, fmt = compat_shlex_quote(str(value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                opts = outer_mobj.group('conversion') or ''
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitize(initial_field, value)

            # Embed NULs in the key so escape_outtmpl (which skips '\0') will
            # not re-escape the rewritten placeholder
            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1092
1093 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1094 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1095 return self.escape_outtmpl(outtmpl) % info_dict
1096
1097 def _prepare_filename(self, info_dict, tmpl_type='default'):
1098 try:
1099 sanitize = lambda k, v: sanitize_filename(
1100 compat_str(v),
1101 restricted=self.params.get('restrictfilenames'),
1102 is_id=(k == 'id' or k.endswith('_id')))
1103 outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
1104 outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
1105 outtmpl = self.escape_outtmpl(self._outtmpl_expandpath(outtmpl))
1106 filename = outtmpl % template_dict
1107
1108 force_ext = OUTTMPL_TYPES.get(tmpl_type)
1109 if filename and force_ext is not None:
1110 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1111
1112 # https://github.com/blackjack4494/youtube-dlc/issues/85
1113 trim_file_name = self.params.get('trim_file_name', False)
1114 if trim_file_name:
1115 fn_groups = filename.rsplit('.')
1116 ext = fn_groups[-1]
1117 sub_ext = ''
1118 if len(fn_groups) > 2:
1119 sub_ext = fn_groups[-2]
1120 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
1121
1122 return filename
1123 except ValueError as err:
1124 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1125 return None
1126
1127 def prepare_filename(self, info_dict, dir_type='', warn=False):
1128 """Generate the output filename."""
1129
1130 filename = self._prepare_filename(info_dict, dir_type or 'default')
1131 if not filename and dir_type not in ('', 'temp'):
1132 return ''
1133
1134 if warn:
1135 if not self.params.get('paths'):
1136 pass
1137 elif filename == '-':
1138 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1139 elif os.path.isabs(filename):
1140 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1141 if filename == '-' or not filename:
1142 return filename
1143
1144 return self.get_output_path(dir_type, filename)
1145
    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """ Returns None if the file should be downloaded """
        # `incomplete` marks flat/partial playlist entries; `silent` suppresses
        # the "[download] <reason>" message

        video_title = info_dict.get('title', info_dict.get('id', 'video'))

        def check_filter():
            # Returns a human-readable skip reason, or None to accept the video
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                try:
                    # Newer match_filter callables accept the `incomplete` kwarg
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
                if ret is not None:
                    return ret
            return None

        # Archive hits and filter rejections use different break options/exceptions
        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason
1202
1203 @staticmethod
1204 def add_extra_info(info_dict, extra_info):
1205 '''Set the keys from extra_info in info dict if they are missing'''
1206 for key, value in extra_info.items():
1207 info_dict.setdefault(key, value)
1208
    def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                     process=True, force_generic_extractor=False):
        """
        Return a list with a dictionary for each video extracted.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
                   must be True for download to work.
        force_generic_extractor -- force using the generic extractor
        """

        if extra_info is None:
            extra_info = {}

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        # An explicit ie_key restricts the candidate extractors to exactly one
        if ie_key:
            ies = {ie_key: self._get_info_extractor_class(ie_key)}
        else:
            ies = self._ies

        for ie_key, ie in ies.items():
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            # If the id can be derived from the URL alone, the archive check
            # can short-circuit before any network request
            temp_id = ie.get_temp_id(url)
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                # `break` skips the for-else below: archive hit is not an error
                break
            return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
        else:
            # for-else: only reached when no extractor was suitable for the URL
            self.report_error('no suitable InfoExtractor for URL %s' % url)
1253
    def __handle_extraction_exceptions(func):
        # Decorator (applied at class-body time, so it receives the raw
        # function, not self) that converts extraction errors into
        # report_error() calls, honouring the 'ignoreerrors' option

        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except ThrottledDownload:
                self.to_stderr('\r')
                self.report_warning('The download speed is below throttle limit. Re-extracting data')
                # Retry the whole wrapped call from scratch
                return wrapper(self, *args, **kwargs)
            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached, LazyList.IndexError):
                # Control-flow exceptions must propagate to the caller
                raise
            except Exception as e:
                if self.params.get('ignoreerrors'):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapper
1280
1281 @__handle_extraction_exceptions
1282 def __extract_info(self, url, ie, download, extra_info, process):
1283 ie_result = ie.extract(url)
1284 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1285 return
1286 if isinstance(ie_result, list):
1287 # Backwards compatibility: old IE result format
1288 ie_result = {
1289 '_type': 'compat_list',
1290 'entries': ie_result,
1291 }
1292 if extra_info.get('original_url'):
1293 ie_result.setdefault('original_url', extra_info['original_url'])
1294 self.add_default_extra_info(ie_result, ie, url)
1295 if process:
1296 return self.process_ie_result(ie_result, download, extra_info)
1297 else:
1298 return ie_result
1299
1300 def add_default_extra_info(self, ie_result, ie, url):
1301 if url is not None:
1302 self.add_extra_info(ie_result, {
1303 'webpage_url': url,
1304 'original_url': url,
1305 'webpage_url_basename': url_basename(url),
1306 })
1307 if ie is not None:
1308 self.add_extra_info(ie_result, {
1309 'extractor': ie.IE_NAME,
1310 'extractor_key': ie.ie_key(),
1311 })
1312
1313 def process_ie_result(self, ie_result, download=True, extra_info=None):
1314 """
1315 Take the result of the ie(may be modified) and resolve all unresolved
1316 references (URLs, playlist items).
1317
1318 It will also download the videos if 'download'.
1319 Returns the resolved ie_result.
1320 """
1321 if extra_info is None:
1322 extra_info = {}
1323 result_type = ie_result.get('_type', 'video')
1324
1325 if result_type in ('url', 'url_transparent'):
1326 ie_result['url'] = sanitize_url(ie_result['url'])
1327 if ie_result.get('original_url'):
1328 extra_info.setdefault('original_url', ie_result['original_url'])
1329
1330 extract_flat = self.params.get('extract_flat', False)
1331 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1332 or extract_flat is True):
1333 info_copy = ie_result.copy()
1334 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1335 if ie and not ie_result.get('id'):
1336 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1337 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1338 self.add_extra_info(info_copy, extra_info)
1339 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1340 if self.params.get('force_write_download_archive', False):
1341 self.record_download_archive(info_copy)
1342 return ie_result
1343
1344 if result_type == 'video':
1345 self.add_extra_info(ie_result, extra_info)
1346 ie_result = self.process_video_result(ie_result, download=download)
1347 additional_urls = (ie_result or {}).get('additional_urls')
1348 if additional_urls:
1349 # TODO: Improve MetadataParserPP to allow setting a list
1350 if isinstance(additional_urls, compat_str):
1351 additional_urls = [additional_urls]
1352 self.to_screen(
1353 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1354 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1355 ie_result['additional_entries'] = [
1356 self.extract_info(
1357 url, download, extra_info,
1358 force_generic_extractor=self.params.get('force_generic_extractor'))
1359 for url in additional_urls
1360 ]
1361 return ie_result
1362 elif result_type == 'url':
1363 # We have to add extra_info to the results because it may be
1364 # contained in a playlist
1365 return self.extract_info(
1366 ie_result['url'], download,
1367 ie_key=ie_result.get('ie_key'),
1368 extra_info=extra_info)
1369 elif result_type == 'url_transparent':
1370 # Use the information from the embedding page
1371 info = self.extract_info(
1372 ie_result['url'], ie_key=ie_result.get('ie_key'),
1373 extra_info=extra_info, download=False, process=False)
1374
1375 # extract_info may return None when ignoreerrors is enabled and
1376 # extraction failed with an error, don't crash and return early
1377 # in this case
1378 if not info:
1379 return info
1380
1381 force_properties = dict(
1382 (k, v) for k, v in ie_result.items() if v is not None)
1383 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1384 if f in force_properties:
1385 del force_properties[f]
1386 new_result = info.copy()
1387 new_result.update(force_properties)
1388
1389 # Extracted info may not be a video result (i.e.
1390 # info.get('_type', 'video') != video) but rather an url or
1391 # url_transparent. In such cases outer metadata (from ie_result)
1392 # should be propagated to inner one (info). For this to happen
1393 # _type of info should be overridden with url_transparent. This
1394 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1395 if new_result.get('_type') == 'url':
1396 new_result['_type'] = 'url_transparent'
1397
1398 return self.process_ie_result(
1399 new_result, download=download, extra_info=extra_info)
1400 elif result_type in ('playlist', 'multi_video'):
1401 # Protect from infinite recursion due to recursively nested playlists
1402 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1403 webpage_url = ie_result['webpage_url']
1404 if webpage_url in self._playlist_urls:
1405 self.to_screen(
1406 '[download] Skipping already downloaded playlist: %s'
1407 % ie_result.get('title') or ie_result.get('id'))
1408 return
1409
1410 self._playlist_level += 1
1411 self._playlist_urls.add(webpage_url)
1412 self._sanitize_thumbnails(ie_result)
1413 try:
1414 return self.__process_playlist(ie_result, download)
1415 finally:
1416 self._playlist_level -= 1
1417 if not self._playlist_level:
1418 self._playlist_urls.clear()
1419 elif result_type == 'compat_list':
1420 self.report_warning(
1421 'Extractor %s returned a compat_list result. '
1422 'It needs to be updated.' % ie_result.get('extractor'))
1423
1424 def _fixup(r):
1425 self.add_extra_info(r, {
1426 'extractor': ie_result['extractor'],
1427 'webpage_url': ie_result['webpage_url'],
1428 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1429 'extractor_key': ie_result['extractor_key'],
1430 })
1431 return r
1432 ie_result['entries'] = [
1433 self.process_ie_result(_fixup(r), download, extra_info)
1434 for r in ie_result['entries']
1435 ]
1436 return ie_result
1437 else:
1438 raise Exception('Invalid result type: %s' % result_type)
1439
    def _ensure_dir_exists(self, path):
        # Create the directory for `path` if needed; failures go to report_error
        return make_dir(path, self.report_error)
1442
1443 def __process_playlist(self, ie_result, download):
1444 # We process each entry in the playlist
1445 playlist = ie_result.get('title') or ie_result.get('id')
1446 self.to_screen('[download] Downloading playlist: %s' % playlist)
1447
1448 if 'entries' not in ie_result:
1449 raise EntryNotInPlaylist()
1450 incomplete_entries = bool(ie_result.get('requested_entries'))
1451 if incomplete_entries:
1452 def fill_missing_entries(entries, indexes):
1453 ret = [None] * max(*indexes)
1454 for i, entry in zip(indexes, entries):
1455 ret[i - 1] = entry
1456 return ret
1457 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1458
1459 playlist_results = []
1460
1461 playliststart = self.params.get('playliststart', 1)
1462 playlistend = self.params.get('playlistend')
1463 # For backwards compatibility, interpret -1 as whole list
1464 if playlistend == -1:
1465 playlistend = None
1466
1467 playlistitems_str = self.params.get('playlist_items')
1468 playlistitems = None
1469 if playlistitems_str is not None:
1470 def iter_playlistitems(format):
1471 for string_segment in format.split(','):
1472 if '-' in string_segment:
1473 start, end = string_segment.split('-')
1474 for item in range(int(start), int(end) + 1):
1475 yield int(item)
1476 else:
1477 yield int(string_segment)
1478 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1479
1480 ie_entries = ie_result['entries']
1481 msg = (
1482 'Downloading %d videos' if not isinstance(ie_entries, list)
1483 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1484
1485 if isinstance(ie_entries, list):
1486 def get_entry(i):
1487 return ie_entries[i - 1]
1488 else:
1489 if not isinstance(ie_entries, PagedList):
1490 ie_entries = LazyList(ie_entries)
1491
1492 def get_entry(i):
1493 return YoutubeDL.__handle_extraction_exceptions(
1494 lambda self, i: ie_entries[i - 1]
1495 )(self, i)
1496
1497 entries = []
1498 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1499 for i in items:
1500 if i == 0:
1501 continue
1502 if playlistitems is None and playlistend is not None and playlistend < i:
1503 break
1504 entry = None
1505 try:
1506 entry = get_entry(i)
1507 if entry is None:
1508 raise EntryNotInPlaylist()
1509 except (IndexError, EntryNotInPlaylist):
1510 if incomplete_entries:
1511 raise EntryNotInPlaylist()
1512 elif not playlistitems:
1513 break
1514 entries.append(entry)
1515 try:
1516 if entry is not None:
1517 self._match_entry(entry, incomplete=True, silent=True)
1518 except (ExistingVideoReached, RejectedVideoReached):
1519 break
1520 ie_result['entries'] = entries
1521
1522 # Save playlist_index before re-ordering
1523 entries = [
1524 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1525 for i, entry in enumerate(entries, 1)
1526 if entry is not None]
1527 n_entries = len(entries)
1528
1529 if not playlistitems and (playliststart or playlistend):
1530 playlistitems = list(range(playliststart, playliststart + n_entries))
1531 ie_result['requested_entries'] = playlistitems
1532
1533 if self.params.get('allow_playlist_files', True):
1534 ie_copy = {
1535 'playlist': playlist,
1536 'playlist_id': ie_result.get('id'),
1537 'playlist_title': ie_result.get('title'),
1538 'playlist_uploader': ie_result.get('uploader'),
1539 'playlist_uploader_id': ie_result.get('uploader_id'),
1540 'playlist_index': 0,
1541 }
1542 ie_copy.update(dict(ie_result))
1543
1544 if self._write_info_json('playlist', ie_result,
1545 self.prepare_filename(ie_copy, 'pl_infojson')) is None:
1546 return
1547 if self._write_description('playlist', ie_result,
1548 self.prepare_filename(ie_copy, 'pl_description')) is None:
1549 return
1550 # TODO: This should be passed to ThumbnailsConvertor if necessary
1551 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1552
1553 if self.params.get('playlistreverse', False):
1554 entries = entries[::-1]
1555 if self.params.get('playlistrandom', False):
1556 random.shuffle(entries)
1557
1558 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1559
1560 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
1561 failures = 0
1562 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1563 for i, entry_tuple in enumerate(entries, 1):
1564 playlist_index, entry = entry_tuple
1565 if 'playlist-index' in self.params.get('compat_opts', []):
1566 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1567 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1568 # This __x_forwarded_for_ip thing is a bit ugly but requires
1569 # minimal changes
1570 if x_forwarded_for:
1571 entry['__x_forwarded_for_ip'] = x_forwarded_for
1572 extra = {
1573 'n_entries': n_entries,
1574 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1575 'playlist_index': playlist_index,
1576 'playlist_autonumber': i,
1577 'playlist': playlist,
1578 'playlist_id': ie_result.get('id'),
1579 'playlist_title': ie_result.get('title'),
1580 'playlist_uploader': ie_result.get('uploader'),
1581 'playlist_uploader_id': ie_result.get('uploader_id'),
1582 'extractor': ie_result['extractor'],
1583 'webpage_url': ie_result['webpage_url'],
1584 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1585 'extractor_key': ie_result['extractor_key'],
1586 }
1587
1588 if self._match_entry(entry, incomplete=True) is not None:
1589 continue
1590
1591 entry_result = self.__process_iterable_entry(entry, download, extra)
1592 if not entry_result:
1593 failures += 1
1594 if failures >= max_failures:
1595 self.report_error(
1596 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1597 break
1598 # TODO: skip failed (empty) entries?
1599 playlist_results.append(entry_result)
1600 ie_result['entries'] = playlist_results
1601 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1602 return ie_result
1603
    @__handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        # Thin wrapper so per-entry errors are handled (and possibly ignored)
        # by __handle_extraction_exceptions instead of aborting the playlist
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)
1608
1609 def _build_format_filter(self, filter_spec):
1610 " Returns a function to filter the formats according to the filter_spec "
1611
1612 OPERATORS = {
1613 '<': operator.lt,
1614 '<=': operator.le,
1615 '>': operator.gt,
1616 '>=': operator.ge,
1617 '=': operator.eq,
1618 '!=': operator.ne,
1619 }
1620 operator_rex = re.compile(r'''(?x)\s*
1621 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1622 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1623 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1624 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1625 m = operator_rex.fullmatch(filter_spec)
1626 if m:
1627 try:
1628 comparison_value = int(m.group('value'))
1629 except ValueError:
1630 comparison_value = parse_filesize(m.group('value'))
1631 if comparison_value is None:
1632 comparison_value = parse_filesize(m.group('value') + 'B')
1633 if comparison_value is None:
1634 raise ValueError(
1635 'Invalid value %r in format specification %r' % (
1636 m.group('value'), filter_spec))
1637 op = OPERATORS[m.group('op')]
1638
1639 if not m:
1640 STR_OPERATORS = {
1641 '=': operator.eq,
1642 '^=': lambda attr, value: attr.startswith(value),
1643 '$=': lambda attr, value: attr.endswith(value),
1644 '*=': lambda attr, value: value in attr,
1645 }
1646 str_operator_rex = re.compile(r'''(?x)\s*
1647 (?P<key>[a-zA-Z0-9._-]+)\s*
1648 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1649 (?P<value>[a-zA-Z0-9._-]+)\s*
1650 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1651 m = str_operator_rex.fullmatch(filter_spec)
1652 if m:
1653 comparison_value = m.group('value')
1654 str_op = STR_OPERATORS[m.group('op')]
1655 if m.group('negation'):
1656 op = lambda attr, value: not str_op(attr, value)
1657 else:
1658 op = str_op
1659
1660 if not m:
1661 raise SyntaxError('Invalid filter specification %r' % filter_spec)
1662
1663 def _filter(f):
1664 actual_value = f.get(m.group('key'))
1665 if actual_value is None:
1666 return m.group('none_inclusive')
1667 return op(actual_value, comparison_value)
1668 return _filter
1669
1670 def _default_format_spec(self, info_dict, download=True):
1671
1672 def can_merge():
1673 merger = FFmpegMergerPP(self)
1674 return merger.available and merger.can_merge()
1675
1676 prefer_best = (
1677 not self.params.get('simulate')
1678 and download
1679 and (
1680 not can_merge()
1681 or info_dict.get('is_live', False)
1682 or self.outtmpl_dict['default'] == '-'))
1683 compat = (
1684 prefer_best
1685 or self.params.get('allow_multiple_audio_streams', False)
1686 or 'format-spec' in self.params.get('compat_opts', []))
1687
1688 return (
1689 'best/bestvideo+bestaudio' if prefer_best
1690 else 'bestvideo*+bestaudio/best' if not compat
1691 else 'bestvideo+bestaudio/best')
1692
    def build_format_selector(self, format_spec):
        """Compile a format selection expression (e.g. 'bv*+ba/b') into a
        selector function.

        The returned function maps a context dict
        {'formats': [...], 'incomplete_formats': bool} to an iterable of the
        format dicts that should be downloaded. Raises SyntaxError on an
        invalid spec and ValueError on an invalid filter value.
        """
        def syntax_error(note, start):
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        # Node types of the parsed selector tree
        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                  'video': self.params.get('allow_multiple_video_streams', False)}

        check_formats = self.params.get('check_formats')

        def _parse_filter(tokens):
            # Collect everything up to the closing ']' into a raw filter string
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parser producing a list of FormatSelector trees
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _merge(formats_pair):
            # Merge two formats (or already-merged groups, via their
            # 'requested_formats') into one synthetic format dict
            format_1, format_2 = formats_pair

            formats_info = []
            formats_info.extend(format_1.get('requested_formats', (format_1,)))
            formats_info.extend(format_2.get('requested_formats', (format_2,)))

            if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                # Keep at most one stream per disallowed kind, dropping extras
                get_no_more = {'video': False, 'audio': False}
                for (i, fmt_info) in enumerate(formats_info):
                    if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                        formats_info.pop(i)
                        continue
                    for aud_vid in ['audio', 'video']:
                        if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                            if get_no_more[aud_vid]:
                                formats_info.pop(i)
                                break
                            get_no_more[aud_vid] = True

            if len(formats_info) == 1:
                return formats_info[0]

            video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
            audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

            the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
            the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

            output_ext = self.params.get('merge_output_format')
            if not output_ext:
                if the_only_video:
                    output_ext = the_only_video['ext']
                elif the_only_audio and not video_fmts:
                    output_ext = the_only_audio['ext']
                else:
                    output_ext = 'mkv'

            new_dict = {
                'requested_formats': formats_info,
                'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                'ext': output_ext,
            }

            if the_only_video:
                new_dict.update({
                    'width': the_only_video.get('width'),
                    'height': the_only_video.get('height'),
                    'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                    'fps': the_only_video.get('fps'),
                    'vcodec': the_only_video.get('vcodec'),
                    'vbr': the_only_video.get('vbr'),
                    'stretched_ratio': the_only_video.get('stretched_ratio'),
                })

            if the_only_audio:
                new_dict.update({
                    'acodec': the_only_audio.get('acodec'),
                    'abr': the_only_audio.get('abr'),
                })

            return new_dict

        def _check_formats(formats):
            # With --check-formats, probe each format with a tiny test
            # download and drop the ones that fail; otherwise pass through
            if not check_formats:
                yield from formats
                return
            for f in formats:
                self.to_screen('[info] Testing format %s' % f['format_id'])
                temp_file = tempfile.NamedTemporaryFile(
                    suffix='.tmp', delete=False,
                    dir=self.get_output_path('temp') or None)
                temp_file.close()
                try:
                    success, _ = self.dl(temp_file.name, f, test=True)
                except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
                    success = False
                finally:
                    if os.path.exists(temp_file.name):
                        try:
                            os.remove(temp_file.name)
                        except OSError:
                            self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
                if success:
                    yield f
                else:
                    self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

        def _build_selector_function(selector):
            # Turn a FormatSelector tree (or a list of them) into a callable
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        yield from f(ctx)
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == MERGE:  # +
                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector or 'best'

                # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                if format_spec == 'all':
                    def selector_function(ctx):
                        yield from _check_formats(ctx['formats'])
                elif format_spec == 'mergeall':
                    def selector_function(ctx):
                        formats = list(_check_formats(ctx['formats']))
                        if not formats:
                            return
                        merged_format = formats[-1]
                        for f in formats[-2::-1]:
                            merged_format = _merge((merged_format, f))
                        yield merged_format

                else:
                    format_fallback, format_reverse, format_idx = False, True, 1
                    mobj = re.match(
                        r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                        format_spec)
                    if mobj is not None:
                        format_idx = int_or_none(mobj.group('n'), default=1)
                        format_reverse = mobj.group('bw')[0] == 'b'
                        format_type = (mobj.group('type') or [None])[0]
                        not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                        format_modified = mobj.group('mod') is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        _filter_f = (
                            (lambda f: f.get('%scodec' % format_type) != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else lambda f: True)  # b*, w*
                        filter_f = lambda f: _filter_f(f) and (
                            f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                    else:
                        if format_spec in ('m4a', 'mp3', 'ogg', 'aac'):  # audio extension
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                        elif format_spec in ('mp4', 'flv', 'webm', '3gp'):  # video extension
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        elif format_spec in ('mhtml', ):  # storyboards extension
                            filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                        else:
                            filter_f = (lambda f: f.get('format_id') == format_spec)  # id

                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                        if format_fallback and ctx['incomplete_formats'] and not matches:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                        try:
                            yield matches[format_idx - 1]
                        except IndexError:
                            return

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Apply the [..] filters on a deep copy so siblings are unaffected
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Iterator over tokens with one-token push-back support,
            # needed by the recursive parser
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
2028
2029 def _calc_headers(self, info_dict):
2030 res = std_headers.copy()
2031
2032 add_headers = info_dict.get('http_headers')
2033 if add_headers:
2034 res.update(add_headers)
2035
2036 cookies = self._calc_cookies(info_dict)
2037 if cookies:
2038 res['Cookie'] = cookies
2039
2040 if 'X-Forwarded-For' not in res:
2041 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2042 if x_forwarded_for_ip:
2043 res['X-Forwarded-For'] = x_forwarded_for_ip
2044
2045 return res
2046
    def _calc_cookies(self, info_dict):
        """Return the 'Cookie' header value for info_dict['url'] from the
        cookiejar, or None if no cookies match."""
        pr = sanitized_Request(info_dict['url'])
        self.cookiejar.add_cookie_header(pr)
        return pr.get_header('Cookie')
2051
    def _sanitize_thumbnails(self, info_dict):
        """Normalize info_dict['thumbnails'] in place: build the list from a
        lone 'thumbnail' field if needed, sort worst-to-best, fill in missing
        ids/resolutions, sanitize URLs and optionally test reachability."""
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            # Fall back to the single 'thumbnail' field
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            # Sort ascending, so the preferred thumbnail ends up last
            thumbnails.sort(key=lambda t: (
                t.get('preference') if t.get('preference') is not None else -1,
                t.get('width') if t.get('width') is not None else -1,
                t.get('height') if t.get('height') is not None else -1,
                t.get('id') if t.get('id') is not None else '',
                t.get('url')))

            def thumbnail_tester():
                # With check_formats every URL is tested; otherwise only those
                # the extractor marked with '_test_url'
                if self.params.get('check_formats'):
                    test_all = True
                    to_screen = lambda msg: self.to_screen(f'[info] {msg}')
                else:
                    test_all = False
                    to_screen = self.write_debug

                def test_thumbnail(t):
                    if not test_all and not t.get('_test_url'):
                        return True
                    to_screen('Testing thumbnail %s' % t['id'])
                    try:
                        self.urlopen(HEADRequest(t['url']))
                    except network_exceptions as err:
                        to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
                            t['id'], t['url'], error_to_compat_str(err)))
                        return False
                    return True

                return test_thumbnail

            for i, t in enumerate(thumbnails):
                if t.get('id') is None:
                    t['id'] = '%d' % i
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                t['url'] = sanitize_url(t['url'])

            if self.params.get('check_formats') is not False:
                # Iterate reversed (best first) so testing can stop early,
                # then restore ascending order lazily
                info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
            else:
                info_dict['thumbnails'] = thumbnails
2099
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single video's info_dict, select the requested formats
        and subtitles and, if download is True, hand each selected format to
        process_info. Returns the (mutated) info_dict.

        Raises ExtractorError when mandatory fields are missing or when no
        format matches and 'ignore_no_formats_error' is not set.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result',
                                 video_id=info_dict['id'], ie=info_dict['extractor'])

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string field to str, warning about the extractor bug
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce all known numeric fields to int, warning about extractor bugs
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        self._sanitize_thumbnails(info_dict)

        thumbnail = info_dict.get('thumbnail')
        thumbnails = info_dict.get('thumbnails')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # The list is sorted ascending, so the last entry is the best
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if info_dict.get('display_id') is None and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive the date fields from the corresponding timestamps when missing
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
                except (ValueError, OverflowError, OSError):
                    pass

        # Keep 'live_status' and the 'is_live'/'was_live' booleans consistent
        live_keys = ('is_live', 'was_live')
        live_status = info_dict.get('live_status')
        if live_status is None:
            for key in live_keys:
                if info_dict.get(key) is False:
                    continue
                if info_dict.get(key):
                    live_status = key
                break
            if all(info_dict.get(key) is False for key in live_keys):
                live_status = 'not_live'
        if live_status:
            info_dict['live_status'] = live_status
            for key in live_keys:
                if info_dict.get(key) is None:
                    info_dict[key] = (live_status == key)

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle/caption URLs and fill in missing extensions
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
        if not self.params.get('allow_unplayable_formats'):
            formats = [f for f in formats if not f.get('has_drm')]

        if not formats:
            self.raise_no_formats(info_dict)

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats))

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if not formats or formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats

        info_dict, _ = self.pre_process(info_dict)

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
        if self.params.get('listformats'):
            if not info_dict.get('formats') and not info_dict.get('url'):
                self.to_screen('%s has no formats' % info_dict['id'])
            else:
                self.list_formats(info_dict)
        if self.params.get('listsubtitles'):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        list_only = self.params.get('simulate') is None and (
            self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
        if list_only:
            # Without this printing, -F --print-json will not work
            self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
            return

        format_selector = self.format_selector
        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug('Default format spec: %s' % req_format)
            format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            if not self.params.get('ignore_no_formats_error'):
                raise ExtractorError('Requested format is not available', expected=True,
                                     video_id=info_dict['id'], ie=info_dict['extractor'])
            else:
                self.report_warning('Requested format is not available')
                # Process what we can, even without any available formats.
                self.process_info(dict(info_dict))
        elif download:
            self.to_screen(
                '[info] %s: Downloading %d format(s): %s' % (
                    info_dict['id'], len(formats_to_download),
                    ", ".join([f['format_id'] for f in formats_to_download])))
            for fmt in formats_to_download:
                new_info = dict(info_dict)
                # Save a reference to the original info_dict so that it can be modified in process_info if needed
                new_info['__original_infodict'] = info_dict
                new_info.update(fmt)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        if formats_to_download:
            info_dict.update(formats_to_download[-1])
        return info_dict
2361
2362 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2363 """Select the requested subtitles and their format"""
2364 available_subs = {}
2365 if normal_subtitles and self.params.get('writesubtitles'):
2366 available_subs.update(normal_subtitles)
2367 if automatic_captions and self.params.get('writeautomaticsub'):
2368 for lang, cap_info in automatic_captions.items():
2369 if lang not in available_subs:
2370 available_subs[lang] = cap_info
2371
2372 if (not self.params.get('writesubtitles') and not
2373 self.params.get('writeautomaticsub') or not
2374 available_subs):
2375 return None
2376
2377 all_sub_langs = available_subs.keys()
2378 if self.params.get('allsubtitles', False):
2379 requested_langs = all_sub_langs
2380 elif self.params.get('subtitleslangs', False):
2381 # A list is used so that the order of languages will be the same as
2382 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2383 requested_langs = []
2384 for lang_re in self.params.get('subtitleslangs'):
2385 if lang_re == 'all':
2386 requested_langs.extend(all_sub_langs)
2387 continue
2388 discard = lang_re[0] == '-'
2389 if discard:
2390 lang_re = lang_re[1:]
2391 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2392 if discard:
2393 for lang in current_langs:
2394 while lang in requested_langs:
2395 requested_langs.remove(lang)
2396 else:
2397 requested_langs.extend(current_langs)
2398 requested_langs = orderedSet(requested_langs)
2399 elif 'en' in available_subs:
2400 requested_langs = ['en']
2401 else:
2402 requested_langs = [list(all_sub_langs)[0]]
2403 if requested_langs:
2404 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2405
2406 formats_query = self.params.get('subtitlesformat', 'best')
2407 formats_preference = formats_query.split('/') if formats_query else []
2408 subs = {}
2409 for lang in requested_langs:
2410 formats = available_subs.get(lang)
2411 if formats is None:
2412 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2413 continue
2414 for ext in formats_preference:
2415 if ext == 'best':
2416 f = formats[-1]
2417 break
2418 matches = list(filter(lambda f: f['ext'] == ext, formats))
2419 if matches:
2420 f = matches[-1]
2421 break
2422 else:
2423 f = formats[-1]
2424 self.report_warning(
2425 'No subtitle format found matching "%s" for language %s, '
2426 'using %s' % (formats_query, lang, f['ext']))
2427 subs[lang] = f
2428 return subs
2429
    def __forced_printings(self, info_dict, filename, incomplete):
        """Write the fields requested via the force* params (forcetitle,
        forceurl, forceprint, forcejson, ...) to stdout.

        @param info_dict    The (already extracted) info dict to print from
        @param filename     Final filename, exposed as %(filename)s; may be None
        @param incomplete   Whether extraction was partial, in which case
                            missing "mandatory" fields are tolerated
        """
        def print_mandatory(field, actual_field=None):
            # Mandatory fields are printed even when missing (raising KeyError),
            # unless extraction is known to be incomplete
            if actual_field is None:
                actual_field = field
            if (self.params.get('force%s' % field, False)
                    and (not incomplete or info_dict.get(actual_field) is not None)):
                self.to_stdout(info_dict[actual_field])

        def print_optional(field):
            # Optional fields are only printed when present
            if (self.params.get('force%s' % field, False)
                    and info_dict.get(field) is not None):
                self.to_stdout(info_dict[field])

        info_dict = info_dict.copy()  # avoid mutating the caller's dict
        if filename is not None:
            info_dict['filename'] = filename
        if info_dict.get('requested_formats') is not None:
            # For RTMP URLs, also include the playpath
            info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
        elif 'url' in info_dict:
            info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

        if self.params.get('forceprint') or self.params.get('forcejson'):
            self.post_extract(info_dict)
        for tmpl in self.params.get('forceprint', []):
            # A bare field name is shorthand for the '%(field)s' output template
            self.to_stdout(self.evaluate_outtmpl(
                f'%({tmpl})s' if re.match(r'\w+$', tmpl) else tmpl, info_dict))

        print_mandatory('title')
        print_mandatory('id')
        print_mandatory('url', 'urls')
        print_optional('thumbnail')
        print_optional('description')
        print_optional('filename')
        if self.params.get('forceduration') and info_dict.get('duration') is not None:
            self.to_stdout(formatSeconds(info_dict['duration']))
        print_mandatory('format')

        if self.params.get('forcejson'):
            self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2470
    def dl(self, name, info, subtitle=False, test=False):
        """Download the single format described by `info` to the file `name`.

        @param name      Destination filename ('-' streams to stdout)
        @param info      Info dict of the format to download; must have 'url'
        @param subtitle  Whether this is a subtitle download
        @param test      Download only a small part, quietly, leaving no traces
        Returns whatever FileDownloader.download returns (callers unpack it as
        a (success, real_download) pair).
        """
        if not info.get('url'):
            self.raise_no_formats(info, True)

        if test:
            # Use a throw-away parameter set so the test download is quiet
            # and leaves no .part/.ytdl files behind
            verbose = self.params.get('verbose')
            params = {
                'test': True,
                'quiet': not verbose,
                'verbose': verbose,
                'noprogress': not verbose,
                'nopart': True,
                'skip_unavailable_fragments': False,
                'keep_fragments': False,
                'overwrites': True,
                '_no_ytdl_file': True,
            }
        else:
            params = self.params
        fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
        if not test:
            for ph in self._progress_hooks:
                fd.add_progress_hook(ph)
        urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
        self.write_debug('Invoking downloader on "%s"' % urls)
        # Work on a copy so the caller's dict is not polluted with http_headers
        new_info = dict(info)
        if new_info.get('http_headers') is None:
            new_info['http_headers'] = self._calc_headers(new_info)
        return fd.download(name, new_info, subtitle)
2500
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Handles forced printing, writing of side files (description,
        subtitles, thumbnails, info-json, annotations, internet shortcuts),
        the actual download (including multi-format merge handling),
        fixups, post-processing and download-archive recording.
        """

        assert info_dict.get('_type', 'video') == 'video'

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads >= int(max_downloads):
                raise MaxDownloadsReached()

        # TODO: backward compatibility, to be removed
        info_dict['fulltitle'] = info_dict['title']

        if 'format' not in info_dict and 'ext' in info_dict:
            info_dict['format'] = info_dict['ext']

        # A non-None return value means the video is to be skipped
        if self._match_entry(info_dict) is not None:
            return

        self.post_extract(info_dict)
        self._num_downloads += 1

        # info_dict['_filename'] needs to be set for backward compatibility
        info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
        temp_filename = self.prepare_filename(info_dict, 'temp')
        files_to_move = {}

        # Forced printings
        self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

        if self.params.get('simulate'):
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_dict)
            # Do nothing else if in simulate mode
            return

        if full_filename is None:
            return
        if not self._ensure_dir_exists(encodeFilename(full_filename)):
            return
        if not self._ensure_dir_exists(encodeFilename(temp_filename)):
            return

        # The _write_* helpers return None on fatal error, falsy on skip
        if self._write_description('video', info_dict,
                                   self.prepare_filename(info_dict, 'description')) is None:
            return

        sub_files = self._write_subtitles(info_dict, temp_filename)
        if sub_files is None:
            return
        files_to_move.update(dict(sub_files))

        thumb_files = self._write_thumbnails(
            'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
        if thumb_files is None:
            return
        files_to_move.update(dict(thumb_files))

        infofn = self.prepare_filename(info_dict, 'infojson')
        _infojson_written = self._write_info_json('video', info_dict, infofn)
        if _infojson_written:
            info_dict['__infojson_filename'] = infofn
        elif _infojson_written is None:
            return

        # Note: Annotations are deprecated
        annofn = None
        if self.params.get('writeannotations', False):
            annofn = self.prepare_filename(info_dict, 'annotation')
        if annofn:
            if not self._ensure_dir_exists(encodeFilename(annofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            elif not info_dict.get('annotations'):
                self.report_warning('There are no annotations to write.')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        # Write internet shortcut files
        url_link = webloc_link = desktop_link = False
        if self.params.get('writelink', False):
            if sys.platform == "darwin":  # macOS.
                webloc_link = True
            elif sys.platform.startswith("linux"):
                desktop_link = True
            else:  # if sys.platform in ['win32', 'cygwin']:
                url_link = True
        if self.params.get('writeurllink', False):
            url_link = True
        if self.params.get('writewebloclink', False):
            webloc_link = True
        if self.params.get('writedesktoplink', False):
            desktop_link = True

        if url_link or webloc_link or desktop_link:
            if 'webpage_url' not in info_dict:
                self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
                return
            ascii_url = iri_to_uri(info_dict['webpage_url'])

        def _write_link_file(extension, template, newline, embed_filename):
            linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
            # NOTE(review): unlike the annotations check above, this condition
            # lacks `not` — with overwrites enabled (the default) an existing
            # shortcut is never rewritten. Looks inverted; confirm intent.
            if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
                self.to_screen('[info] Internet shortcut is already present')
            else:
                try:
                    self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
                    with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
                        template_vars = {'url': ascii_url}
                        if embed_filename:
                            template_vars['filename'] = linkfn[:-(len(extension) + 1)]
                        linkfile.write(template % template_vars)
                except (OSError, IOError):
                    self.report_error('Cannot write internet shortcut ' + linkfn)
                    return False
            return True

        if url_link:
            if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
                return
        if webloc_link:
            if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
                return
        if desktop_link:
            if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
                return

        try:
            info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
        except PostProcessingError as err:
            self.report_error('Preprocessing: %s' % str(err))
            return

        must_record_download_archive = False
        if self.params.get('skip_download', False):
            info_dict['filepath'] = temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
            info_dict['__files_to_move'] = files_to_move
            info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
        else:
            # Download
            info_dict.setdefault('__postprocessors', [])
            try:

                def existing_file(*filepaths):
                    # Return an already-downloaded file to reuse (possibly the
                    # post-conversion one), or None after deleting stale copies
                    ext = info_dict.get('ext')
                    final_ext = self.params.get('final_ext', ext)
                    existing_files = []
                    for file in orderedSet(filepaths):
                        if final_ext != ext:
                            converted = replace_extension(file, final_ext, ext)
                            if os.path.exists(encodeFilename(converted)):
                                existing_files.append(converted)
                        if os.path.exists(encodeFilename(file)):
                            existing_files.append(file)

                    if not existing_files or self.params.get('overwrites', False):
                        for file in orderedSet(existing_files):
                            self.report_file_delete(file)
                            os.remove(encodeFilename(file))
                        return None

                    info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
                    return existing_files[0]

                success = True
                if info_dict.get('requested_formats') is not None:

                    def compatible_formats(formats):
                        # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                        video_formats = [format for format in formats if format.get('vcodec') != 'none']
                        audio_formats = [format for format in formats if format.get('acodec') != 'none']
                        if len(video_formats) > 2 or len(audio_formats) > 2:
                            return False

                        # Check extension
                        exts = set(format.get('ext') for format in formats)
                        COMPATIBLE_EXTS = (
                            set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                            set(('webm',)),
                        )
                        for ext_sets in COMPATIBLE_EXTS:
                            if ext_sets.issuperset(exts):
                                return True
                        # TODO: Check acodec/vcodec
                        return False

                    requested_formats = info_dict['requested_formats']
                    old_ext = info_dict['ext']
                    if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                    new_ext = info_dict['ext']

                    def correct_ext(filename, ext=new_ext):
                        # Swap the filename's extension to `ext` (no-op for stdout)
                        if filename == '-':
                            return filename
                        filename_real_ext = os.path.splitext(filename)[1][1:]
                        filename_wo_ext = (
                            os.path.splitext(filename)[0]
                            if filename_real_ext in (old_ext, new_ext)
                            else filename)
                        return '%s.%s' % (filename_wo_ext, ext)

                    # Ensure filename always has a correct extension for successful merge
                    full_filename = correct_ext(full_filename)
                    temp_filename = correct_ext(temp_filename)
                    dl_filename = existing_file(full_filename, temp_filename)
                    info_dict['__real_download'] = False

                    _protocols = set(determine_protocol(f) for f in requested_formats)
                    if len(_protocols) == 1:  # All requested formats have same protocol
                        info_dict['protocol'] = _protocols.pop()
                    directly_mergable = FFmpegFD.can_merge_formats(info_dict, self.params)
                    if dl_filename is not None:
                        self.report_file_already_downloaded(dl_filename)
                    elif (directly_mergable and get_suitable_downloader(
                            info_dict, self.params, to_stdout=(temp_filename == '-')) == FFmpegFD):
                        # ffmpeg can download and merge in one pass
                        info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                        success, real_download = self.dl(temp_filename, info_dict)
                        info_dict['__real_download'] = real_download
                    else:
                        # Download each format separately, then merge (if possible)
                        downloaded = []
                        merger = FFmpegMergerPP(self)
                        if self.params.get('allow_unplayable_formats'):
                            self.report_warning(
                                'You have requested merging of multiple formats '
                                'while also allowing unplayable formats to be downloaded. '
                                'The formats won\'t be merged to prevent data corruption.')
                        elif not merger.available:
                            self.report_warning(
                                'You have requested merging of multiple formats but ffmpeg is not installed. '
                                'The formats won\'t be merged.')

                        if temp_filename == '-':
                            reason = ('using a downloader other than ffmpeg' if directly_mergable
                                      else 'but the formats are incompatible for simultaneous download' if merger.available
                                      else 'but ffmpeg is not installed')
                            self.report_warning(
                                f'You have requested downloading multiple formats to stdout {reason}. '
                                'The formats will be streamed one after the other')
                        fname = temp_filename
                        for f in requested_formats:
                            new_info = dict(info_dict)
                            del new_info['requested_formats']
                            new_info.update(f)
                            if temp_filename != '-':
                                fname = prepend_extension(
                                    correct_ext(temp_filename, new_info['ext']),
                                    'f%s' % f['format_id'], new_info['ext'])
                                if not self._ensure_dir_exists(fname):
                                    return
                                f['filepath'] = fname
                                downloaded.append(fname)
                            partial_success, real_download = self.dl(fname, new_info)
                            info_dict['__real_download'] = info_dict['__real_download'] or real_download
                            success = success and partial_success
                        if merger.available and not self.params.get('allow_unplayable_formats'):
                            info_dict['__postprocessors'].append(merger)
                            info_dict['__files_to_merge'] = downloaded
                            # Even if there were no downloads, it is being merged only now
                            info_dict['__real_download'] = True
                        else:
                            for file in downloaded:
                                files_to_move[file] = None
                else:
                    # Just a single file
                    dl_filename = existing_file(full_filename, temp_filename)
                    if dl_filename is None or dl_filename == temp_filename:
                        # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                        # So we should try to resume the download
                        success, real_download = self.dl(temp_filename, info_dict)
                        info_dict['__real_download'] = real_download
                    else:
                        self.report_file_already_downloaded(dl_filename)

                dl_filename = dl_filename or temp_filename
                info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

            except network_exceptions as err:
                self.report_error('unable to download video data: %s' % error_to_compat_str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success and full_filename != '-':

            def fixup():
                # Queue ffmpeg-based fixup postprocessors (or just warn)
                # according to the 'fixup' policy
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = False
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    if not cndn:
                        return
                    if not do_fixup:
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(
                    stretched_ratio not in (1, None),
                    f'Non-uniform pixel ratio {stretched_ratio}',
                    FFmpegFixupStretchedPP)

                ffmpeg_fixup(
                    (info_dict.get('requested_formats') is None
                     and info_dict.get('container') == 'm4a_dash'
                     and info_dict.get('ext') == 'm4a'),
                    'writing DASH m4a. Only some players support this container',
                    FFmpegFixupM4aPP)

                downloader = (get_suitable_downloader(info_dict, self.params).__name__
                              if 'protocol' in info_dict else None)
                ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
                             'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                info_dict = self.post_process(dl_filename, info_dict, files_to_move)
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            must_record_download_archive = True

        if must_record_download_archive or self.params.get('force_write_download_archive', False):
            self.record_download_archive(info_dict)
        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None and self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()
2866
    def download(self, url_list):
        """Download a given list of URLs."""
        outtmpl = self.outtmpl_dict['default']
        # Multiple URLs with a static output template (no '%' fields) would
        # all be written to the same file
        if (len(url_list) > 1
                and outtmpl != '-'
                and '%' not in outtmpl
                and self.params.get('max_downloads') != 1):
            raise SameFileError(outtmpl)

        for url in url_list:
            try:
                # It also downloads the videos
                res = self.extract_info(
                    url, force_generic_extractor=self.params.get('force_generic_extractor', False))
            except UnavailableVideoError:
                self.report_error('unable to download video')
            except MaxDownloadsReached:
                self.to_screen('[info] Maximum number of downloads reached')
                raise
            except ExistingVideoReached:
                self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
                raise
            except RejectedVideoReached:
                self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
                raise
            else:
                if self.params.get('dump_single_json', False):
                    self.post_extract(res)
                    self.to_stdout(json.dumps(self.sanitize_info(res)))

        return self._download_retcode
2898
    def download_with_info_file(self, info_filename):
        """Download using the info dict stored in a previously written
        info-json file, falling back to re-extracting from 'webpage_url'
        if that fails."""
        with contextlib.closing(fileinput.FileInput(
                [info_filename], mode='r',
                openhook=fileinput.hook_encoded('utf-8'))) as f:
            # FileInput doesn't have a read method, we can't call json.load
            info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
        try:
            self.process_ie_result(info, download=True)
        except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
            webpage_url = info.get('webpage_url')
            if webpage_url is not None:
                self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
                return self.download([webpage_url])
            else:
                raise
        return self._download_retcode
2915
2916 @staticmethod
2917 def sanitize_info(info_dict, remove_private_keys=False):
2918 ''' Sanitize the infodict for converting to json '''
2919 if info_dict is None:
2920 return info_dict
2921 info_dict.setdefault('epoch', int(time.time()))
2922 remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
2923 keep_keys = ['_type'], # Always keep this to facilitate load-info-json
2924 if remove_private_keys:
2925 remove_keys |= {
2926 'requested_formats', 'requested_subtitles', 'requested_entries',
2927 'filepath', 'entries', 'original_url', 'playlist_autonumber',
2928 }
2929 empty_values = (None, {}, [], set(), tuple())
2930 reject = lambda k, v: k not in keep_keys and (
2931 k.startswith('_') or k in remove_keys or v in empty_values)
2932 else:
2933 reject = lambda k, v: k in remove_keys
2934 filter_fn = lambda obj: (
2935 list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
2936 else obj if not isinstance(obj, dict)
2937 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
2938 return filter_fn(info_dict)
2939
2940 @staticmethod
2941 def filter_requested_info(info_dict, actually_filter=True):
2942 ''' Alias of sanitize_info for backward compatibility '''
2943 return YoutubeDL.sanitize_info(info_dict, actually_filter)
2944
    def run_pp(self, pp, infodict):
        """Run a single postprocessor `pp` on `infodict` and return the
        (possibly replaced) info dict, deleting or scheduling the move of
        any intermediate files the PP reports."""
        files_to_delete = []
        if '__files_to_move' not in infodict:
            infodict['__files_to_move'] = {}
        try:
            files_to_delete, infodict = pp.run(infodict)
        except PostProcessingError as e:
            # Must be True and not 'only_download'
            if self.params.get('ignoreerrors') is True:
                self.report_error(e)
                return infodict
            raise

        if not files_to_delete:
            return infodict
        if self.params.get('keepvideo', False):
            # With -k, schedule the intermediate files to be moved along
            # instead of deleting them
            for f in files_to_delete:
                infodict['__files_to_move'].setdefault(f, '')
        else:
            for old_filename in set(files_to_delete):
                self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
                try:
                    os.remove(encodeFilename(old_filename))
                except (IOError, OSError):
                    self.report_warning('Unable to remove downloaded original file')
                # A deleted file must not be moved later on
                if old_filename in infodict['__files_to_move']:
                    del infodict['__files_to_move'][old_filename]
        return infodict
2973
2974 @staticmethod
2975 def post_extract(info_dict):
2976 def actual_post_extract(info_dict):
2977 if info_dict.get('_type') in ('playlist', 'multi_video'):
2978 for video_dict in info_dict.get('entries', {}):
2979 actual_post_extract(video_dict or {})
2980 return
2981
2982 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
2983 extra = post_extractor().items()
2984 info_dict.update(extra)
2985 info_dict.pop('__post_extractor', None)
2986
2987 original_infodict = info_dict.get('__original_infodict') or {}
2988 original_infodict.update(extra)
2989 original_infodict.pop('__post_extractor', None)
2990
2991 actual_post_extract(info_dict or {})
2992
2993 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
2994 info = dict(ie_info)
2995 info['__files_to_move'] = files_to_move or {}
2996 for pp in self._pps[key]:
2997 info = self.run_pp(pp, info)
2998 return info, info.pop('__files_to_move', None)
2999
3000 def post_process(self, filename, ie_info, files_to_move=None):
3001 """Run all the postprocessors on the given file."""
3002 info = dict(ie_info)
3003 info['filepath'] = filename
3004 info['__files_to_move'] = files_to_move or {}
3005
3006 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
3007 info = self.run_pp(pp, info)
3008 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3009 del info['__files_to_move']
3010 for pp in self._pps['after_move']:
3011 info = self.run_pp(pp, info)
3012 return info
3013
3014 def _make_archive_id(self, info_dict):
3015 video_id = info_dict.get('id')
3016 if not video_id:
3017 return
3018 # Future-proof against any change in case
3019 # and backwards compatibility with prior versions
3020 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3021 if extractor is None:
3022 url = str_or_none(info_dict.get('url'))
3023 if not url:
3024 return
3025 # Try to find matching extractor for the URL and take its ie_key
3026 for ie_key, ie in self._ies.items():
3027 if ie.suitable(url):
3028 extractor = ie_key
3029 break
3030 else:
3031 return
3032 return '%s %s' % (extractor.lower(), video_id)
3033
3034 def in_download_archive(self, info_dict):
3035 fn = self.params.get('download_archive')
3036 if fn is None:
3037 return False
3038
3039 vid_id = self._make_archive_id(info_dict)
3040 if not vid_id:
3041 return False # Incomplete video information
3042
3043 return vid_id in self.archive
3044
3045 def record_download_archive(self, info_dict):
3046 fn = self.params.get('download_archive')
3047 if fn is None:
3048 return
3049 vid_id = self._make_archive_id(info_dict)
3050 assert vid_id
3051 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3052 archive_file.write(vid_id + '\n')
3053 self.archive.add(vid_id)
3054
3055 @staticmethod
3056 def format_resolution(format, default='unknown'):
3057 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3058 return 'audio only'
3059 if format.get('resolution') is not None:
3060 return format['resolution']
3061 if format.get('width') and format.get('height'):
3062 res = '%dx%d' % (format['width'], format['height'])
3063 elif format.get('height'):
3064 res = '%sp' % format['height']
3065 elif format.get('width'):
3066 res = '%dx?' % format['width']
3067 else:
3068 res = default
3069 if format.get('vcodec') == 'none' and format.get('acodec') == 'none':
3070 res += ' (images)'
3071 return res
3072
    def _format_note(self, fdict):
        """Build the free-form 'note' column for a format dict, as used by
        the old-style (non-table) format listing."""
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None
                and fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                # The bitrate value itself is appended below
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            # Audio bitrate known but codec unknown
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
3128
    def list_formats(self, info_dict):
        """Print the table of available formats for a video (--list-formats)."""
        formats = info_dict.get('formats', [info_dict])
        # The new table layout can be disabled via compat_opts or
        # --no-list-formats-as-table
        new_format = (
            'list-formats' not in self.params.get('compat_opts', [])
            and self.params.get('listformats_table', True) is not False)
        if new_format:
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    format_field(f, 'fps', '%d'),
                    '|',
                    format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                    format_field(f, 'tbr', '%4dk'),
                    shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
                    '|',
                    format_field(f, 'vcodec', default='unknown').replace('none', ''),
                    format_field(f, 'vbr', '%4dk'),
                    format_field(f, 'acodec', default='unknown').replace('none', ''),
                    format_field(f, 'abr', '%3dk'),
                    format_field(f, 'asr', '%5dHz'),
                    ', '.join(filter(None, (
                        'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
                        format_field(f, 'language', '[%s]'),
                        format_field(f, 'format_note'),
                        format_field(f, 'container', ignore=(None, f.get('ext'))),
                    ))),
                ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
                           '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
        else:
            table = [
                [
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    self._format_note(f)]
                for f in formats
                if f.get('preference') is None or f['preference'] >= -1000]
            header_line = ['format code', 'extension', 'resolution', 'note']

        self.to_screen(
            '[info] Available formats for %s:' % info_dict['id'])
        self.to_stdout(render_table(
            header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
3175
3176 def list_thumbnails(self, info_dict):
3177 thumbnails = list(info_dict.get('thumbnails'))
3178 if not thumbnails:
3179 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3180 return
3181
3182 self.to_screen(
3183 '[info] Thumbnails for %s:' % info_dict['id'])
3184 self.to_stdout(render_table(
3185 ['ID', 'width', 'height', 'URL'],
3186 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
3187
3188 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3189 if not subtitles:
3190 self.to_screen('%s has no %s' % (video_id, name))
3191 return
3192 self.to_screen(
3193 'Available %s for %s:' % (name, video_id))
3194
3195 def _row(lang, formats):
3196 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3197 if len(set(names)) == 1:
3198 names = [] if names[0] == 'unknown' else names[:1]
3199 return [lang, ', '.join(names), ', '.join(exts)]
3200
3201 self.to_stdout(render_table(
3202 ['Language', 'Name', 'Formats'],
3203 [_row(lang, formats) for lang, formats in subtitles.items()],
3204 hideEmpty=True))
3205
3206 def urlopen(self, req):
3207 """ Start an HTTP download """
3208 if isinstance(req, compat_basestring):
3209 req = sanitized_Request(req)
3210 return self._opener.open(req, timeout=self._socket_timeout)
3211
    def print_debug_header(self):
        """Write verbose debug information (encodings, versions, plugins,
        optional libraries, proxy map) when --verbose is given."""
        if not self.params.get('verbose'):
            return

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        source = detect_variant()
        self._write_string('[debug] yt-dlp version %s%s\n' % (__version__, '' if source == 'unknown' else f' ({source})'))
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled\n')
        if plugin_extractors or plugin_postprocessors:
            self._write_string('[debug] Plugins: %s\n' % [
                '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
                for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
        if self.params.get('compat_opts'):
            self._write_string(
                '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
        try:
            # Best-effort: only meaningful when running from a git checkout
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = process_communicate_or_kill(sp)
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: %s\n' % out)
        except Exception:
            try:
                # sys.exc_clear only exists on Python 2; harmless no-op here
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
            platform.python_version(),
            python_implementation(),
            platform.architecture()[0],
            platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
        ) or 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        from .downloader.websocket import has_websockets
        from .postprocessor.embedthumbnail import has_mutagen
        from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE

        lib_str = ', '.join(sorted(filter(None, (
            compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
            has_websockets and 'websockets',
            has_mutagen and 'mutagen',
            SQLITE_AVAILABLE and 'sqlite',
            KEYRING_AVAILABLE and 'keyring',
        )))) or 'none'
        self._write_string('[debug] Optional libraries: %s\n' % lib_str)

        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            return
            # NOTE(review): the code below is unreachable due to the `return`
            # above — the yt-dl.org version check appears deliberately
            # disabled; confirm before removing
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
3302
    def _setup_opener(self):
        """Build the urllib opener used for all HTTP(S) requests.

        Reads 'socket_timeout', 'cookiefile'/'cookiesfrombrowser', 'proxy'
        and 'debug_printtraffic' from self.params.

        Side effects: sets self._socket_timeout, self.cookiejar and
        self._opener.
        """
        timeout_val = self.params.get('socket_timeout')
        # Default timeout is 10 minutes; float() also accepts string values
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # An explicit empty string disables proxying entirely; any other
            # value is applied to both http and https
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # No explicit option: fall back to environment proxies (HTTP_PROXY etc.)
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
3350
3351 def encode(self, s):
3352 if isinstance(s, bytes):
3353 return s # Already encoded
3354
3355 try:
3356 return s.encode(self.get_encoding())
3357 except UnicodeEncodeError as err:
3358 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3359 raise
3360
3361 def get_encoding(self):
3362 encoding = self.params.get('encoding')
3363 if encoding is None:
3364 encoding = preferredencoding()
3365 return encoding
3366
3367 def _write_info_json(self, label, ie_result, infofn):
3368 ''' Write infojson and returns True = written, False = skip, None = error '''
3369 if not self.params.get('writeinfojson'):
3370 return False
3371 elif not infofn:
3372 self.write_debug(f'Skipping writing {label} infojson')
3373 return False
3374 elif not self._ensure_dir_exists(infofn):
3375 return None
3376 elif not self.params.get('overwrites', True) and os.path.exists(infofn):
3377 self.to_screen(f'[info] {label.title()} metadata is already present')
3378 else:
3379 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3380 try:
3381 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3382 except (OSError, IOError):
3383 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3384 return None
3385 return True
3386
3387 def _write_description(self, label, ie_result, descfn):
3388 ''' Write description and returns True = written, False = skip, None = error '''
3389 if not self.params.get('writedescription'):
3390 return False
3391 elif not descfn:
3392 self.write_debug(f'Skipping writing {label} description')
3393 return False
3394 elif not self._ensure_dir_exists(descfn):
3395 return None
3396 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3397 self.to_screen(f'[info] {label.title()} description is already present')
3398 elif ie_result.get('description') is None:
3399 self.report_warning(f'There\'s no {label} description to write')
3400 return False
3401 else:
3402 try:
3403 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3404 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3405 descfile.write(ie_result['description'])
3406 except (OSError, IOError):
3407 self.report_error(f'Cannot write {label} description file {descfn}')
3408 return None
3409 return True
3410
    def _write_subtitles(self, info_dict, filename):
        ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
        ret = []
        subtitles = info_dict.get('requested_subtitles')
        if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            return ret

        sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
        if not sub_filename_base:
            self.to_screen('[info] Skipping writing video subtitles')
            return ret
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            # sub_filename is derived from the media filename; sub_filename_final
            # from the 'subtitle' output template (they may differ)
            sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
            if not self.params.get('overwrites', True) and os.path.exists(sub_filename):
                # Keep the existing file but still record it in the result list
                self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
                continue

            self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
            if sub_info.get('data') is not None:
                # Subtitle content was extracted inline; write it directly
                try:
                    # Use newline='' to prevent conversion of newline characters
                    # See https://github.com/ytdl-org/youtube-dl/issues/10268
                    with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                        subfile.write(sub_info['data'])
                    sub_info['filepath'] = sub_filename
                    ret.append((sub_filename, sub_filename_final))
                    continue
                except (OSError, IOError):
                    # A local write failure is fatal for the whole call (None = error)
                    self.report_error(f'Cannot write video subtitles file {sub_filename}')
                    return None

            try:
                # No inline data: download the subtitle like a regular media file,
                # copying the dict so the original sub_info keeps its headers intact
                sub_copy = sub_info.copy()
                sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
                self.dl(sub_filename, sub_copy, subtitle=True)
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
            except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
                # Download failures are non-fatal: warn and continue with the next language
                self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
                continue
        return ret
3458
3459 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3460 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3461 write_all = self.params.get('write_all_thumbnails', False)
3462 thumbnails, ret = [], []
3463 if write_all or self.params.get('writethumbnail', False):
3464 thumbnails = info_dict.get('thumbnails') or []
3465 multiple = write_all and len(thumbnails) > 1
3466
3467 if thumb_filename_base is None:
3468 thumb_filename_base = filename
3469 if thumbnails and not thumb_filename_base:
3470 self.write_debug(f'Skipping writing {label} thumbnail')
3471 return ret
3472
3473 for t in thumbnails[::-1]:
3474 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3475 thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '')
3476 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3477 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3478
3479 if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
3480 ret.append((thumb_filename, thumb_filename_final))
3481 t['filepath'] = thumb_filename
3482 self.to_screen(f'[info] {thumb_display_id.title()} is already present')
3483 else:
3484 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3485 try:
3486 uf = self.urlopen(t['url'])
3487 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3488 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3489 shutil.copyfileobj(uf, thumbf)
3490 ret.append((thumb_filename, thumb_filename_final))
3491 t['filepath'] = thumb_filename
3492 except network_exceptions as err:
3493 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3494 if ret and not write_all:
3495 break
3496 return ret