jfr.im git - yt-dlp.git/blame_incremental

Commit	Line	Data
	1	#!/usr/bin/env python3
	2	# coding: utf-8
	3
	4	from __future__ import absolute_import, unicode_literals
	5
	6	import collections
	7	import contextlib
	8	import copy
	9	import datetime
	10	import errno
	11	import fileinput
	12	import functools
	13	import io
	14	import itertools
	15	import json
	16	import locale
	17	import operator
	18	import os
	19	import platform
	20	import re
	21	import shutil
	22	import subprocess
	23	import sys
	24	import tempfile
	25	import time
	26	import tokenize
	27	import traceback
	28	import random
	29	import unicodedata
	30
	31	from enum import Enum
	32	from string import ascii_letters
	33
	34	from .compat import (
	35	compat_basestring,
	36	compat_get_terminal_size,
	37	compat_kwargs,
	38	compat_numeric_types,
	39	compat_os_name,
	40	compat_pycrypto_AES,
	41	compat_shlex_quote,
	42	compat_str,
	43	compat_tokenize_tokenize,
	44	compat_urllib_error,
	45	compat_urllib_request,
	46	compat_urllib_request_DataHandler,
	47	windows_enable_vt_mode,
	48	)
	49	from .cookies import load_cookies
	50	from .utils import (
	51	age_restricted,
	52	args_to_str,
	53	ContentTooShortError,
	54	date_from_str,
	55	DateRange,
	56	DEFAULT_OUTTMPL,
	57	determine_ext,
	58	determine_protocol,
	59	DownloadCancelled,
	60	DownloadError,
	61	encode_compat_str,
	62	encodeFilename,
	63	EntryNotInPlaylist,
	64	error_to_compat_str,
	65	ExistingVideoReached,
	66	expand_path,
	67	ExtractorError,
	68	float_or_none,
	69	format_bytes,
	70	format_field,
	71	formatSeconds,
	72	GeoRestrictedError,
	73	HEADRequest,
	74	int_or_none,
	75	iri_to_uri,
	76	ISO3166Utils,
	77	join_nonempty,
	78	LazyList,
	79	LINK_TEMPLATES,
	80	locked_file,
	81	make_dir,
	82	make_HTTPS_handler,
	83	MaxDownloadsReached,
	84	network_exceptions,
	85	number_of_digits,
	86	orderedSet,
	87	OUTTMPL_TYPES,
	88	PagedList,
	89	parse_filesize,
	90	PerRequestProxyHandler,
	91	platform_name,
	92	Popen,
	93	PostProcessingError,
	94	preferredencoding,
	95	prepend_extension,
	96	register_socks_protocols,
	97	RejectedVideoReached,
	98	render_table,
	99	replace_extension,
	100	SameFileError,
	101	sanitize_filename,
	102	sanitize_path,
	103	sanitize_url,
	104	sanitized_Request,
	105	std_headers,
	106	STR_FORMAT_RE_TMPL,
	107	STR_FORMAT_TYPES,
	108	str_or_none,
	109	strftime_or_none,
	110	subtitles_filename,
	111	supports_terminal_sequences,
	112	ThrottledDownload,
	113	to_high_limit_path,
	114	traverse_obj,
	115	try_get,
	116	UnavailableVideoError,
	117	url_basename,
	118	variadic,
	119	version_tuple,
	120	write_json_file,
	121	write_string,
	122	YoutubeDLCookieProcessor,
	123	YoutubeDLHandler,
	124	YoutubeDLRedirectHandler,
	125	)
	126	from .cache import Cache
	127	from .minicurses import format_text
	128	from .extractor import (
	129	gen_extractor_classes,
	130	get_info_extractor,
	131	_LAZY_LOADER,
	132	_PLUGIN_CLASSES as plugin_extractors
	133	)
	134	from .extractor.openload import PhantomJSwrapper
	135	from .downloader import (
	136	FFmpegFD,
	137	get_suitable_downloader,
	138	shorten_protocol_name
	139	)
	140	from .downloader.rtmp import rtmpdump_version
	141	from .postprocessor import (
	142	get_postprocessor,
	143	EmbedThumbnailPP,
	144	FFmpegFixupDurationPP,
	145	FFmpegFixupM3u8PP,
	146	FFmpegFixupM4aPP,
	147	FFmpegFixupStretchedPP,
	148	FFmpegFixupTimestampPP,
	149	FFmpegMergerPP,
	150	FFmpegPostProcessor,
	151	MoveFilesAfterDownloadPP,
	152	_PLUGIN_CLASSES as plugin_postprocessors
	153	)
	154	from .update import detect_variant
	155	from .version import __version__
	156
	157	if compat_os_name == 'nt':
	158	import ctypes
	159
	160
	161	class YoutubeDL(object):
	162	"""YoutubeDL class.
	163
	164	YoutubeDL objects are the ones responsible for downloading the
	165	actual video file and writing it to disk if the user has requested
	166	it, among some other tasks. In most cases there should be one per
	167	program. As, given a video URL, the downloader doesn't know how to
	168	extract all the needed information, task that InfoExtractors do, it
	169	has to pass the URL to one of them.
	170
	171	For this, YoutubeDL objects have a method that allows
	172	InfoExtractors to be registered in a given order. When it is passed
	173	a URL, the YoutubeDL object hands it to the first InfoExtractor it
	174	finds that reports being able to handle it. The InfoExtractor extracts
	175	all the information about the video or videos the URL refers to, and
	176	YoutubeDL processes the extracted information, possibly using a File
	177	Downloader to download the video.
	178
	179	YoutubeDL objects accept a lot of parameters. In order not to saturate
	180	the object constructor with arguments, it receives a dictionary of
	181	options instead. These options are available through the params
	182	attribute for the InfoExtractors to use. The YoutubeDL also
	183	registers itself as the downloader in charge for the InfoExtractors
	184	that are added to it, so this is a "mutual registration".
	185
	186	Available options:
	187
	188	username: Username for authentication purposes.
	189	password: Password for authentication purposes.
	190	videopassword: Password for accessing a video.
	191	ap_mso: Adobe Pass multiple-system operator identifier.
	192	ap_username: Multiple-system operator account username.
	193	ap_password: Multiple-system operator account password.
	194	usenetrc: Use netrc for authentication instead.
	195	verbose: Print additional info to stdout.
	196	quiet: Do not print messages to stdout.
	197	no_warnings: Do not print out anything for warnings.
	198	forceprint: A list of templates to force print
	199	forceurl: Force printing final URL. (Deprecated)
	200	forcetitle: Force printing title. (Deprecated)
	201	forceid: Force printing ID. (Deprecated)
	202	forcethumbnail: Force printing thumbnail URL. (Deprecated)
	203	forcedescription: Force printing description. (Deprecated)
	204	forcefilename: Force printing final filename. (Deprecated)
	205	forceduration: Force printing duration. (Deprecated)
	206	forcejson: Force printing info_dict as JSON.
	207	dump_single_json: Force printing the info_dict of the whole playlist
	208	(or video) as a single JSON line.
	209	force_write_download_archive: Force writing download archive regardless
	210	of 'skip_download' or 'simulate'.
	211	simulate: Do not download the video files. If unset (or None),
	212	simulate only if listsubtitles, listformats or list_thumbnails is used
	213	format: Video format code. see "FORMAT SELECTION" for more details.
	214	You can also pass a function. The function takes 'ctx' as
	215	argument and returns the formats to download.
	216	See "build_format_selector" for an implementation
	217	allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
	218	ignore_no_formats_error: Ignore "No video formats" error. Useful for
	219	extracting metadata even if the video is not actually
	220	available for download (experimental)
	221	format_sort: A list of fields by which to sort the video formats.
	222	See "Sorting Formats" for more details.
	223	format_sort_force: Force the given format_sort. see "Sorting Formats"
	224	for more details.
	225	allow_multiple_video_streams: Allow multiple video streams to be merged
	226	into a single file
	227	allow_multiple_audio_streams: Allow multiple audio streams to be merged
	228	into a single file
	229	check_formats: Whether to test if the formats are downloadable.
	230	Can be True (check all), False (check none),
	231	'selected' (check selected formats),
	232	or None (check only if requested by extractor)
	233	paths: Dictionary of output paths. The allowed keys are 'home'
	234	'temp' and the keys of OUTTMPL_TYPES (in utils.py)
	235	outtmpl: Dictionary of templates for output names. Allowed keys
	236	are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
	237	For compatibility with youtube-dl, a single string can also be used
	238	outtmpl_na_placeholder: Placeholder for unavailable meta fields.
	239	restrictfilenames: Do not allow "&" and spaces in file names
	240	trim_file_name: Limit length of filename (extension excluded)
	241	windowsfilenames: Force the filenames to be windows compatible
	242	ignoreerrors: Do not stop on download/postprocessing errors.
	243	Can be 'only_download' to ignore only download errors.
	244	Default is 'only_download' for CLI, but False for API
	245	skip_playlist_after_errors: Number of allowed failures until the rest of
	246	the playlist is skipped
	247	force_generic_extractor: Force downloader to use the generic extractor
	248	overwrites: Overwrite all video and metadata files if True,
	249	overwrite only non-video files if None
	250	and don't overwrite any file if False
	251	For compatibility with youtube-dl,
	252	"nooverwrites" may also be used instead
	253	playliststart: Playlist item to start at.
	254	playlistend: Playlist item to end at.
	255	playlist_items: Specific indices of playlist to download.
	256	playlistreverse: Download playlist items in reverse order.
	257	playlistrandom: Download playlist items in random order.
	258	matchtitle: Download only matching titles.
	259	rejecttitle: Reject downloads for matching titles.
	260	logger: Log messages to a logging.Logger instance.
	261	logtostderr: Log messages to stderr instead of stdout.
	262	consoletitle: Display progress in console window's titlebar.
	263	writedescription: Write the video description to a .description file
	264	writeinfojson: Write the video metadata to a .info.json file
	265	clean_infojson: Remove private fields from the infojson
	266	getcomments: Extract video comments. This will not be written to disk
	267	unless writeinfojson is also given
	268	writeannotations: Write the video annotations to a .annotations.xml file
	269	writethumbnail: Write the thumbnail image to a file
	270	allow_playlist_files: Whether to write playlists' description, infojson etc
	271	also to disk when using the 'write*' options
	272	write_all_thumbnails: Write all thumbnail formats to files
	273	writelink: Write an internet shortcut file, depending on the
	274	current platform (.url/.webloc/.desktop)
	275	writeurllink: Write a Windows internet shortcut file (.url)
	276	writewebloclink: Write a macOS internet shortcut file (.webloc)
	277	writedesktoplink: Write a Linux internet shortcut file (.desktop)
	278	writesubtitles: Write the video subtitles to a file
	279	writeautomaticsub: Write the automatically generated subtitles to a file
	280	allsubtitles: Deprecated - Use subtitleslangs = ['all']
	281	Downloads all the subtitles of the video
	282	(requires writesubtitles or writeautomaticsub)
	283	listsubtitles: Lists all available subtitles for the video
	284	subtitlesformat: The format code for subtitles
	285	subtitleslangs: List of languages of the subtitles to download (can be regex).
	286	The list may contain "all" to refer to all the available
	287	subtitles. The language can be prefixed with a "-" to
	288	exclude it from the requested languages. Eg: ['all', '-live_chat']
	289	keepvideo: Keep the video file after post-processing
	290	daterange: A DateRange object, download only if the upload_date is in the range.
	291	skip_download: Skip the actual download of the video file
	292	cachedir: Location of the cache files in the filesystem.
	293	False to disable filesystem cache.
	294	noplaylist: Download single video instead of a playlist if in doubt.
	295	age_limit: An integer representing the user's age in years.
	296	Unsuitable videos for the given age are skipped.
	297	min_views: An integer representing the minimum view count the video
	298	must have in order to not be skipped.
	299	Videos without view count information are always
	300	downloaded. None for no limit.
	301	max_views: An integer representing the maximum view count.
	302	Videos that are more popular than that are not
	303	downloaded.
	304	Videos without view count information are always
	305	downloaded. None for no limit.
	306	download_archive: File name of a file where all downloads are recorded.
	307	Videos already present in the file are not downloaded
	308	again.
	309	break_on_existing: Stop the download process after attempting to download a
	310	file that is in the archive.
	311	break_on_reject: Stop the download process when encountering a video that
	312	has been filtered out.
	313	cookiefile: File name where cookies should be read from and dumped to
	314	cookiesfrombrowser: A tuple containing the name of the browser and the profile
	315	name/path from where cookies are loaded.
	316	Eg: ('chrome', ) or ('vivaldi', 'default')
	317	nocheckcertificate:Do not verify SSL certificates
	318	prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
	319	At the moment, this is only supported by YouTube.
	320	proxy: URL of the proxy server to use
	321	geo_verification_proxy: URL of the proxy to use for IP address verification
	322	on geo-restricted sites.
	323	socket_timeout: Time to wait for unresponsive hosts, in seconds
	324	bidi_workaround: Work around buggy terminals without bidirectional text
	325	support, using fribidi
	326	debug_printtraffic:Print out sent and received HTTP traffic
	327	include_ads: Download ads as well
	328	default_search: Prepend this string if an input url is not valid.
	329	'auto' for elaborate guessing
	330	encoding: Use this encoding instead of the system-specified.
	331	extract_flat: Do not resolve URLs, return the immediate result.
	332	Pass in 'in_playlist' to only show this behavior for
	333	playlist items.
	334	postprocessors: A list of dictionaries, each with an entry
	335	* key: The name of the postprocessor. See
	336	yt_dlp/postprocessor/__init__.py for a list.
	337	* when: When to run the postprocessor. Can be one of
	338	pre_process\|before_dl\|post_process\|after_move.
	339	Assumed to be 'post_process' if not given
	340	post_hooks: Deprecated - Register a custom postprocessor instead
	341	A list of functions that get called as the final step
	342	for each video file, after all postprocessors have been
	343	called. The filename will be passed as the only argument.
	344	progress_hooks: A list of functions that get called on download
	345	progress, with a dictionary with the entries
	346	* status: One of "downloading", "error", or "finished".
	347	Check this first and ignore unknown values.
	348	* info_dict: The extracted info_dict
	349
	350	If status is one of "downloading", or "finished", the
	351	following properties may also be present:
	352	* filename: The final filename (always present)
	353	* tmpfilename: The filename we're currently writing to
	354	* downloaded_bytes: Bytes on disk
	355	* total_bytes: Size of the whole file, None if unknown
	356	* total_bytes_estimate: Guess of the eventual file size,
	357	None if unavailable.
	358	* elapsed: The number of seconds since download started.
	359	* eta: The estimated time in seconds, None if unknown
	360	* speed: The download speed in bytes/second, None if
	361	unknown
	362	* fragment_index: The counter of the currently
	363	downloaded video fragment.
	364	* fragment_count: The number of fragments (= individual
	365	files that will be merged)
	366
	367	Progress hooks are guaranteed to be called at least once
	368	(with status "finished") if the download is successful.
	369	postprocessor_hooks: A list of functions that get called on postprocessing
	370	progress, with a dictionary with the entries
	371	* status: One of "started", "processing", or "finished".
	372	Check this first and ignore unknown values.
	373	* postprocessor: Name of the postprocessor
	374	* info_dict: The extracted info_dict
	375
	376	Postprocessor hooks are guaranteed to be called at least twice
	377	(with status "started" and "finished") if the processing is successful.
	378	merge_output_format: Extension to use when merging formats.
	379	final_ext: Expected final extension; used to detect when the file was
	380	already downloaded and converted
	381	fixup: Automatically correct known faults of the file.
	382	One of:
	383	- "never": do nothing
	384	- "warn": only emit a warning
	385	- "detect_or_warn": check whether we can do anything
	386	about it, warn otherwise (default)
	387	source_address: Client-side IP address to bind to.
	388	call_home: Boolean, true iff we are allowed to contact the
	389	yt-dlp servers for debugging. (BROKEN)
	390	sleep_interval_requests: Number of seconds to sleep between requests
	391	during extraction
	392	sleep_interval: Number of seconds to sleep before each download when
	393	used alone or a lower bound of a range for randomized
	394	sleep before each download (minimum possible number
	395	of seconds to sleep) when used along with
	396	max_sleep_interval.
	397	max_sleep_interval:Upper bound of a range for randomized sleep before each
	398	download (maximum possible number of seconds to sleep).
	399	Must only be used along with sleep_interval.
	400	Actual sleep time will be a random float from range
	401	[sleep_interval; max_sleep_interval].
	402	sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
	403	listformats: Print an overview of available video formats and exit.
	404	list_thumbnails: Print a table of all thumbnails and exit.
	405	match_filter: A function that gets called with the info_dict of
	406	every video.
	407	If it returns a message, the video is ignored.
	408	If it returns None, the video is downloaded.
	409	match_filter_func in utils.py is one example for this.
	410	no_color: Do not emit color codes in output.
	411	geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
	412	HTTP header
	413	geo_bypass_country:
	414	Two-letter ISO 3166-1 alpha-2 country code that will be used for
	415	explicit geographic restriction bypassing via faking
	416	X-Forwarded-For HTTP header
	417	geo_bypass_ip_block:
	418	IP range in CIDR notation that will be used similarly to
	419	geo_bypass_country
	420
	421	The following options determine which downloader is picked:
	422	external_downloader: A dictionary of protocol keys and the executable of the
	423	external downloader to use for it. The allowed protocols
	424	are default\|http\|ftp\|m3u8\|dash\|rtsp\|rtmp\|mms.
	425	Set the value to 'native' to use the native downloader
	426	hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
	427	or {'m3u8': 'ffmpeg'} instead.
	428	Use the native HLS downloader instead of ffmpeg/avconv
	429	if True, otherwise use ffmpeg/avconv if False, otherwise
	430	use downloader suggested by extractor if None.
	431	compat_opts: Compatibility options. See "Differences in default behavior".
	432	The following options do not work when used through the API:
	433	filename, abort-on-error, multistreams, no-live-chat, format-sort,
	434	no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
	435	Refer to __init__.py for their implementation
	436	progress_template: Dictionary of templates for progress outputs.
	437	Allowed keys are 'download', 'postprocess',
	438	'download-title' (console title) and 'postprocess-title'.
	439	The template is mapped on a dictionary with keys 'progress' and 'info'
	440
	441	The following parameters are not used by YoutubeDL itself, they are used by
	442	the downloader (see yt_dlp/downloader/common.py):
	443	nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
	444	max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
	445	noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
	446	external_downloader_args, concurrent_fragment_downloads.
	447
	448	The following options are used by the post processors:
	449	prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
	450	otherwise prefer ffmpeg. (avconv support is deprecated)
	451	ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
	452	to the binary or its containing directory.
	453	postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
	454	and a list of additional command-line arguments for the
	455	postprocessor/executable. The dict can also have "PP+EXE" keys
	456	which are used when the given exe is used by the given PP.
	457	Use 'default' as the name for arguments to be passed to all PP
	458	For compatibility with youtube-dl, a single list of args
	459	can also be used
	460
	461	The following options are used by the extractors:
	462	extractor_retries: Number of times to retry for known errors
	463	dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
	464	hls_split_discontinuity: Split HLS playlists to different formats at
	465	discontinuities such as ad breaks (default: False)
	466	extractor_args: A dictionary of arguments to be passed to the extractors.
	467	See "EXTRACTOR ARGUMENTS" for details.
	468	Eg: {'youtube': {'skip': ['dash', 'hls']}}
	469	youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
	470	If True (default), DASH manifests and related
	471	data will be downloaded and processed by extractor.
	472	You can reduce network I/O by disabling it if you don't
	473	care about DASH. (only for youtube)
	474	youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
	475	If True (default), HLS manifests and related
	476	data will be downloaded and processed by extractor.
	477	You can reduce network I/O by disabling it if you don't
	478	care about HLS. (only for youtube)
	479	"""
	480
	481	_NUMERIC_FIELDS = set((
	482	'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
	483	'timestamp', 'release_timestamp',
	484	'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
	485	'average_rating', 'comment_count', 'age_limit',
	486	'start_time', 'end_time',
	487	'chapter_number', 'season_number', 'episode_number',
	488	'track_number', 'disc_number', 'release_year',
	489	))
	490
	491	_format_selection_exts = {
	492	'audio': {'m4a', 'mp3', 'ogg', 'aac'},
	493	'video': {'mp4', 'flv', 'webm', '3gp'},
	494	'storyboards': {'mhtml'},
	495	}
	496
	497	params = None
	498	_ies = {}
	499	_pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
	500	_printed_messages = set()

1

#!/usr/bin/env python3

2

# coding: utf-8

3

4

from __future__ import absolute_import, unicode_literals

import collections

import contextlib

import copy

import datetime

import errno

import fileinput

import functools

import io

import itertools

import json

import locale

import operator

import os

import platform

import re

import shutil

import subprocess

import sys

import tempfile

import time

import tokenize

import traceback

import random

import unicodedata

from enum import Enum

32

from string import ascii_letters

33

34

from .compat import (

35

compat_basestring,

36

compat_get_terminal_size,

37

compat_kwargs,

38

compat_numeric_types,

compat_os_name,

compat_pycrypto_AES,

compat_shlex_quote,

compat_str,

compat_tokenize_tokenize,

44

compat_urllib_error,

45

compat_urllib_request,

46

compat_urllib_request_DataHandler,

47

windows_enable_vt_mode,

48

)

49

from .cookies import load_cookies

from .utils import (

age_restricted,

args_to_str,

ContentTooShortError,

date_from_str,

DateRange,

DEFAULT_OUTTMPL,

determine_ext,

determine_protocol,

DownloadCancelled,

DownloadError,

encode_compat_str,

encodeFilename,

EntryNotInPlaylist,

error_to_compat_str,

ExistingVideoReached,

expand_path,

ExtractorError,

float_or_none,

format_bytes,

format_field,

formatSeconds,

GeoRestrictedError,

HEADRequest,

int_or_none,

iri_to_uri,

ISO3166Utils,

join_nonempty,

LazyList,

LINK_TEMPLATES,

locked_file,

make_dir,

make_HTTPS_handler,

MaxDownloadsReached,

network_exceptions,

number_of_digits,

orderedSet,

OUTTMPL_TYPES,

PagedList,

parse_filesize,

PerRequestProxyHandler,

platform_name,

Popen,

PostProcessingError,

preferredencoding,

prepend_extension,

register_socks_protocols,

97

RejectedVideoReached,

render_table,

replace_extension,

SameFileError,

sanitize_filename,

sanitize_path,

sanitize_url,

sanitized_Request,

std_headers,

STR_FORMAT_RE_TMPL,

STR_FORMAT_TYPES,

str_or_none,

strftime_or_none,

subtitles_filename,

supports_terminal_sequences,

ThrottledDownload,

to_high_limit_path,

traverse_obj,

try_get,

UnavailableVideoError,

url_basename,

variadic,

version_tuple,

write_json_file,

write_string,

YoutubeDLCookieProcessor,

123

YoutubeDLHandler,

124

YoutubeDLRedirectHandler,

125

)

126

from .cache import Cache

127

from .minicurses import format_text

128

from .extractor import (

129

gen_extractor_classes,

130

get_info_extractor,

131

_LAZY_LOADER,

132

_PLUGIN_CLASSES as plugin_extractors

133

)

134

from .extractor.openload import PhantomJSwrapper

135

from .downloader import (

136

FFmpegFD,

137

get_suitable_downloader,

138

shorten_protocol_name

139

)

140

from .downloader.rtmp import rtmpdump_version

141

from .postprocessor import (

142

get_postprocessor,

143

EmbedThumbnailPP,

144

FFmpegFixupDurationPP,

145

FFmpegFixupM3u8PP,

146

FFmpegFixupM4aPP,

147

FFmpegFixupStretchedPP,

148

FFmpegFixupTimestampPP,

149

FFmpegMergerPP,

150

FFmpegPostProcessor,

151

MoveFilesAfterDownloadPP,

152

_PLUGIN_CLASSES as plugin_postprocessors

153

)

154

from .update import detect_variant

155

from .version import __version__

156

157

if compat_os_name == 'nt':

import ctypes

class YoutubeDL(object):

162

"""YoutubeDL class.

163

164

YoutubeDL objects are the ones responsible for downloading the

165

actual video file and writing it to disk if the user has requested

166

it, among some other tasks. In most cases there should be one per

167

program. As, given a video URL, the downloader doesn't know how to

168

extract all the needed information, task that InfoExtractors do, it

169

has to pass the URL to one of them.

170

171

For this, YoutubeDL objects have a method that allows

172

InfoExtractors to be registered in a given order. When it is passed

173

a URL, the YoutubeDL object hands it to the first InfoExtractor it

174

finds that reports being able to handle it. The InfoExtractor extracts

175

all the information about the video or videos the URL refers to, and

176

YoutubeDL processes the extracted information, possibly using a File

177

Downloader to download the video.

178

179

YoutubeDL objects accept a lot of parameters. In order not to saturate

180

the object constructor with arguments, it receives a dictionary of

181

options instead. These options are available through the params

182

attribute for the InfoExtractors to use. The YoutubeDL also

183

registers itself as the downloader in charge for the InfoExtractors

184

that are added to it, so this is a "mutual registration".

Available options:

username: Username for authentication purposes.

189

password: Password for authentication purposes.

190

videopassword: Password for accessing a video.

191

ap_mso: Adobe Pass multiple-system operator identifier.

192

ap_username: Multiple-system operator account username.

193

ap_password: Multiple-system operator account password.

194

usenetrc: Use netrc for authentication instead.

195

verbose: Print additional info to stdout.

196

quiet: Do not print messages to stdout.

197

no_warnings: Do not print out anything for warnings.

198

forceprint: A list of templates to force print

199

forceurl: Force printing final URL. (Deprecated)

200

forcetitle: Force printing title. (Deprecated)

201

forceid: Force printing ID. (Deprecated)

202

forcethumbnail: Force printing thumbnail URL. (Deprecated)

203

forcedescription: Force printing description. (Deprecated)

204

forcefilename: Force printing final filename. (Deprecated)

205

forceduration: Force printing duration. (Deprecated)

206

forcejson: Force printing info_dict as JSON.

207

dump_single_json: Force printing the info_dict of the whole playlist

208

(or video) as a single JSON line.

209

force_write_download_archive: Force writing download archive regardless

210

of 'skip_download' or 'simulate'.

211

simulate: Do not download the video files. If unset (or None),

212

simulate only if listsubtitles, listformats or list_thumbnails is used

213

format: Video format code. see "FORMAT SELECTION" for more details.

214

You can also pass a function. The function takes 'ctx' as

215

argument and returns the formats to download.

216

See "build_format_selector" for an implementation

217

allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.

218

ignore_no_formats_error: Ignore "No video formats" error. Useful for

219

extracting metadata even if the video is not actually

220

available for download (experimental)

221

format_sort: A list of fields by which to sort the video formats.

222

See "Sorting Formats" for more details.

223

format_sort_force: Force the given format_sort. see "Sorting Formats"

224

for more details.

225

allow_multiple_video_streams: Allow multiple video streams to be merged

226

into a single file

227

allow_multiple_audio_streams: Allow multiple audio streams to be merged

228

into a single file

229

check_formats Whether to test if the formats are downloadable.

230

Can be True (check all), False (check none),

231

'selected' (check selected formats),

232

or None (check only if requested by extractor)

233

paths: Dictionary of output paths. The allowed keys are 'home'

234

'temp' and the keys of OUTTMPL_TYPES (in utils.py)

235

outtmpl: Dictionary of templates for output names. Allowed keys

236

are 'default' and the keys of OUTTMPL_TYPES (in utils.py).

237

For compatibility with youtube-dl, a single string can also be used

238

outtmpl_na_placeholder: Placeholder for unavailable meta fields.

239

restrictfilenames: Do not allow "&" and spaces in file names

240

trim_file_name: Limit length of filename (extension excluded)

241

windowsfilenames: Force the filenames to be windows compatible

242

ignoreerrors: Do not stop on download/postprocessing errors.

243

Can be 'only_download' to ignore only download errors.

244

Default is 'only_download' for CLI, but False for API

245

skip_playlist_after_errors: Number of allowed failures until the rest of

246

the playlist is skipped

247

force_generic_extractor: Force downloader to use the generic extractor

248

overwrites: Overwrite all video and metadata files if True,

249

overwrite only non-video files if None

250

and don't overwrite any file if False

251

For compatibility with youtube-dl,

252

"nooverwrites" may also be used instead

253

playliststart: Playlist item to start at.

254

playlistend: Playlist item to end at.

255

playlist_items: Specific indices of playlist to download.

256

playlistreverse: Download playlist items in reverse order.

257

playlistrandom: Download playlist items in random order.

258

matchtitle: Download only matching titles.

259

rejecttitle: Reject downloads for matching titles.

260

logger: Log messages to a logging.Logger instance.

261

logtostderr: Log messages to stderr instead of stdout.

262

consoletitle: Display progress in console window's titlebar.

263

writedescription: Write the video description to a .description file

264

writeinfojson: Write the video metadata to a .info.json file

265

clean_infojson: Remove private fields from the infojson

266

getcomments: Extract video comments. This will not be written to disk

267

unless writeinfojson is also given

268

writeannotations: Write the video annotations to a .annotations.xml file

269

writethumbnail: Write the thumbnail image to a file

270

allow_playlist_files: Whether to write playlists' description, infojson etc

271

also to disk when using the 'write*' options

272

write_all_thumbnails: Write all thumbnail formats to files

273

writelink: Write an internet shortcut file, depending on the

274

current platform (.url/.webloc/.desktop)

275

writeurllink: Write a Windows internet shortcut file (.url)

276

writewebloclink: Write a macOS internet shortcut file (.webloc)

277

writedesktoplink: Write a Linux internet shortcut file (.desktop)

278

writesubtitles: Write the video subtitles to a file

279

writeautomaticsub: Write the automatically generated subtitles to a file

280

allsubtitles: Deprecated - Use subtitleslangs = ['all']

281

Downloads all the subtitles of the video

282

(requires writesubtitles or writeautomaticsub)

283

listsubtitles: Lists all available subtitles for the video

284

subtitlesformat: The format code for subtitles

285

subtitleslangs: List of languages of the subtitles to download (can be regex).

286

The list may contain "all" to refer to all the available

287

subtitles. The language can be prefixed with a "-" to

288

exclude it from the requested languages. Eg: ['all', '-live_chat']

289

keepvideo: Keep the video file after post-processing

290

daterange: A DateRange object, download only if the upload_date is in the range.

291

skip_download: Skip the actual download of the video file

292

cachedir: Location of the cache files in the filesystem.

293

False to disable filesystem cache.

294

noplaylist: Download single video instead of a playlist if in doubt.

295

age_limit: An integer representing the user's age in years.

296

Unsuitable videos for the given age are skipped.

297

min_views: An integer representing the minimum view count the video

298

must have in order to not be skipped.

299

Videos without view count information are always

300

downloaded. None for no limit.

301

max_views: An integer representing the maximum view count.

302

Videos that are more popular than that are not

303

downloaded.

304

Videos without view count information are always

305

downloaded. None for no limit.

306

download_archive: File name of a file where all downloads are recorded.

307

Videos already present in the file are not downloaded

308

again.

309

break_on_existing: Stop the download process after attempting to download a

310

file that is in the archive.

311

break_on_reject: Stop the download process when encountering a video that

312

has been filtered out.

313

cookiefile: File name where cookies should be read from and dumped to

314

cookiesfrombrowser: A tuple containing the name of the browser and the profile

315

name/path from where cookies are loaded.

316

Eg: ('chrome', ) or ('vivaldi', 'default')

317

nocheckcertificate:Do not verify SSL certificates

318

prefer_insecure: Use HTTP instead of HTTPS to retrieve information.

319

At the moment, this is only supported by YouTube.

320

proxy: URL of the proxy server to use

321

geo_verification_proxy: URL of the proxy to use for IP address verification

322

on geo-restricted sites.

323

socket_timeout: Time to wait for unresponsive hosts, in seconds

324

bidi_workaround: Work around buggy terminals without bidirectional text

325

support, using fribidi

326

debug_printtraffic:Print out sent and received HTTP traffic

327

include_ads: Download ads as well

328

default_search: Prepend this string if an input url is not valid.

329

'auto' for elaborate guessing

330

encoding: Use this encoding instead of the system-specified.

331

extract_flat: Do not resolve URLs, return the immediate result.

332

Pass in 'in_playlist' to only show this behavior for

333

playlist items.

334

postprocessors: A list of dictionaries, each with an entry

335

* key: The name of the postprocessor. See

336

yt_dlp/postprocessor/__init__.py for a list.

337

* when: When to run the postprocessor. Can be one of

338

pre_process|before_dl|post_process|after_move.

339

Assumed to be 'post_process' if not given

340

post_hooks: Deprecated - Register a custom postprocessor instead

341

A list of functions that get called as the final step

342

for each video file, after all postprocessors have been

343

called. The filename will be passed as the only argument.

344

progress_hooks: A list of functions that get called on download

345

progress, with a dictionary with the entries

346

* status: One of "downloading", "error", or "finished".

347

Check this first and ignore unknown values.

348

* info_dict: The extracted info_dict

349

350

If status is one of "downloading", or "finished", the

351

following properties may also be present:

352

* filename: The final filename (always present)

353

* tmpfilename: The filename we're currently writing to

354

* downloaded_bytes: Bytes on disk

355

* total_bytes: Size of the whole file, None if unknown

356

* total_bytes_estimate: Guess of the eventual file size,

357

None if unavailable.

358

* elapsed: The number of seconds since download started.

359

* eta: The estimated time in seconds, None if unknown

360

* speed: The download speed in bytes/second, None if

361

unknown

362

* fragment_index: The counter of the currently

363

downloaded video fragment.

364

* fragment_count: The number of fragments (= individual

365

files that will be merged)

366

367

Progress hooks are guaranteed to be called at least once

368

(with status "finished") if the download is successful.

369

postprocessor_hooks: A list of functions that get called on postprocessing

370

progress, with a dictionary with the entries

371

* status: One of "started", "processing", or "finished".

372

Check this first and ignore unknown values.

373

* postprocessor: Name of the postprocessor

374

* info_dict: The extracted info_dict

375

376

Progress hooks are guaranteed to be called at least twice

377

(with status "started" and "finished") if the processing is successful.

378

merge_output_format: Extension to use when merging formats.

379

final_ext: Expected final extension; used to detect when the file was

380

already downloaded and converted

381

fixup: Automatically correct known faults of the file.

382

One of:

383

- "never": do nothing

384

- "warn": only emit a warning

385

- "detect_or_warn": check whether we can do anything

386

about it, warn otherwise (default)

387

source_address: Client-side IP address to bind to.

388

call_home: Boolean, true iff we are allowed to contact the

389

yt-dlp servers for debugging. (BROKEN)

390

sleep_interval_requests: Number of seconds to sleep between requests

391

during extraction

392

sleep_interval: Number of seconds to sleep before each download when

393

used alone or a lower bound of a range for randomized

394

sleep before each download (minimum possible number

395

of seconds to sleep) when used along with

396

max_sleep_interval.

397

max_sleep_interval:Upper bound of a range for randomized sleep before each

398

download (maximum possible number of seconds to sleep).

399

Must only be used along with sleep_interval.

400

Actual sleep time will be a random float from range

401

[sleep_interval; max_sleep_interval].

402

sleep_interval_subtitles: Number of seconds to sleep before each subtitle download

403

listformats: Print an overview of available video formats and exit.

404

list_thumbnails: Print a table of all thumbnails and exit.

405

match_filter: A function that gets called with the info_dict of

406

every video.

407

If it returns a message, the video is ignored.

408

If it returns None, the video is downloaded.

409

match_filter_func in utils.py is one example for this.

410

no_color: Do not emit color codes in output.

411

geo_bypass: Bypass geographic restriction via faking X-Forwarded-For

412

HTTP header

413

geo_bypass_country:

414

Two-letter ISO 3166-1 alpha-2 country code that will be used for

415

explicit geographic restriction bypassing via faking

416

X-Forwarded-For HTTP header

417

geo_bypass_ip_block:

418

IP range in CIDR notation that will be used similarly to

419

geo_bypass_country

420

421

The following options determine which downloader is picked:

422

external_downloader: A dictionary of protocol keys and the executable of the

423

external downloader to use for it. The allowed protocols

424

425

Set the value to 'native' to use the native downloader

426

hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}

427

or {'m3u8': 'ffmpeg'} instead.

428

Use the native HLS downloader instead of ffmpeg/avconv

429

if True, otherwise use ffmpeg/avconv if False, otherwise

430

use downloader suggested by extractor if None.

431

compat_opts: Compatibility options. See "Differences in default behavior".

432

The following options do not work when used through the API:

433

filename, abort-on-error, multistreams, no-live-chat, format-sort

434

no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.

435

Refer to __init__.py for their implementation

436

progress_template: Dictionary of templates for progress outputs.

437

Allowed keys are 'download', 'postprocess',

438

'download-title' (console title) and 'postprocess-title'.

439

The template is mapped on a dictionary with keys 'progress' and 'info'

440

441

The following parameters are not used by YoutubeDL itself, they are used by

442

the downloader (see yt_dlp/downloader/common.py):

443

nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,

444

max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,

445

noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,

446

external_downloader_args, concurrent_fragment_downloads.

447

448

The following options are used by the post processors:

449

prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,

450

otherwise prefer ffmpeg. (avconv support is deprecated)

451

ffmpeg_location: Location of the ffmpeg/avconv binary; either the path

452

to the binary or its containing directory.

453

postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)

454

and a list of additional command-line arguments for the

455

postprocessor/executable. The dict can also have "PP+EXE" keys

456

which are used when the given exe is used by the given PP.

457

Use 'default' as the name for arguments to be passed to all PP

458

For compatibility with youtube-dl, a single list of args

459

can also be used

460

461

The following options are used by the extractors:

462

extractor_retries: Number of times to retry for known errors

463

dynamic_mpd: Whether to process dynamic DASH manifests (default: True)

464

hls_split_discontinuity: Split HLS playlists to different formats at

465

discontinuities such as ad breaks (default: False)

466

extractor_args: A dictionary of arguments to be passed to the extractors.

467

See "EXTRACTOR ARGUMENTS" for details.

468

Eg: {'youtube': {'skip': ['dash', 'hls']}}

469

youtube_include_dash_manifest: Deprecated - Use extractor_args instead.

470

If True (default), DASH manifests and related

471

data will be downloaded and processed by extractor.

472

You can reduce network I/O by disabling it if you don't

473

care about DASH. (only for youtube)

474

youtube_include_hls_manifest: Deprecated - Use extractor_args instead.

475

If True (default), HLS manifests and related

476

data will be downloaded and processed by extractor.

477

You can reduce network I/O by disabling it if you don't

478

care about HLS. (only for youtube)

479

"""

480

481

# Names of info fields that hold numbers.
# NOTE(review): the consumer is outside this chunk - presumably output-template handling; confirm.
_NUMERIC_FIELDS = {
    'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
    'timestamp', 'release_timestamp',
    'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
    'average_rating', 'comment_count', 'age_limit',
    'start_time', 'end_time',
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number', 'release_year',
}

# File extensions grouped by media kind
_format_selection_exts = {
    'audio': {'m4a', 'mp3', 'ogg', 'aac'},
    'video': {'mp4', 'flv', 'webm', '3gp'},
    'storyboards': {'mhtml'},
}

# Class-level defaults; __init__ rebinds most of these per instance
params = None
_ies = {}
_pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
_printed_messages = set()
_first_webpage_request = True
_download_retcode = None
_num_downloads = None
_playlist_level = 0
_playlist_urls = set()
_screen_file = None

507

508

def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    @param params       Dictionary of options (see the class docstring).
                        Note that the dictionary is stored as-is and is
                        modified in place for some compatibility options.
    @param auto_init    Whether to load the default extractors and print header (if verbose).
                        Set to 'no_verbose_header' to not print the header
    """
    if params is None:
        params = {}
    self._ies = {}
    self._ies_instances = {}
    self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    self._printed_messages = set()
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._postprocessor_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Use a truthiness test rather than indexing a list with the option value:
    # indexing fails for None or any other non-bool value of 'logtostderr'
    self._screen_file = sys.stderr if params.get('logtostderr') else sys.stdout
    self._err_file = sys.stderr
    self.params = params
    self.cache = Cache(self)

    windows_enable_vt_mode()
    self._allow_colors = {
        'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
        'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
    }

    if sys.version_info < (3, 6):
        self.report_warning(
            'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

    if self.params.get('allow_unplayable_formats'):
        self.report_warning(
            f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
            'This is a developer option intended for debugging. \n'
            ' If you experience any issues while using this option, '
            f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

    def check_deprecated(param, option, suggestion):
        """Warn if the deprecated `param` is set; return whether it was set"""
        if self.params.get(param) is not None:
            self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
            return True
        return False

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the old option over unless the new one was given explicitly
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    for msg in self.params.get('_warnings', []):
        self.report_warning(msg)

    if 'list-formats' in self.params.get('compat_opts', []):
        self.params['listformats_table'] = False

    if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
        # nooverwrites was unnecessarily changed to overwrites
        # in 0c3d0f51778b153f65c21906031c2e091fcfb641
        # This ensures compatibility with both keys
        self.params['overwrites'] = not self.params['nooverwrites']
    elif self.params.get('overwrites') is None:
        self.params.pop('overwrites', None)
    else:
        self.params['nooverwrites'] = not self.params['overwrites']

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Prefer bidiv; fall back to fribidi if it cannot be started
            try:
                self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
            except OSError:
                self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning(
                    'Could not find fribidi executable, ignoring --bidi-workaround. '
                    'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    # Creating format selector here allows us to catch syntax errors before the extraction
    self.format_selector = (
        None if self.params.get('format') is None
        else self.params['format'] if callable(self.params['format'])
        else self.build_format_selector(self.params['format']))

    self._setup_opener()

    if auto_init:
        if auto_init != 'no_verbose_header':
            self.print_debug_header()
        self.add_default_info_extractors()

    for pp_def_raw in self.params.get('postprocessors', []):
        # Copy so that popping 'when' and 'key' does not alter the caller's dict
        pp_def = dict(pp_def_raw)
        when = pp_def.pop('when', 'post_process')
        pp_class = get_postprocessor(pp_def.pop('key'))
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp, when=when)

    hooks = {
        'post_hooks': self.add_post_hook,
        'progress_hooks': self.add_progress_hook,
        'postprocessor_hooks': self.add_postprocessor_hook,
    }
    for opt, fn in hooks.items():
        for ph in self.params.get(opt, []):
            fn(ph)

    register_socks_protocols()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified"""
        if fn is None:
            return False
        self.write_debug(f'Loading archive file {fn!r}')
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file is not an error; anything else is
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    self.archive = set()
    preload_download_archive(self.params.get('download_archive'))

663

664

def warn_if_short_id(self, argv):
    """Warn when an argument looks like an 11-character ID starting with a dash.

    The warning suggests a command line in which such arguments are moved
    after a "--" separator.
    """
    # short YouTube ID starting with dash?
    dash_id = re.compile(r'^-[0-9A-Za-z_-]{10}$')
    flagged = [pos for pos, arg in enumerate(argv) if dash_id.match(arg)]
    if not flagged:
        return

    suggested = ['yt-dlp']
    suggested.extend(arg for pos, arg in enumerate(argv) if pos not in flagged)
    suggested.append('--')
    suggested.extend(argv[pos] for pos in flagged)
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s' %
        args_to_str(suggested))

679

680

def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list.

    `ie` may be an extractor class or an instance; only instances are
    recorded in _ies_instances and bound to this downloader.
    """
    key = ie.ie_key()
    self._ies[key] = ie
    if isinstance(ie, type):
        # A class: nothing to bind yet
        return
    self._ies_instances[key] = ie
    ie.set_downloader(self)

687

688

def _get_info_extractor_class(self, ie_key):
    """Return the entry registered in _ies for ie_key, registering it on first use"""
    registered = self._ies.get(ie_key)
    if registered is not None:
        return registered
    looked_up = get_info_extractor(ie_key)
    self.add_info_extractor(looked_up)
    return looked_up

694

695

def get_info_extractor(self, ie_key):
    """
    Return an IE instance for ie_key. An instance already present in
    _ies_instances is reused; otherwise a new one is created and
    added to the extractor list.
    """
    cached = self._ies_instances.get(ie_key)
    if cached is not None:
        return cached
    fresh = get_info_extractor(ie_key)()
    self.add_info_extractor(fresh)
    return fresh

706

707

def add_default_info_extractors(self):
    """
    Register everything yielded by gen_extractor_classes(), in order
    """
    for extractor_class in gen_extractor_classes():
        self.add_info_extractor(extractor_class)

713

714

def add_post_processor(self, pp, when='post_process'):
    """Add a PostProcessor object to the end of the chain for stage `when`."""
    stage_chain = self._pps[when]
    stage_chain.append(pp)
    pp.set_downloader(self)

718

719

def add_post_hook(self, ph):
    """Register `ph` as a post hook"""
    registered = self._post_hooks
    registered.append(ph)

722

723

def add_progress_hook(self, ph):
    """Register `ph` as a download progress hook"""
    registered = self._progress_hooks
    registered.append(ph)

726

727

def add_postprocessor_hook(self, ph):
    """Register `ph` as a postprocessing progress hook"""
    registered = self._postprocessor_hooks
    registered.append(ph)

730

731

def _bidi_workaround(self, message):
    """Pass `message` through the helper process started for bidi_workaround.

    Returns `message` unchanged when no helper process was set up.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)
    # One line is read back for every line written
    expected_lines = message.count('\n') + 1
    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + '\n').encode('utf-8'))
    helper_stdin.flush()
    received = [
        self._output_channel.readline().decode('utf-8')
        for _ in range(expected_lines)]
    # Drop the trailing newline that was appended before writing
    return ''.join(received)[:-len('\n')]

743

744

def _write_string(self, message, out=None, only_once=False):
    """Write `message` to `out`; with only_once, skip messages already written that way"""
    if only_once and message in self._printed_messages:
        return
    if only_once:
        self._printed_messages.add(message)
    write_string(message, out=out, encoding=self.params.get('encoding'))

750

751

def to_stdout(self, message, skip_eol=False, quiet=False):
    """Print message to stdout

    A configured logger takes precedence. Otherwise a quiet message is
    dropped unless 'verbose' is set, in which case it goes to the error
    stream instead of the screen.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if quiet and not self.params.get('verbose'):
        return
    line_end = '' if skip_eol else '\n'
    rendered = '%s%s' % (self._bidi_workaround(message), line_end)
    destination = self._err_file if quiet else self._screen_file
    self._write_string(rendered, destination)

759

760

def to_stderr(self, message, only_once=False):
    """Print message to stderr, or pass it to the configured logger as an error"""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    line = '%s\n' % self._bidi_workaround(message)
    self._write_string(line, self._err_file, only_once=only_once)

767

768

def to_console_title(self, message):
    """Set the console window title to `message` if 'consoletitle' is enabled"""
    if not self.params.get('consoletitle', False):
        return
    if compat_os_name == 'nt':
        kernel32 = ctypes.windll.kernel32
        if kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        return
    if 'TERM' not in os.environ:
        return
    self._write_string('\033]0;%s\007' % message, self._screen_file)

778

779

def save_console_title(self):
    """Push the current terminal title on the title stack (non-Windows, TERM set)"""
    if not self.params.get('consoletitle', False) or self.params.get('simulate'):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Save the title on stack
    self._write_string('\033[22;0t', self._screen_file)

787

788

def restore_console_title(self):
    """Pop the terminal title from the title stack (non-Windows, TERM set)"""
    if not self.params.get('consoletitle', False) or self.params.get('simulate'):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Restore the title from stack
    self._write_string('\033[23;0t', self._screen_file)

796

797

def __enter__(self):

798

self.save_console_title()

799

return self

800

801

def __exit__(self, *args):
    """Context-manager exit: restore the console title and save cookies if a cookiefile is set"""
    self.restore_console_title()

    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save(ignore_discard=True, ignore_expires=True)

806

807

def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message (if any) is printed first. If 'ignoreerrors' is not set,
    a DownloadError is raised; otherwise the return code is set to 1.

    tb, if given, is additional traceback information. In verbose mode
    a traceback is generated when none is given.
    """
    if message is not None:
        self.to_stderr(message)

    active = sys.exc_info()
    exc_type, exc_value = active[0], active[1]

    if self.params.get('verbose'):
        if tb is None:
            if exc_type:  # if .trouble has been called from an except block
                pieces = []
                if hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
                    pieces.extend(traceback.format_exception(*exc_value.exc_info))
                pieces.append(encode_compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if self.params.get('ignoreerrors'):
        self._download_retcode = 1
        return

    # Prefer the exc_info carried by the active exception, if it has one
    if exc_type and hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
        raise DownloadError(message, exc_value.exc_info)
    raise DownloadError(message, active)

837

838

def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode"""
    be_quiet = self.params.get('quiet', False)
    self.to_stdout(message, skip_eol, quiet=be_quiet)

class Styles(Enum):
    # Named text styles. __format_text() unwraps a member to its value and
    # passes that string to format_text().
    # Members sharing a value are Enum aliases of the first one defined:
    # DELIM is an alias of EMPHASIS, and WARNING an alias of HEADERS.
    HEADERS = 'yellow'
    EMPHASIS = 'blue'
    ID = 'green'
    DELIM = 'blue'
    ERROR = 'red'
    WARNING = 'yellow'
    SUPPRESS = 'light black'

851

852

def __format_text(self, out, text, f, fallback=None, *, test_encoding=False):
    """Return `text` styled with `f` for the given output, or a plain variant.

    @param out            'screen' or 'err'; selects the stream and its colour setting
    @param text           The text to format
    @param f              A Styles member or a style string for format_text()
    @param fallback       Returned instead of `text` when colours are disabled for
                          `out`, or (with test_encoding) when `text` cannot be
                          fully encoded
    @param test_encoding  Check that `text` survives a round trip through the
                          output encoding; characters that do not are dropped
    """
    assert out in ('screen', 'err')
    if test_encoding:
        original_text = text
        handle = self._screen_file if out == 'screen' else self._err_file
        # A stream may have an `encoding` attribute that is None (e.g. io.StringIO),
        # so fall back to ascii in that case too, not only when it is missing
        encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
        text = text.encode(encoding, 'ignore').decode(encoding)
        if fallback is not None and text != original_text:
            text = fallback
    if isinstance(f, self.Styles):
        f = f.value
    if self._allow_colors[out]:
        return format_text(text, f)
    return text if fallback is None else fallback

864

865

def _format_screen(self, *args, **kwargs):
    """Shorthand for __format_text with the 'screen' stream selected."""
    target = 'screen'
    return self.__format_text(target, *args, **kwargs)

867

868

def _format_err(self, *args, **kwargs):
    """Shorthand for __format_text with the 'err' stream selected."""
    target = 'err'
    return self.__format_text(target, *args, **kwargs)

870

871

def report_warning(self, message, only_once=False):
    '''
    Emit a warning.

    When a 'logger' param is configured the message goes to logger.warning()
    as-is. Otherwise, unless 'no_warnings' is set, it is written to stderr
    prefixed with 'WARNING:' (the prefix is styled through _format_err).
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    prefix = self._format_err("WARNING:", self.Styles.WARNING)
    self.to_stderr(f'{prefix} {message}', only_once)

882

883

def report_error(self, message, tb=None):
    '''
    Hand the message to trouble() after prefixing it with 'ERROR:'
    (the prefix is styled through _format_err). `tb` is passed along unchanged.
    '''
    prefix = self._format_err("ERROR:", self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', tb)

889

890

def write_debug(self, message, only_once=False):
    '''Emit a '[debug] ' line via the logger if one is set, else to stderr; no-op unless verbose'''
    if not self.params.get('verbose', False):
        return
    line = '[debug] %s' % message
    logger = self.params.get('logger')
    if logger:
        logger.debug(line)
        return
    self.to_stderr(line, only_once)

899

900

def report_file_already_downloaded(self, file_name):
    """Tell the user that `file_name` was already fully downloaded."""
    try:
        detailed = '[download] %s has already been downloaded' % file_name
        self.to_screen(detailed)
    except UnicodeEncodeError:
        # The name cannot be printed; fall back to a message without it
        self.to_screen('[download] The file has already been downloaded')

906

907

def report_file_delete(self, file_name):
    """Tell the user that the existing file `file_name` is going to be deleted."""
    try:
        detailed = 'Deleting existing file %s' % file_name
        self.to_screen(detailed)
    except UnicodeEncodeError:
        # The name cannot be printed; fall back to a message without it
        self.to_screen('Deleting existing file')

913

914

def raise_no_formats(self, info, forced=False):
    """Raise ExtractorError for a result without formats, or only warn.

    Only a warning is issued when the 'ignore_no_formats_error' param is set
    and `forced` is false; otherwise ExtractorError is raised, marked as
    expected when the video is DRM protected or the param is set.
    """
    has_drm = info.get('__has_drm')
    if has_drm:
        msg = 'This video is DRM protected'
    else:
        msg = 'No video formats found!'
    expected = self.params.get('ignore_no_formats_error')
    if expected and not forced:
        self.report_warning(msg)
        return
    raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                         expected=has_drm or expected)

923

924

def parse_outtmpl(self):
    """Return the 'outtmpl' param as a dict with DEFAULT_OUTTMPL filled in.

    A non-dict 'outtmpl' is treated as the 'default' template. When the param
    already is a dict, it is updated in place.
    """
    outtmpl_dict = self.params.get('outtmpl', {})
    if not isinstance(outtmpl_dict, dict):
        outtmpl_dict = {'default': outtmpl_dict}

    # Remove spaces in the default template
    restrict = self.params.get('restrictfilenames')

    def sanitize(tmpl):
        return tmpl.replace(' - ', ' ').replace(' ', '-') if restrict else tmpl

    for tmpl_key, default_tmpl in DEFAULT_OUTTMPL.items():
        if outtmpl_dict.get(tmpl_key) is None:
            outtmpl_dict[tmpl_key] = sanitize(default_tmpl)

    for tmpl in outtmpl_dict.values():
        if isinstance(tmpl, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
    return outtmpl_dict

942

943

def get_output_path(self, dir_type='', filename=None):
    """Join the 'home' path, the path configured for `dir_type` and `filename`, then sanitize."""
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    path = os.path.join(home_dir, type_dir, filename or '')

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    if sys.version_info < (3, 0) and sys.platform == 'win32':
        path = encodeFilename(path, True).decode(preferredencoding())
    return sanitize_path(path, force=self.params.get('windowsfilenames'))

957

958

@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Run expand_path over the template while keeping '%%' and '$$' intact."""
    # expand_path translates '%%' into '%' and '$$' into '$', which is not
    # what we want: '%%' must survive for the later template dict substitution.
    # So a random separator is wedged between the doubled characters
    # (boundary-alike separator hack) and removed again afterwards.
    sep = ''.join(random.choice(ascii_letters) for _ in range(32))
    guarded = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    expanded = expand_path(guarded)
    return expanded.replace(sep, '')

972

973

@staticmethod
def escape_outtmpl(outtmpl):
    ''' Escape any remaining strings like %s, %abc% etc. '''
    keyless_format_re = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')

    def _escape(mobj):
        # Matches without a key get one more '%' in front; keyed ones stay as they are
        extra_percent = '' if mobj.group('has_key') else '%'
        return extra_percent + mobj.group(0)

    return re.sub(keyless_format_re, _escape, outtmpl)

@classmethod
def validate_outtmpl(cls, outtmpl):
    ''' @return None or Exception object '''
    custom_types_re = STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]')

    def _as_str_conversion(mobj):
        # Swap the custom conversion letter for 's' so that %-formatting accepts it
        return f'{mobj.group(0)[:-1]}s'

    outtmpl = re.sub(custom_types_re, _as_str_conversion, cls._outtmpl_expandpath(outtmpl))
    try:
        # Dry-run substitution; only whether it raises matters
        cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None

@staticmethod
def _copy_infodict(info_dict):
    """Return a shallow copy of info_dict without the internal bookkeeping keys."""
    internal_keys = ('__original_infodict', '__postprocessors')
    return {
        key: value for key, value in dict(info_dict).items()
        if key not in internal_keys}

1000

1001

def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

    outtmpl   -- the output template string
    info_dict -- the info dict; only 'epoch' is set on the passed dict itself
                 (if missing), everything else is done on a copy
    sanitize  -- optional callable (field_name, value) applied to values that
                 are substituted with a c/s/r conversion

    Returns (rewritten template, dict of values keyed for that template).
    """
    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    info_dict = self._copy_infodict(info_dict)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
        'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
    # The alternation is wrapped in a group so that it stays a single operand when
    # embedded after an operator below, and the decimal point is escaped so that
    # it cannot match arbitrary characters
    MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:\.\d+)?')
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{field})
        (?P<maths>(?:{math_op}{math_field})*)
        (?:>(?P<strf_format>.+?))?
        (?P<alternate>(?<!\\),[^|)]+)?
        (?:\|(?P<default>.*?))?
        $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

    def _traverse_infodict(k):
        k = k.split('.')
        if k[0] == '':
            k.pop(0)
        return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            math_op = None
            # Alternately consume an operator and its operand from the front
            while offset_key:
                item_mobj = re.match(
                    MATH_FIELD_RE if math_op else MATH_OPERATORS_RE,
                    offset_key)
                if item_mobj is None:
                    return None
                item = item_mobj.group(0)
                offset_key = offset_key[len(item):]
                if math_op is None:
                    math_op = MATH_FUNCTIONS[item]
                    continue
                if not item:
                    # An operator without operand (eg: "a++1"); nothing sensible to compute
                    return None
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                offset = float_or_none(item)
                if offset is None:
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = math_op(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                math_op = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        return value

    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def _dumpjson_default(obj):
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

    def create_key(outer_mobj):
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
        value, default = None, na
        # Walk through the comma-separated alternates until one yields a value
        while mobj:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else default
            value = get_value(mobj)
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
            else:
                break

        fmt = outer_mobj.group('format')
        if fmt == 's' and value is not None and key in field_size_compat_map:
            fmt = '0{:d}d'.format(field_size_compat_map[key])

        value = default if value is None else value

        flags = outer_mobj.group('conversion') or ''
        str_fmt = f'{fmt[:-1]}s'
        if fmt[-1] == 'l':  # list
            delim = '\n' if '#' in flags else ', '
            value, fmt = delim.join(variadic(value)), str_fmt
        elif fmt[-1] == 'j':  # json
            value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
        elif fmt[-1] == 'q':  # quoted
            value = map(str, variadic(value) if '#' in flags else [value])
            value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
        elif fmt[-1] == 'B':  # bytes
            value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif fmt[-1] == 'U':  # unicode normalized
            value, fmt = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                value), str_fmt
        elif fmt[-1] == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif fmt[-1] not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), str_fmt
            if fmt[-1] in 'csr':
                value = sanitize(initial_field, value)

        # The original format is made part of the key so that the same field
        # used with different formats gets separate entries in TMPL_DICT
        key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

1154

1155

def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Prepare the template with prepare_outtmpl and return the substituted string."""
    prepared_tmpl, tmpl_values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    escaped_tmpl = self.escape_outtmpl(prepared_tmpl)
    return escaped_tmpl % tmpl_values

1158

1159

def _prepare_filename(self, info_dict, tmpl_type='default'):
    """Evaluate the output template of type `tmpl_type` into a filename.

    Falls back to the 'default' template when there is none for `tmpl_type`.
    Returns None (after reporting the error) if evaluating the template
    raises ValueError.
    """
    try:
        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id' or k.endswith('_id')))
        outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
        filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)

        force_ext = OUTTMPL_TYPES.get(tmpl_type)
        if filename and force_ext is not None:
            filename = replace_extension(filename, force_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # Split off at most two trailing extensions so that only the name part
            # is trimmed. Dots inside the name stay part of the name, and a name
            # without any extension is not mistaken for one
            no_ext, *ext = filename.rsplit('.', 2)
            filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None

1186

1187

def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename.

    info_dict -- the info dict the template is evaluated against
    dir_type  -- template/path type; '' selects the 'default' template
    warn      -- warn when the 'paths' param is set but cannot take effect

    Returns '' when no filename results for a dir_type other than '' and
    'temp'; otherwise '-' or an empty/None filename as-is, or the filename
    placed in its output directory by get_output_path.
    """
    filename = self._prepare_filename(info_dict, dir_type or 'default')
    if not filename and dir_type not in ('', 'temp'):
        return ''

    # filename is None when the template could not be evaluated;
    # it must not reach os.path.isabs in that case
    if warn and filename and self.params.get('paths'):
        if filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        elif os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
    if filename == '-' or not filename:
        return filename

    return self.get_output_path(dir_type, filename)

1205

1206

def _match_entry(self, info_dict, incomplete=False, silent=False):
    """ Returns None if the file should be downloaded

    Otherwise the reason for skipping is returned; it is also printed unless
    `silent`. If the matching 'break_on_existing'/'break_on_reject' param is
    set, ExistingVideoReached/RejectedVideoReached is raised instead.
    """

    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def check_filter():
        # The title can be missing when we're just evaluating the playlist
        if 'title' in info_dict:
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

        date = info_dict.get('upload_date')
        if date is not None:
            date_range = self.params.get('daterange', DateRange())
            if date not in date_range:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), date_range)

        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)

        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        match_filter = self.params.get('match_filter')
        if match_filter is None:
            return None
        try:
            verdict = match_filter(info_dict, incomplete=incomplete)
        except TypeError:
            # For backward compatibility
            verdict = None if incomplete else match_filter(info_dict)
        return verdict

    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return reason
    if not silent:
        self.to_screen('[download] ' + reason)
    if self.params.get(break_opt, False):
        raise break_err()
    return reason

@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every key of extra_info that info_dict does not have yet'''
    for extra_key, extra_value in extra_info.items():
        info_dict.setdefault(extra_key, extra_value)

1268

1269

def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract the information for `url` with the first suitable extractor.

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
    process -- whether to resolve all unresolved references (URLs, playlist items),
        must be True for download to work.
    force_generic_extractor -- force using the generic extractor

    Returns the result of the extraction. None is returned when the URL's
    temporary id is already in the download archive, or when no extractor is
    suitable (after reporting the error).
    """

    if extra_info is None:
        extra_info = {}

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    candidates = {ie_key: self._get_info_extractor_class(ie_key)} if ie_key else self._ies

    for ie_key, ie in candidates.items():
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = ie.get_temp_id(url)
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                ie_key, temp_id))
            break
        return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
    else:
        # Only reached when the loop ran out of extractors without finding a suitable one
        self.report_error('no suitable InfoExtractor for URL %s' % url)

1313

1314

def __handle_extraction_exceptions(func):
    """Decorator that reports extraction errors of a method instead of raising them.

    Geo-restriction and extractor errors are passed to self.report_error.
    ThrottledDownload makes the call get retried. Cancellation and the
    lazy/paged list index errors always propagate. Any other exception is
    reported only when the 'ignoreerrors' param is set, and re-raised otherwise.
    The wrapper returns None whenever an error was reported.
    """
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        # Retrying is done in a loop rather than by recursing from within the
        # except block, so that repeated throttling cannot exhaust the stack
        while True:
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except ThrottledDownload as e:
                self.to_stderr('\r')
                self.report_warning(f'{e}; Re-extracting data')
                continue
            except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                raise
            except Exception as e:
                if self.params.get('ignoreerrors'):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
            return None
    return wrapper

@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run the extractor `ie` on `url` and, if `process` is set, process its result."""
    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return None
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}

    original_url = extra_info.get('original_url')
    if original_url:
        ie_result.setdefault('original_url', original_url)
    self.add_default_extra_info(ie_result, ie, url)

    if not process:
        return ie_result
    return self.process_ie_result(ie_result, download, extra_info)

def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the url- and extractor-derived fields that ie_result is missing."""
    if url is not None:
        url_fields = {
            'webpage_url': url,
            'original_url': url,
            'webpage_url_basename': url_basename(url),
        }
        self.add_extra_info(ie_result, url_fields)
    if ie is not None:
        extractor_fields = {
            'extractor': ie.IE_NAME,
            'extractor_key': ie.ie_key(),
        }
        self.add_extra_info(ie_result, extractor_fields)

1372

1373

def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    Dispatches on ie_result['_type'] (default 'video'); an unknown type raises
    Exception. None is returned for a playlist whose webpage_url is already
    being processed.
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        if ie_result.get('original_url'):
            extra_info.setdefault('original_url', ie_result['original_url'])

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: the reference is not resolved. Printing and
            # archive recording work on a copy; ie_result is returned unchanged
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            if ie and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, compat_str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            ie_result['additional_entries'] = [
                # extra_info has to be given by keyword: the third positional
                # parameter of extract_info is ie_key
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # The parentheses matter: "%" binds tighter than "or"
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Forget the seen URLs once the outermost playlist is done
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)

1499

1500

def _ensure_dir_exists(self, path):
    """Call make_dir for `path` with report_error as its error callback; return its result."""
    on_error = self.report_error
    return make_dir(path, on_error)

1502

1503

def __process_playlist(self, ie_result, download):
    """Select the requested entries of a playlist result and process each of them.

    Returns ie_result with 'entries' replaced by the processed results, or
    None when writing one of the playlist files reports failure (None).
    Raises EntryNotInPlaylist if there are no entries at all, or if an entry
    of an incomplete playlist cannot be found.
    """
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if 'entries' not in ie_result:
        raise EntryNotInPlaylist('There are no entries')

    MissingEntry = object()
    incomplete_entries = bool(ie_result.get('requested_entries'))
    if incomplete_entries:
        # Only some entries are present: put each one back at its 1-based
        # position and mark the gaps with the MissingEntry sentinel
        requested = ie_result['requested_entries']
        padded = [MissingEntry] * max(requested)
        for position, known_entry in zip(requested, ie_result['entries']):
            padded[position - 1] = known_entry
        ie_result['entries'] = padded

    playlist_results = []

    playliststart = self.params.get('playliststart', 1)
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems = None
    playlistitems_str = self.params.get('playlist_items')
    if playlistitems_str is not None:
        def iter_playlistitems(spec):
            # "1,3-5" -> 1, 3, 4, 5
            for segment in spec.split(','):
                if '-' not in segment:
                    yield int(segment)
                    continue
                start, end = segment.split('-')
                yield from range(int(start), int(end) + 1)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']
    if isinstance(ie_entries, list):
        msg = 'Collected %d videos; downloading %%d of them' % len(ie_entries)

        def get_entry(i):
            return ie_entries[i - 1]
    else:
        msg = 'Downloading %d videos'
        if not isinstance(ie_entries, (PagedList, LazyList)):
            ie_entries = LazyList(ie_entries)

        def get_entry(i):
            return YoutubeDL.__handle_extraction_exceptions(
                lambda self, i: ie_entries[i - 1]
            )(self, i)

    entries = []
    items = itertools.count(playliststart) if playlistitems is None else playlistitems
    for i in items:
        if i == 0:
            continue
        if playlistitems is None and playlistend is not None and playlistend < i:
            break
        entry = None
        try:
            entry = get_entry(i)
            if entry is MissingEntry:
                raise EntryNotInPlaylist()
        except (IndexError, EntryNotInPlaylist):
            if incomplete_entries:
                raise EntryNotInPlaylist(f'Entry {i} cannot be found')
            elif not playlistitems:
                break
        entries.append(entry)
        try:
            if entry is not None:
                self._match_entry(entry, incomplete=True, silent=True)
        except (ExistingVideoReached, RejectedVideoReached):
            break
    ie_result['entries'] = entries

    # Save playlist_index before re-ordering
    entries = [
        ((playlistitems[pos - 1] if playlistitems else pos + playliststart - 1), entry)
        for pos, entry in enumerate(entries, 1)
        if entry is not None]
    n_entries = len(entries)

    if not playlistitems and (playliststart != 1 or playlistend):
        playlistitems = list(range(playliststart, playliststart + n_entries))
    ie_result['requested_entries'] = playlistitems

    _infojson_written = False
    if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
        ie_copy = {
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'playlist_index': 0,
            'n_entries': n_entries,
        }
        ie_copy.update(dict(ie_result))

        _infojson_written = self._write_info_json(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
        if _infojson_written is None:
            return
        description_written = self._write_description(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_description'))
        if description_written is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]
    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, (playlist_index, entry) in enumerate(entries, 1):
        if 'playlist-index' in self.params.get('compat_opts', []):
            playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': i,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
            if failures >= max_failures:
                self.report_error(
                    'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                break
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results

    # Write the updated info to json
    # (ie_copy is always defined here: _infojson_written can only be truthy
    # if the block that creates ie_copy has run)
    if _infojson_written and self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
        return
    self.to_screen('[download] Finished downloading playlist: %s' % playlist)
    return ie_result

1673

1674

@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process a single playlist entry.

    Forwards ``entry`` to ``process_ie_result`` together with the
    ``download`` flag and the per-entry ``extra_info`` mapping and returns
    whatever that call returns. The call is wrapped by the
    ``__handle_extraction_exceptions`` decorator.
    """
    entry_result = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return entry_result

1678

1679

def _build_format_filter(self, filter_spec):

1680

" Returns a function to filter the formats according to the filter_spec "

OPERATORS = {

'<': operator.lt,

'<=': operator.le,

'>': operator.gt,

'>=': operator.ge,

'=': operator.eq,

'!=': operator.ne,

}

operator_rex = re.compile(r'''(?x)\s*

1691

(?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*

1692

(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*

1693

(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*

1694

''' % '|'.join(map(re.escape, OPERATORS.keys())))

1695

m = operator_rex.fullmatch(filter_spec)

1696

if m:

1697

try:

1698

comparison_value = int(m.group('value'))

1699

except ValueError:

1700

comparison_value = parse_filesize(m.group('value'))

1701

if comparison_value is None:

1702

comparison_value = parse_filesize(m.group('value') + 'B')

1703

if comparison_value is None:

1704

raise ValueError(

1705

'Invalid value %r in format specification %r' % (

1706

m.group('value'), filter_spec))

1707

op = OPERATORS[m.group('op')]

if not m:

STR_OPERATORS = {

'=': operator.eq,

'^=': lambda attr, value: attr.startswith(value),

1713

'$=': lambda attr, value: attr.endswith(value),

1714

'*=': lambda attr, value: value in attr,

1715

}

1716

str_operator_rex = re.compile(r'''(?x)\s*

1717

(?P<key>[a-zA-Z0-9._-]+)\s*

1718

(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*

1719

(?P<value>[a-zA-Z0-9._-]+)\s*

1720

''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))

1721

m = str_operator_rex.fullmatch(filter_spec)

1722

if m:

1723

comparison_value = m.group('value')

1724

str_op = STR_OPERATORS[m.group('op')]

1725

if m.group('negation'):

1726

op = lambda attr, value: not str_op(attr, value)

else:

op = str_op

if not m:

raise SyntaxError('Invalid filter specification %r' % filter_spec)

1732

1733

def _filter(f):

1734

actual_value = f.get(m.group('key'))

1735

if actual_value is None:

1736

return m.group('none_inclusive')

1737

return op(actual_value, comparison_value)

1738

return _filter

1739

1740

def _check_formats(self, formats):

1741

for f in formats:

1742

self.to_screen('[info] Testing format %s' % f['format_id'])

1743

temp_file = tempfile.NamedTemporaryFile(

1744

suffix='.tmp', delete=False,

1745

dir=self.get_output_path('temp') or None)

1746

temp_file.close()

1747

try:

1748

success, _ = self.dl(temp_file.name, f, test=True)

1749

except (DownloadError, IOError, OSError, ValueError) + network_exceptions:

1750

success = False

1751

finally:

1752

if os.path.exists(temp_file.name):

1753

try:

1754

os.remove(temp_file.name)

1755

except OSError:

1756

self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)

if success:

yield f

else:

self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

1761

1762

def _default_format_spec(self, info_dict, download=True):

1763

1764

def can_merge():

1765

merger = FFmpegMergerPP(self)

1766

return merger.available and merger.can_merge()

1767

1768

prefer_best = (

1769

not self.params.get('simulate')

and download

and (

not can_merge()

or info_dict.get('is_live', False)

1774

or self.outtmpl_dict['default'] == '-'))

1775

compat = (

1776

prefer_best

1777

or self.params.get('allow_multiple_audio_streams', False)

1778

or 'format-spec' in self.params.get('compat_opts', []))

1779

1780

return (

1781

'best/bestvideo+bestaudio' if prefer_best

1782

else 'bestvideo*+bestaudio/best' if not compat

1783

else 'bestvideo+bestaudio/best')

1784

1785

def build_format_selector(self, format_spec):
    """Compile a format specification string into a selector function.

    The spec is tokenized with the Python tokenizer, parsed into a tree of
    ``FormatSelector`` tuples (``/`` pick-first, ``+`` merge, ``,`` list,
    ``()`` group, ``[...]`` filters) and turned into a callable.

    The returned callable takes a context dict with the keys ``formats`` and
    ``incomplete_formats`` and yields/returns the selected format dicts.

    Raises SyntaxError for a malformed specification.
    """
    def syntax_error(note, start):
        # Point a caret at the offending column of the spec
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    check_formats = self.params.get('check_formats') == 'selected'

    def _parse_filter(tokens):
        # Collect everything up to the closing ']' into one filter string
        filter_parts = []
        for tok_type, string, start, _, _ in tokens:
            if tok_type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for tok_type, string, start, end, line in tokens:
            if tok_type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter;
                # this inner loop continues consuming the same token iterator
                for tok_type, string, start, end, line in tokens:
                    yield tok_type, string, start, end, line
                    if tok_type == tokenize.OP and string == ']':
                        break
            elif tok_type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
            elif tok_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive-descent parser; ``tokens`` must support restore_last_token()
        selectors = []
        current_selector = None
        for tok_type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if tok_type == getattr(tokenize, 'ENCODING', None):
                continue
            elif tok_type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif tok_type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif tok_type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        # Combine two (possibly already merged) formats into one merged format dict
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            get_no_more = {'video': False, 'audio': False}
            # Build a new list instead of popping from the list being
            # iterated, which would skip the element after each removal
            kept_formats = []
            for fmt_info in formats_info:
                if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                    continue
                for aud_vid in ['audio', 'video']:
                    if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                        if get_no_more[aud_vid]:
                            # A stream of this kind was already taken: drop this format
                            break
                        get_no_more[aud_vid] = True
                else:
                    kept_formats.append(fmt_info)
            formats_info = kept_formats

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(filtered('format')),
            'format_id': '+'.join(filtered('format_id')),
            'ext': output_ext,
            'protocol': '+'.join(map(determine_protocol, formats_info)),
            'language': '+'.join(orderedSet(filtered('language'))) or None,
            'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
            'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
            'tbr': sum(filtered('tbr', 'vbr', 'abr')),
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'dynamic_range': the_only_video.get('dynamic_range'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
                'asr': the_only_audio.get('asr'),
            })

        return new_dict

    def _check_formats(formats):
        # Only test-download formats when check_formats == 'selected'
        if not check_formats:
            yield from formats
            return
        yield from self._check_formats(formats)

    def _build_selector_function(selector):
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    yield from f(ctx)
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(
                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                    yield _merge(pair)

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    yield from _check_formats(ctx['formats'][::-1])
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(_check_formats(ctx['formats']))
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, format_reverse, format_idx = False, True, 1
                # NOTE(review): the pattern argument was missing from this call
                # (re.match() requires a pattern and a string). It is restored
                # here to provide the groups read below (bw, type, mod, n) -
                # confirm against the canonical source.
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    _filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else lambda f: True)  # b*, w*
                    filter_f = lambda f: _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                else:
                    if format_spec in self._format_selection_exts['audio']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                    elif format_spec in self._format_selection_exts['video']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                    elif format_spec in self._format_selection_exts['storyboards']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                    else:
                        filter_f = lambda f: f.get('format_id') == format_spec  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if format_fallback and ctx['incomplete_formats'] and not matches:
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        matches = formats
                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                    try:
                        yield matches[format_idx - 1]
                    except IndexError:
                        return

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply the bracket filters on a private copy of the context
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        """Iterator over a token list that can step back by one token."""

        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)

2110

2111

def _calc_headers(self, info_dict):
    """Build the HTTP headers to use for the given info/format dict.

    Starts from a copy of ``std_headers``, overlays the dict's own
    ``http_headers``, adds a ``Cookie`` header when ``_calc_cookies`` returns
    one, and sets ``X-Forwarded-For`` from ``__x_forwarded_for_ip`` unless
    that header is already present.
    """
    headers = std_headers.copy()

    extra_headers = info_dict.get('http_headers')
    if extra_headers:
        headers.update(extra_headers)

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip and 'X-Forwarded-For' not in headers:
        headers['X-Forwarded-For'] = forwarded_ip

    return headers

def _calc_cookies(self, info_dict):
    """Return the ``Cookie`` header the cookie jar adds for ``info_dict['url']``.

    A request object is built for the URL only so that the cookie jar can
    attach its header to it; the result of ``get_header('Cookie')`` on that
    request is returned.
    """
    probe_request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    cookie_header = probe_request.get_header('Cookie')
    return cookie_header

2133

2134

def _sort_thumbnails(self, thumbnails):

2135

thumbnails.sort(key=lambda t: (

2136

t.get('preference') if t.get('preference') is not None else -1,

2137

t.get('width') if t.get('width') is not None else -1,

2138

t.get('height') if t.get('height') is not None else -1,

2139

t.get('id') if t.get('id') is not None else '',

2140

t.get('url')))

2141

2142

def _sanitize_thumbnails(self, info_dict):

2143

thumbnails = info_dict.get('thumbnails')

2144

if thumbnails is None:

2145

thumbnail = info_dict.get('thumbnail')

2146

if thumbnail:

2147

info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]

if not thumbnails:

return

def check_thumbnails(thumbnails):

2152

for t in thumbnails:

2153

self.to_screen(f'[info] Testing thumbnail {t["id"]}')

2154

try:

2155

self.urlopen(HEADRequest(t['url']))

2156

except network_exceptions as err:

2157

self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')

continue

yield t

self._sort_thumbnails(thumbnails)

2162

for i, t in enumerate(thumbnails):

2163

if t.get('id') is None:

2164

t['id'] = '%d' % i

2165

if t.get('width') and t.get('height'):

2166

t['resolution'] = '%dx%d' % (t['width'], t['height'])

2167

t['url'] = sanitize_url(t['url'])

2168

2169

if self.params.get('check_formats') is True:

2170

info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)

2171

else:

2172

info_dict['thumbnails'] = thumbnails

2173

2174

def process_video_result(self, info_dict, download=True):
    """Sanitize a single-video extractor result, select formats and process them.

    The info dict is normalized in place (ids, numeric fields, thumbnails,
    dates, live status, subtitles and every format entry), handed to the
    pre-processors, optionally listed, and finally passed through the format
    selector. Each selected format is merged into a copy of the info dict and
    given to ``process_info`` when ``download`` is true.

    Returns the info dict updated with the last selected format, or None when
    only listing was requested. Raises ExtractorError when ``id``/``title``
    is missing or when no requested format is available (unless the
    ``ignore_no_formats_error`` param is set).
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError(
            'Missing "title" field in extractor result',
            video_id=info_dict['id'], ie=info_dict['extractor'])

    def report_force_conversion(field, field_not, conversion):
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        value = info.get(string_field)
        if value is not None and not isinstance(value, compat_str):
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(value)

    def sanitize_numeric_fields(info):
        for numeric_field in self._NUMERIC_FIELDS:
            value = info.get(numeric_field)
            if value is not None and not isinstance(value, compat_numeric_types):
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(value)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)

    if 'playlist' not in info_dict:
        # Not part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    self._sanitize_thumbnails(info_dict)

    thumbnail = info_dict.get('thumbnail')
    thumbnails = info_dict.get('thumbnails')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if info_dict.get('display_id') is None and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('duration') is not None:
        info_dict['duration_string'] = formatSeconds(info_dict['duration'])

    timestamp_to_date_keys = (
        ('timestamp', 'upload_date'),
        ('release_timestamp', 'release_date'),
    )
    for ts_key, date_key in timestamp_to_date_keys:
        if info_dict.get(date_key) is not None or info_dict.get(ts_key) is None:
            continue
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        try:
            as_datetime = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
            info_dict[date_key] = as_datetime.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            pass

    live_keys = ('is_live', 'was_live')
    live_status = info_dict.get('live_status')
    if live_status is None:
        # Stop at the first key that is not explicitly False; it decides the
        # status only if it is truthy
        for key in live_keys:
            if info_dict.get(key) is False:
                continue
            if info_dict.get(key):
                live_status = key
            break
        if all(info_dict.get(key) is False for key in live_keys):
            live_status = 'not_live'
    if live_status:
        info_dict['live_status'] = live_status
        for key in live_keys:
            if info_dict.get(key) is None:
                info_dict[key] = (live_status == key)

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        number_key = '%s_number' % field
        if info_dict.get(number_key) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict[number_key])

    for cc_kind in ('subtitles', 'automatic_captions'):
        for sub_formats in (info_dict.get(cc_kind) or {}).values():
            for subtitle_format in sub_formats:
                if subtitle_format.get('url'):
                    subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                if subtitle_format.get('ext') is None:
                    subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    # Without a 'formats' list the info dict itself is the only format
    formats = [info_dict] if info_dict.get('formats') is None else info_dict['formats']

    info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
    if not self.params.get('allow_unplayable_formats'):
        formats = [f for f in formats if not f.get('has_drm')]

    if not formats:
        self.raise_no_formats(info_dict)

    def is_wellformed(f):
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = [f for f in formats if is_wellformed(f)]

    formats_by_id = {}

    # We check that all the formats have the format and format_id fields
    for index, fmt in enumerate(formats):
        sanitize_string_field(fmt, 'format_id')
        sanitize_numeric_fields(fmt)
        fmt['url'] = sanitize_url(fmt['url'])
        if fmt.get('format_id'):
            # Sanitize format_id from characters used in format selector expression
            fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id'])
        else:
            fmt['format_id'] = compat_str(index)
        formats_by_id.setdefault(fmt['format_id'], []).append(fmt)

    # Make sure all formats have unique format_id
    common_exts = set(itertools.chain(*self._format_selection_exts.values()))
    for format_id, same_id_formats in formats_by_id.items():
        is_ambiguous = len(same_id_formats) > 1
        for index, fmt in enumerate(same_id_formats):
            if is_ambiguous:
                fmt['format_id'] = '%s-%d' % (format_id, index)
            if fmt.get('ext') is None:
                fmt['ext'] = determine_ext(fmt['url']).lower()
            # Ensure there is no conflict between id and ext in format selection
            # See https://github.com/yt-dlp/yt-dlp/issues/1282
            if fmt['format_id'] != fmt['ext'] and fmt['format_id'] in common_exts:
                fmt['format_id'] = 'f%s' % fmt['format_id']

    for fmt in formats:
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=format_field(fmt, 'format_note', ' (%s)'),
            )
        if fmt.get('protocol') is None:
            fmt['protocol'] = determine_protocol(fmt)
        if fmt.get('resolution') is None:
            fmt['resolution'] = self.format_resolution(fmt, default=None)
        if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none':
            fmt['dynamic_range'] = 'SDR'
        if (info_dict.get('duration') and fmt.get('tbr')
                and not fmt.get('filesize') and not fmt.get('filesize_approx')):
            fmt['filesize_approx'] = info_dict['duration'] * fmt['tbr'] * (1024 / 8)

        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(fmt)
        fmt['http_headers'] = self._calc_headers(full_format_info)
    # Remove private housekeeping stuff
    if '__x_forwarded_for_ip' in info_dict:
        del info_dict['__x_forwarded_for_ip']

    # TODO Central sorting goes here

    if self.params.get('check_formats') is True:
        formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

    if not formats or formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats

    info_dict, _ = self.pre_process(info_dict)

    # The pre-processors may have modified the formats
    formats = info_dict.get('formats', [info_dict])

    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
    if self.params.get('listformats'):
        if not info_dict.get('formats') and not info_dict.get('url'):
            self.to_screen('%s has no formats' % info_dict['id'])
        else:
            self.list_formats(info_dict)
    if self.params.get('listsubtitles'):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
    list_only = self.params.get('simulate') is None and (
        self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
    if list_only:
        # Without this printing, -F --print-json will not work
        self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
        return

    format_selector = self.format_selector
    if format_selector is None:
        req_format = self._default_format_spec(info_dict, download=download)
        self.write_debug('Default format spec: %s' % req_format)
        format_selector = self.build_format_selector(req_format)

    # While in format selection we may need to have an access to the original
    # format set in order to calculate some metrics or do some processing.
    # For now we need to be able to guess whether original formats provided
    # by extractor are incomplete or not (i.e. whether extractor provides only
    # video-only or audio-only formats) for proper formats selection for
    # extractors with such incomplete formats (see
    # https://github.com/ytdl-org/youtube-dl/pull/5556).
    # Since formats may be filtered during format selection and may not match
    # the original formats the results may be incorrect. Thus original formats
    # or pre-calculated metrics should be passed to format selection routines
    # as well.
    # We will pass a context object containing all necessary additional data
    # instead of just formats.
    # This fixes incorrect format selection issue (see
    # https://github.com/ytdl-org/youtube-dl/issues/10083).
    incomplete_formats = (
        # All formats are video-only or
        all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
        # all formats are audio-only
        or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

    ctx = {
        'formats': formats,
        'incomplete_formats': incomplete_formats,
    }

    formats_to_download = list(format_selector(ctx))
    if not formats_to_download:
        if not self.params.get('ignore_no_formats_error'):
            raise ExtractorError(
                'Requested format is not available', expected=True,
                video_id=info_dict['id'], ie=info_dict['extractor'])
        self.report_warning('Requested format is not available')
        # Process what we can, even without any available formats.
        self.process_info(dict(info_dict))
    elif download:
        self.to_screen(
            '[info] %s: Downloading %d format(s): %s' % (
                info_dict['id'], len(formats_to_download),
                ", ".join(f['format_id'] for f in formats_to_download)))
        for chosen in formats_to_download:
            new_info = dict(info_dict)
            # Save a reference to the original info_dict so that it can be modified in process_info if needed
            new_info['__original_infodict'] = info_dict
            new_info.update(chosen)
            self.process_info(new_info)
    # We update the info dict with the selected best quality format (backwards compatibility)
    if formats_to_download:
        info_dict.update(formats_to_download[-1])
    return info_dict

2454

2455

def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    Returns a dict mapping each selected language to one subtitle format
    entry, or None when subtitle writing is not enabled or nothing is
    available.
    """
    want_normal = self.params.get('writesubtitles')
    want_auto = self.params.get('writeautomaticsub')

    available_subs = {}
    if normal_subtitles and want_normal:
        available_subs.update(normal_subtitles)
    if automatic_captions and want_auto:
        # Automatic captions never override a normal subtitle of the same language
        for lang, cap_info in automatic_captions.items():
            available_subs.setdefault(lang, cap_info)

    if not (want_normal or want_auto) or not available_subs:
        return None

    all_sub_langs = available_subs.keys()
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        # A list is used so that the order of languages will be the same as
        # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
        selected = []
        for pattern in self.params.get('subtitleslangs'):
            if pattern == 'all':
                selected.extend(all_sub_langs)
                continue
            # A leading '-' removes the matching languages instead of adding them
            discard = pattern[0] == '-'
            if discard:
                pattern = pattern[1:]
            matches = re.compile(pattern + '$').match
            matched = [lang for lang in all_sub_langs if matches(lang)]
            if discard:
                selected = [lang for lang in selected if lang not in matched]
            else:
                selected.extend(matched)
        requested_langs = orderedSet(selected)
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [list(all_sub_langs)[0]]
    if requested_langs:
        self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        if formats is None:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            candidates = [fmt for fmt in formats if fmt['ext'] == ext]
            if candidates:
                chosen = candidates[-1]
                break
        else:
            # No preference matched: fall back to the last listed format
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs

def __forced_printings(self, info_dict, filename, incomplete):
    """Write the fields requested through the `force*` params to stdout

    Works on a shallow copy of info_dict, so the caller's dict is not given
    the extra 'filename' and 'urls' keys.
    """

    def print_mandatory(field, actual_field=None):
        key = field if actual_field is None else actual_field
        if not self.params.get('force%s' % field, False):
            return
        # With incomplete info, a missing field is silently skipped
        if incomplete and info_dict.get(key) is None:
            return
        self.to_stdout(info_dict[key])

    def print_optional(field):
        if not self.params.get('force%s' % field, False):
            return
        if info_dict.get(field) is not None:
            self.to_stdout(info_dict[field])

    info_dict = info_dict.copy()
    if filename is not None:
        info_dict['filename'] = filename

    requested = info_dict.get('requested_formats')
    if requested is not None:
        # For RTMP URLs, also include the playpath
        info_dict['urls'] = '\n'.join(
            fmt['url'] + fmt.get('play_path', '') for fmt in info_dict['requested_formats'])
    elif 'url' in info_dict:
        info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

    if self.params.get('forceprint') or self.params.get('forcejson'):
        self.post_extract(info_dict)
    for tmpl in self.params.get('forceprint', []):
        # A bare field name (optionally suffixed with '=') is expanded into a template
        mobj = re.match(r'\w+(=?)$', tmpl)
        if mobj:
            if mobj.group(1):
                tmpl = f'{tmpl[:-1]} = %({tmpl[:-1]})s'
            else:
                tmpl = '%({})s'.format(tmpl)
        self.to_stdout(self.evaluate_outtmpl(tmpl, info_dict))

    print_mandatory('title')
    print_mandatory('id')
    print_mandatory('url', 'urls')
    for optional_field in ('thumbnail', 'description', 'filename'):
        print_optional(optional_field)
    if self.params.get('forceduration') and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    print_mandatory('format')

    if self.params.get('forcejson'):
        self.to_stdout(json.dumps(self.sanitize_info(info_dict)))

2567

2568

def dl(self, name, info, subtitle=False, test=False):
    """Pick a downloader for `info` and run it, writing to `name`

    With test=True a fixed set of downloader params is used instead of
    self.params, and no progress hooks are attached.
    Returns whatever the downloader's download() returns.
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    if not test:
        params = self.params
    else:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_cls(self, params)
    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)
        format_urls = [f['url'] for f in info.get('requested_formats', [])]
        urls = '", "'.join(format_urls or [info['url']])
        self.write_debug('Invoking downloader on "%s"' % urls)

    # The downloader gets its own deep copy of the info
    new_info = copy.deepcopy(self._copy_infodict(info))
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)

2598

2599

def process_info(self, info_dict):
    """Process a single resolved IE result.

    In order: checks the download limit and the match filters, prepares the
    output filenames, performs the forced printings, writes the auxiliary
    files (description, subtitles, thumbnails, info json, annotations,
    internet shortcuts), runs the 'before_dl' pre-processors, downloads the
    media (unless 'skip_download' is set), runs the post-processors and the
    post hooks, and finally records the video in the download archive.

    Returns None. Most failures are reported and end processing early.
    Raises MaxDownloadsReached when the download limit is hit, and
    UnavailableVideoError when the download fails with an OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    # TODO: backward compatibility, to be removed
    info_dict['fulltitle'] = info_dict['title']

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    if self._match_entry(info_dict) is not None:
        return

    self.post_extract(info_dict)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    if self.params.get('simulate'):
        if self.params.get('force_write_download_archive', False):
            self.record_download_archive(info_dict)
        # Do nothing else if in simulate mode
        return

    if full_filename is None:
        return
    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self._write_description('video', info_dict,
                               self.prepare_filename(info_dict, 'description')) is None:
        return

    sub_files = self._write_subtitles(info_dict, temp_filename)
    if sub_files is None:
        return
    files_to_move.update(dict(sub_files))

    thumb_files = self._write_thumbnails(
        'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
    if thumb_files is None:
        return
    files_to_move.update(dict(thumb_files))

    infofn = self.prepare_filename(info_dict, 'infojson')
    _infojson_written = self._write_info_json('video', info_dict, infofn)
    if _infojson_written:
        info_dict['infojson_filename'] = infofn
        # For backward compatibility, even though it was a private field
        info_dict['__infojson_filename'] = infofn
    elif _infojson_written is None:
        return

    # Note: Annotations are deprecated
    annofn = None
    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
    if annofn:
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    # Write internet shortcut files
    def _write_link_file(link_type):
        # Returns True when the shortcut exists or was written, False on failure
        if 'webpage_url' not in info_dict:
            self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
            return False
        linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
        if not self._ensure_dir_exists(encodeFilename(linkfn)):
            return False
        # Keep an existing shortcut only when overwriting is disabled,
        # the same rule that is applied to the annotations file
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
            return True
        try:
            self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
            with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                         newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
                if link_type == 'desktop':
                    template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
        except (OSError, IOError):
            self.report_error(f'Cannot write internet shortcut {linkfn}')
            return False
        return True

    write_links = {
        'url': self.params.get('writeurllink'),
        'webloc': self.params.get('writewebloclink'),
        'desktop': self.params.get('writedesktoplink'),
    }
    if self.params.get('writelink'):
        # 'writelink' selects the shortcut type from the current platform
        link_type = ('webloc' if sys.platform == 'darwin'
                     else 'desktop' if sys.platform.startswith('linux')
                     else 'url')
        write_links[link_type] = True

    if any(should_write and not _write_link_file(link_type)
           for link_type, should_write in write_links.items()):
        return

    try:
        info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    except PostProcessingError as err:
        self.report_error('Preprocessing: %s' % str(err))
        return

    must_record_download_archive = False
    if self.params.get('skip_download', False):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_file(*filepaths):
                # Returns the first already-existing file among filepaths (also
                # checking the 'final_ext' variant), or None after deleting the
                # existing files when overwriting is enabled
                ext = info_dict.get('ext')
                final_ext = self.params.get('final_ext', ext)
                existing_files = []
                for file in orderedSet(filepaths):
                    if final_ext != ext:
                        converted = replace_extension(file, final_ext, ext)
                        if os.path.exists(encodeFilename(converted)):
                            existing_files.append(converted)
                    if os.path.exists(encodeFilename(file)):
                        existing_files.append(file)

                if not existing_files or self.params.get('overwrites', False):
                    for file in orderedSet(existing_files):
                        self.report_file_delete(file)
                        os.remove(encodeFilename(file))
                    return None

                info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
                return existing_files[0]

            success = True
            if info_dict.get('requested_formats') is not None:

                def compatible_formats(formats):
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(format.get('ext') for format in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv')
                    if (info_dict['ext'] == 'webm'
                            and info_dict.get('thumbnails')
                            # check with type instead of pp_key, __name__, or isinstance
                            # since we dont want any custom PPs to trigger this
                            and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return '%s.%s' % (filename_wo_ext, ext)

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
                    info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    downloaded = []
                    merger = FFmpegMergerPP(self)
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        self.report_warning(
                            'You have requested merging of multiple formats but ffmpeg is not installed. '
                            'The formats won\'t be merged.')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                        fname = temp_filename
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            f['filepath'] = fname
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success
                    if merger.available and not self.params.get('allow_unplayable_formats'):
                        info_dict['__postprocessors'].append(merger)
                        info_dict['__files_to_merge'] = downloaded
                        # Even if there were no downloads, it is being merged only now
                        info_dict['__real_download'] = True
                    else:
                        for file in downloaded:
                            files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and full_filename != '-':

            def fixup():
                # Queue the ffmpeg fixup post-processors, or only warn,
                # depending on the 'fixup' param
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = False
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    if not cndn:
                        return
                    if not do_fixup:
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(
                    stretched_ratio not in (1, None),
                    f'Non-uniform pixel ratio {stretched_ratio}',
                    FFmpegFixupStretchedPP)

                ffmpeg_fixup(
                    (info_dict.get('requested_formats') is None
                     and info_dict.get('container') == 'm4a_dash'
                     and info_dict.get('ext') == 'm4a'),
                    'writing DASH m4a. Only some players support this container',
                    FFmpegFixupM4aPP)

                downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                downloader = downloader.__name__ if downloader else None
                ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
                             'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                             FFmpegFixupM3u8PP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                info_dict = self.post_process(dl_filename, info_dict, files_to_move)
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            must_record_download_archive = True

    if must_record_download_archive or self.params.get('force_write_download_archive', False):
        self.record_download_archive(info_dict)
    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None and self._num_downloads >= int(max_downloads):
        raise MaxDownloadsReached()

2961

2962

def __download_wrapper(self, func):
    """Wrap `func` with the error handling and JSON dumping shared by the download entry points

    The returned wrapper always returns None; the result of `func` is only
    used for the 'dump_single_json' output.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except UnavailableVideoError as err:
            self.report_error(err)
            return
        except DownloadCancelled as err:
            self.to_screen(f'[info] {err}')
            raise
        if not self.params.get('dump_single_json', False):
            return
        self.post_extract(result)
        self.to_stdout(json.dumps(self.sanitize_info(result)))
    return wrapper

2977

2978

def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one fixed
    output name. Returns self._download_retcode.
    """
    url_list = variadic(url_list)  # Passing a single URL is a common mistake
    outtmpl = self.outtmpl_dict['default']
    several_urls = len(url_list) > 1
    # A template without any '%' field yields the same name for every URL
    if (several_urls and outtmpl != '-' and '%' not in outtmpl
            and self.params.get('max_downloads') != 1):
        raise SameFileError(outtmpl)

    extract = self.__download_wrapper(self.extract_info)
    for url in url_list:
        extract(url, force_generic_extractor=self.params.get('force_generic_extractor', False))

    return self._download_retcode

2993

2994

def download_with_info_file(self, info_filename):
    """Download using the info JSON stored in `info_filename`

    If processing the stored info fails and it has a 'webpage_url', that
    URL is downloaded instead. Returns the download return code.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as f:
        # FileInput doesn't have a read method, we can't call json.load
        raw_info = json.loads('\n'.join(f))
        info = self.sanitize_info(raw_info, self.params.get('clean_infojson', True))
    try:
        self.__download_wrapper(self.process_ie_result)(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ThrottledDownload) as e:
        if not isinstance(e, EntryNotInPlaylist):
            self.to_stderr('\r')
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
        return self.download([webpage_url])
    return self._download_retcode

3012

3013

@staticmethod
def sanitize_info(info_dict, remove_private_keys=False):
    ''' Sanitize the infodict for converting to json

    Adds an 'epoch' key to info_dict if it has none and returns a filtered
    copy; None is returned unchanged.
    '''
    if info_dict is None:
        return info_dict
    info_dict.setdefault('epoch', int(time.time()))
    remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
    keep_keys = ['_type']  # Always keep this to facilitate load-info-json
    if remove_private_keys:
        remove_keys |= {
            'requested_formats', 'requested_subtitles', 'requested_entries', 'entries',
            'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
        }
        empty_values = (None, {}, [], set(), tuple())

        def reject(key, value):
            if key in keep_keys:
                return False
            return key.startswith('_') or key in remove_keys or value in empty_values
    else:
        def reject(key, value):
            return key in remove_keys

    def filter_fn(obj):
        # Sequences and sets become lists, dicts are filtered recursively
        if isinstance(obj, (LazyList, list, tuple, set)):
            return list(map(filter_fn, obj))
        if not isinstance(obj, dict):
            return obj
        return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}

    return filter_fn(info_dict)

3036

3037

@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    ''' Alias of sanitize_info for backward compatibility '''
    # actually_filter plays the role of sanitize_info's remove_private_keys
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)

3041

3042

def run_pp(self, pp, infodict):
    """Run the post-processor `pp` on `infodict` and handle the files it asks to delete

    Returns the info dict produced by the post-processor, or the input
    dict when a PostProcessingError is ignored.
    """
    infodict.setdefault('__files_to_move', {})
    try:
        files_to_delete, infodict = pp.run(infodict)
    except PostProcessingError as e:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is True:
            self.report_error(e)
            return infodict
        raise

    if not files_to_delete:
        return infodict

    files_to_move = infodict['__files_to_move']
    if self.params.get('keepvideo', False):
        # Kept files are registered for moving, without overriding an existing entry
        for kept in files_to_delete:
            files_to_move.setdefault(kept, '')
        return infodict

    for old_filename in set(files_to_delete):
        self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
        try:
            os.remove(encodeFilename(old_filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded original file')
        files_to_move.pop(old_filename, None)
    return infodict

@staticmethod
def post_extract(info_dict):
    """Merge the output of the '__post_extractor' callable into info_dict, in place

    Playlists and multi_video results are handled entry by entry.
    """
    def actual_post_extract(info):
        if info.get('_type') in ('playlist', 'multi_video'):
            for entry in info.get('entries', {}):
                actual_post_extract(entry or {})
            return

        run_extractor = info.get('__post_extractor') or (lambda: {})
        extra = run_extractor().items()
        info.update(extra)
        info.pop('__post_extractor', None)

        # The same fields are also stored in the referenced original infodict
        original = info.get('__original_infodict') or {}
        original.update(extra)
        original.pop('__post_extractor', None)

    actual_post_extract(info_dict or {})

3089

3090

def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the post-processors registered under `key` on a copy of ie_info

    Returns a tuple of the resulting info dict and its files-to-move mapping.
    """
    info = dict(ie_info)
    info['__files_to_move'] = files_to_move or {}
    for processor in self._pps[key]:
        info = self.run_pp(processor, info)
    # The mapping is handed back separately rather than left inside the info dict
    pending_moves = info.pop('__files_to_move', None)
    return info, pending_moves

3096

3097

def post_process(self, filename, ie_info, files_to_move=None):
    """Run all the postprocessors on the given file."""
    info = dict(ie_info)
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move or {}

    # Post-processors attached to this info run before the registered ones
    pipeline = ie_info.get('__postprocessors', []) + self._pps['post_process']
    for processor in pipeline:
        info = self.run_pp(processor, info)

    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    del info['__files_to_move']

    for processor in self._pps['after_move']:
        info = self.run_pp(processor, info)
    return info

3110

3111

def _make_archive_id(self, info_dict):
    """Build the '<extractor> <id>' string used in the download archive

    Returns None when the video id or the extractor cannot be determined.
    """
    video_id = info_dict.get('id')
    if not video_id:
        return None
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
    if extractor is not None:
        return '%s %s' % (extractor.lower(), video_id)

    url = str_or_none(info_dict.get('url'))
    if not url:
        return None
    # Try to find matching extractor for the URL and take its ie_key
    for candidate_key, ie in self._ies.items():
        if ie.suitable(url):
            extractor = candidate_key
            break
    else:
        return None
    return '%s %s' % (extractor.lower(), video_id)

3130

3131

def in_download_archive(self, info_dict):
    """Return whether the video described by info_dict is in the download archive"""
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    # No archive id means incomplete video information
    return archive_id in self.archive if archive_id else False

3141

3142

def record_download_archive(self, info_dict):
    """Append the video's archive id to the archive file and to self.archive

    Does nothing when no 'download_archive' file is configured.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)

3151

3152

@staticmethod
def format_resolution(format, default='unknown'):
    """Return a display string for the resolution of a format dict

    `default` is returned when neither a resolution nor a width/height
    is available.
    """
    if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width, height = format.get('width'), format.get('height')
    if width and height:
        return '%dx%d' % (width, height)
    if height:
        return '%sp' % height
    if width:
        return '%dx?' % width
    return default

3165

3166

def _format_note(self, fdict):
    """Build the free-form 'note' text describing a format dict"""

    def separated(text, sep=', '):
        # Append the separator only when something was written before
        return text + sep if text else text

    res = ''
    if fdict.get('ext') in ['f4f', 'f4m']:
        res += '(unsupported) '
    if fdict.get('language'):
        res = separated(res, ' ') + '[%s] ' % fdict['language']
    if fdict.get('format_note') is not None:
        res += fdict['format_note'] + ' '
    if fdict.get('tbr') is not None:
        res += '%4dk ' % fdict['tbr']
    if fdict.get('container') is not None:
        res = separated(res) + '%s container' % fdict['container']

    vcodec = fdict.get('vcodec')
    has_vbr = fdict.get('vbr') is not None
    has_abr = fdict.get('abr') is not None
    if vcodec is not None and vcodec != 'none':
        res = separated(res) + fdict['vcodec']
        if has_vbr:
            res += '@'
    elif has_vbr and has_abr:
        res += 'video@'
    if has_vbr:
        res += '%4dk' % fdict['vbr']
    if fdict.get('fps') is not None:
        res = separated(res) + '%sfps' % fdict['fps']

    if fdict.get('acodec') is not None:
        res = separated(res)
        if fdict['acodec'] == 'none':
            res += 'video only'
        else:
            res += '%-5s' % fdict['acodec']
    elif has_abr:
        res = separated(res) + 'audio'
    if has_abr:
        res += '@%3dk' % fdict['abr']
    if fdict.get('asr') is not None:
        res += ' (%5dHz)' % fdict['asr']

    if fdict.get('filesize') is not None:
        res = separated(res) + format_bytes(fdict['filesize'])
    elif fdict.get('filesize_approx') is not None:
        res = separated(res) + '~' + format_bytes(fdict['filesize_approx'])
    return res

3221

3222

def _list_format_headers(self, *headers):
    """Return the table headers, styled unless 'listformats_table' is False"""
    if self.params.get('listformats_table', True) is False:
        return headers
    return [self._format_screen(title, self.Styles.HEADERS) for title in headers]

3226

3227

def list_formats(self, info_dict):
    """Print a table of the available formats of info_dict

    The new detailed table is used unless the 'listformats_table' param is
    False. Formats with a 'preference' below -1000 are not listed.
    """
    formats = info_dict.get('formats', [info_dict])
    new_format = self.params.get('listformats_table', True) is not False

    def is_listed(f):
        return f.get('preference') is None or f['preference'] >= -1000

    if new_format:
        delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)

        def detailed_row(f):
            return [
                self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
                format_field(f, 'ext'),
                format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
                format_field(f, 'fps', '\t%d'),
                format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
                delim,
                format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
                format_field(f, 'tbr', '\t%dk'),
                shorten_protocol_name(f.get('protocol', '').replace('native', 'n')),
                delim,
                format_field(f, 'vcodec', default='unknown').replace(
                    'none',
                    'images' if f.get('acodec') == 'none'
                    else self._format_screen('audio only', self.Styles.SUPPRESS)),
                format_field(f, 'vbr', '\t%dk'),
                format_field(f, 'acodec', default='unknown').replace(
                    'none',
                    '' if f.get('vcodec') == 'none'
                    else self._format_screen('video only', self.Styles.SUPPRESS)),
                format_field(f, 'abr', '\t%dk'),
                format_field(f, 'asr', '\t%dHz'),
                join_nonempty(
                    self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
                    format_field(f, 'language', '[%s]'),
                    join_nonempty(
                        format_field(f, 'format_note'),
                        format_field(f, 'container', ignore=(None, f.get('ext'))),
                        delim=', '),
                    delim=' '),
            ]

        table = [detailed_row(f) for f in formats if is_listed(f)]
        header_line = self._list_format_headers(
            'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
            delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
    else:
        # Legacy four-column layout
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f),
            ]
            for f in formats if is_listed(f)]
        header_line = ['format code', 'extension', 'resolution', 'note']

    self.to_screen('[info] Available formats for %s:' % info_dict['id'])
    rendered = render_table(
        header_line, table,
        extra_gap=(0 if new_format else 1),
        hide_empty=new_format,
        delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
    self.to_stdout(rendered)

3285

3286

def list_thumbnails(self, info_dict):
    """ Print a table (ID, Width, Height, URL) of the thumbnails in info_dict.

    A missing or None 'thumbnails' entry is treated the same as an empty
    list: only the "No thumbnails present" message is shown.
    """
    # `or []` guards against list(None), which would raise TypeError
    # before the "no thumbnails" message could ever be printed
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
        return

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    self.to_stdout(render_table(
        self._list_format_headers('ID', 'Width', 'Height', 'URL'),
        [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))

3297

3298

def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """ Print a table of the available subtitles (language, name, formats).

    `subtitles` maps a language code to a list of format dicts; `name` is
    only used in the messages shown to the user.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return
    self.to_screen(
        'Available %s for %s:' % (name, video_id))

    def _row(lang, formats):
        # Formats are listed in reverse order of the input list
        pairs = [(fmt['ext'], fmt.get('name') or 'unknown') for fmt in reversed(formats)]
        exts, names = zip(*pairs)
        if len(set(names)) == 1:
            # Every format shares one name: show it once, or not at all if unknown
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    headers = self._list_format_headers('Language', 'Name', 'Formats')
    rows = [_row(lang, formats) for lang, formats in subtitles.items()]
    self.to_stdout(render_table(headers, rows, hide_empty=True))

3315

3316

def urlopen(self, req):
    """ Start an HTTP download """
    # A plain URL string is wrapped into a request object first
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)

3321

3322

def print_debug_header(self):
    """ Write diagnostic information (versions, encodings, proxies) as debug output.

    Does nothing unless the 'verbose' param is set. Messages go to the
    'logger' param when one is given, otherwise through write_string /
    self._write_string.
    """
    if not self.params.get('verbose'):
        return

    def get_encoding(stream):
        ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
        if not supports_terminal_sequences(stream):
            ret += ' (No ANSI)'
        return ret

    encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        get_encoding(self._screen_file), get_encoding(self._err_file),
        self.get_encoding())

    logger = self.params.get('logger')
    if logger:
        write_debug = lambda msg: logger.debug(f'[debug] {msg}')
        write_debug(encoding_str)
    else:
        # The encoding line itself is written with encoding=None
        write_string(f'[debug] {encoding_str}\n', encoding=None)
        write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    write_debug('yt-dlp version %s%s' % (__version__, '' if source == 'unknown' else f' ({source})'))
    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if plugin_extractors or plugin_postprocessors:
        write_debug('Plugins: %s' % [
            '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
    if self.params.get('compat_opts'):
        write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
    try:
        sp = Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, _ = sp.communicate_or_kill()
        out = out.decode().strip()
        # fullmatch: the whole output must be hex digits, not just its prefix
        if re.fullmatch('[0-9a-f]+', out):
            write_debug('Git HEAD: %s' % out)
    except Exception:
        # Best effort only: a missing git binary or a non-repository
        # checkout must never prevent the rest of the header from printing
        pass

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    write_debug('Python version %s (%s %s) - %s' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    # Keep only the names of the features whose value is truthy
    ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
    if ffmpeg_features:
        exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .downloader.websocket import has_websockets
    from .postprocessor.embedthumbnail import has_mutagen
    from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE

    lib_str = join_nonempty(
        compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
        KEYRING_AVAILABLE and 'keyring',
        has_mutagen and 'mutagen',
        SQLITE_AVAILABLE and 'sqlite',
        has_websockets and 'websockets',
        delim=', ') or 'none'
    write_debug('Optional libraries: %s' % lib_str)

    # Merge the proxy mappings of every opener handler that exposes one
    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_debug(f'Proxy map: {proxy_map}')

    # Not implemented
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        write_debug('Public IP address: %s' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)

3428

3429

def _setup_opener(self):
    """ Build the urllib opener used for requests and store it in self._opener.

    Also sets self._socket_timeout (20 when the 'socket_timeout' param is
    None) and self.cookiejar.
    """
    socket_timeout = self.params.get('socket_timeout')
    if socket_timeout is None:
        self._socket_timeout = 20
    else:
        self._socket_timeout = float(socket_timeout)

    browser_spec = self.params.get('cookiesfrombrowser')
    cookie_file = self.params.get('cookiefile')
    proxy_opt = self.params.get('proxy')

    self.cookiejar = load_cookies(cookie_file, browser_spec, self)
    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if proxy_opt is None:
        # No explicit option: use the proxies reported by urllib
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_opt == '':
        # An empty string means "no proxies at all"
        proxies = {}
    else:
        proxies = {'http': proxy_opt, 'https': proxy_opt}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = int(bool(self.params.get('debug_printtraffic')))
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def _refuse_file_scheme(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
    file_handler.file_open = _refuse_file_scheme

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

3476

3477

def encode(self, s):
    """ Return `s` as bytes, encoding text with self.get_encoding().

    Bytes are returned unchanged. A UnicodeEncodeError is re-raised with a
    hint about the --encoding option appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s  # Already encoded

3486

3487

def get_encoding(self):
    """ Return the 'encoding' param, or preferredencoding() when it is None """
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured

3492

3493

def _write_info_json(self, label, ie_result, infofn, overwrite=None):

3494

''' Write infojson and returns True = written, False = skip, None = error '''

3495

if overwrite is None:

3496

overwrite = self.params.get('overwrites', True)

3497

if not self.params.get('writeinfojson'):

3498

return False

3499

elif not infofn:

3500

self.write_debug(f'Skipping writing {label} infojson')

3501

return False

3502

elif not self._ensure_dir_exists(infofn):

3503

return None

3504

elif not overwrite and os.path.exists(infofn):

3505

self.to_screen(f'[info] {label.title()} metadata is already present')

3506

else:

3507

self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')

3508

try:

3509

write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)

3510

except (OSError, IOError):

3511

self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')

return None

return True

def _write_description(self, label, ie_result, descfn):

3516

''' Write description and returns True = written, False = skip, None = error '''

3517

if not self.params.get('writedescription'):

3518

return False

3519

elif not descfn:

3520

self.write_debug(f'Skipping writing {label} description')

3521

return False

3522

elif not self._ensure_dir_exists(descfn):

3523

return None

3524

elif not self.params.get('overwrites', True) and os.path.exists(descfn):

3525

self.to_screen(f'[info] {label.title()} description is already present')

3526

elif ie_result.get('description') is None:

3527

self.report_warning(f'There\'s no {label} description to write')

return False

else:

try:

self.to_screen(f'[info] Writing {label} description to: {descfn}')

3532

with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:

3533

descfile.write(ie_result['description'])

3534

except (OSError, IOError):

3535

self.report_error(f'Cannot write {label} description file {descfn}')

return None

return True

def _write_subtitles(self, info_dict, filename):

3540

''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''

3541

ret = []

3542

subtitles = info_dict.get('requested_subtitles')

3543

if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):

3544

# subtitles download errors are already managed as troubles in relevant IE

3545

# that way it will silently go on when used with unsupporting IE

3546

return ret

3547

3548

sub_filename_base = self.prepare_filename(info_dict, 'subtitle')

3549

if not sub_filename_base:

3550

self.to_screen('[info] Skipping writing video subtitles')

3551

return ret

3552

for sub_lang, sub_info in subtitles.items():

3553

sub_format = sub_info['ext']

3554

sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))

3555

sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))

3556

if not self.params.get('overwrites', True) and os.path.exists(sub_filename):

3557

self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')

3558

sub_info['filepath'] = sub_filename

3559

ret.append((sub_filename, sub_filename_final))

3560

continue

3561

3562

self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')

3563

if sub_info.get('data') is not None:

3564

try:

3565

# Use newline='' to prevent conversion of newline characters

3566

# See https://github.com/ytdl-org/youtube-dl/issues/10268

3567

with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:

3568

subfile.write(sub_info['data'])

3569

sub_info['filepath'] = sub_filename

3570

ret.append((sub_filename, sub_filename_final))

3571

continue

3572

except (OSError, IOError):

3573

self.report_error(f'Cannot write video subtitles file {sub_filename}')

return None

try:

sub_copy = sub_info.copy()

3578

sub_copy.setdefault('http_headers', info_dict.get('http_headers'))

3579

self.dl(sub_filename, sub_copy, subtitle=True)

3580

sub_info['filepath'] = sub_filename

3581

ret.append((sub_filename, sub_filename_final))

3582

except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:

3583

self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')

continue

return ret

def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):

3588

''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''

3589

write_all = self.params.get('write_all_thumbnails', False)

3590

thumbnails, ret = [], []

3591

if write_all or self.params.get('writethumbnail', False):

3592

thumbnails = info_dict.get('thumbnails') or []

3593

multiple = write_all and len(thumbnails) > 1

3594

3595

if thumb_filename_base is None:

3596

thumb_filename_base = filename

3597

if thumbnails and not thumb_filename_base:

3598

self.write_debug(f'Skipping writing {label} thumbnail')

3599

return ret

3600

3601

for t in thumbnails[::-1]:

3602

thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')

3603

thumb_display_id = f'{label} thumbnail {t["id"]}'

3604

thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))

3605

thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

3606

3607

if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):

3608

ret.append((thumb_filename, thumb_filename_final))

3609

t['filepath'] = thumb_filename

3610

self.to_screen('[info] %s is already present' % (

3611

thumb_display_id if multiple else f'{label} thumbnail').capitalize())

3612

else:

3613

self.to_screen(f'[info] Downloading {thumb_display_id} ...')

3614

try:

3615

uf = self.urlopen(t['url'])

3616

self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')

3617

with open(encodeFilename(thumb_filename), 'wb') as thumbf:

3618

shutil.copyfileobj(uf, thumbf)

3619

ret.append((thumb_filename, thumb_filename_final))

3620

t['filepath'] = thumb_filename

3621

except network_exceptions as err:

3622

self.report_warning(f'Unable to download {thumb_display_id}: {err}')

3623

if ret and not write_all:

3624

break

3625

return ret