jfr.im git - yt-dlp.git/blame_incremental

Commit	Line	Data
	1	#!/usr/bin/env python3
	2	# coding: utf-8
	3
	4	from __future__ import absolute_import, unicode_literals
	5
	6	import collections
	7	import contextlib
	8	import copy
	9	import datetime
	10	import errno
	11	import fileinput
	12	import io
	13	import itertools
	14	import json
	15	import locale
	16	import operator
	17	import os
	18	import platform
	19	import re
	20	import shutil
	21	import subprocess
	22	import sys
	23	import tempfile
	24	import time
	25	import tokenize
	26	import traceback
	27	import random
	28
	29	from string import ascii_letters
	30	from zipimport import zipimporter
	31
	32	from .compat import (
	33	compat_basestring,
	34	compat_cookiejar,
	35	compat_get_terminal_size,
	36	compat_kwargs,
	37	compat_numeric_types,
	38	compat_os_name,
	39	compat_str,
	40	compat_tokenize_tokenize,
	41	compat_urllib_error,
	42	compat_urllib_request,
	43	compat_urllib_request_DataHandler,
	44	)
	45	from .utils import (
	46	age_restricted,
	47	args_to_str,
	48	ContentTooShortError,
	49	date_from_str,
	50	DateRange,
	51	DEFAULT_OUTTMPL,
	52	determine_ext,
	53	determine_protocol,
	54	DOT_DESKTOP_LINK_TEMPLATE,
	55	DOT_URL_LINK_TEMPLATE,
	56	DOT_WEBLOC_LINK_TEMPLATE,
	57	DownloadError,
	58	encode_compat_str,
	59	encodeFilename,
	60	EntryNotInPlaylist,
	61	error_to_compat_str,
	62	ExistingVideoReached,
	63	expand_path,
	64	ExtractorError,
	65	float_or_none,
	66	format_bytes,
	67	format_field,
	68	STR_FORMAT_RE,
	69	formatSeconds,
	70	GeoRestrictedError,
	71	HEADRequest,
	72	int_or_none,
	73	iri_to_uri,
	74	ISO3166Utils,
	75	LazyList,
	76	locked_file,
	77	make_dir,
	78	make_HTTPS_handler,
	79	MaxDownloadsReached,
	80	network_exceptions,
	81	orderedSet,
	82	OUTTMPL_TYPES,
	83	PagedList,
	84	parse_filesize,
	85	PerRequestProxyHandler,
	86	platform_name,
	87	PostProcessingError,
	88	preferredencoding,
	89	prepend_extension,
	90	process_communicate_or_kill,
	91	register_socks_protocols,
	92	RejectedVideoReached,
	93	render_table,
	94	replace_extension,
	95	SameFileError,
	96	sanitize_filename,
	97	sanitize_path,
	98	sanitize_url,
	99	sanitized_Request,
	100	std_headers,
	101	str_or_none,
	102	strftime_or_none,
	103	subtitles_filename,
	104	ThrottledDownload,
	105	to_high_limit_path,
	106	traverse_obj,
	107	try_get,
	108	UnavailableVideoError,
	109	url_basename,
	110	version_tuple,
	111	write_json_file,
	112	write_string,
	113	YoutubeDLCookieJar,
	114	YoutubeDLCookieProcessor,
	115	YoutubeDLHandler,
	116	YoutubeDLRedirectHandler,
	117	)
	118	from .cache import Cache
	119	from .extractor import (
	120	gen_extractor_classes,
	121	get_info_extractor,
	122	_LAZY_LOADER,
	123	_PLUGIN_CLASSES
	124	)
	125	from .extractor.openload import PhantomJSwrapper
	126	from .downloader import (
	127	get_suitable_downloader,
	128	shorten_protocol_name
	129	)
	130	from .downloader.rtmp import rtmpdump_version
	131	from .postprocessor import (
	132	get_postprocessor,
	133	FFmpegFixupDurationPP,
	134	FFmpegFixupM3u8PP,
	135	FFmpegFixupM4aPP,
	136	FFmpegFixupStretchedPP,
	137	FFmpegFixupTimestampPP,
	138	FFmpegMergerPP,
	139	FFmpegPostProcessor,
	140	MoveFilesAfterDownloadPP,
	141	)
	142	from .version import __version__
	143
	144	if compat_os_name == 'nt':
	145	import ctypes
	146
	147
	148	class YoutubeDL(object):
	149	"""YoutubeDL class.
	150
	151	YoutubeDL objects are the ones responsible for downloading the
	152	actual video file and writing it to disk if the user has requested
	153	it, among some other tasks. In most cases there should be one per
	154	program. As, given a video URL, the downloader doesn't know how to
	155	extract all the needed information, task that InfoExtractors do, it
	156	has to pass the URL to one of them.
	157
	158	For this, YoutubeDL objects have a method that allows
	159	InfoExtractors to be registered in a given order. When it is passed
	160	a URL, the YoutubeDL object hands it to the first InfoExtractor it
	161	finds that reports being able to handle it. The InfoExtractor extracts
	162	all the information about the video or videos the URL refers to, and
	163	YoutubeDL processes the extracted information, possibly using a File
	164	Downloader to download the video.
	165
	166	YoutubeDL objects accept a lot of parameters. In order not to saturate
	167	the object constructor with arguments, it receives a dictionary of
	168	options instead. These options are available through the params
	169	attribute for the InfoExtractors to use. The YoutubeDL also
	170	registers itself as the downloader in charge for the InfoExtractors
	171	that are added to it, so this is a "mutual registration".
	172
	173	Available options:
	174
	175	username: Username for authentication purposes.
	176	password: Password for authentication purposes.
	177	videopassword: Password for accessing a video.
	178	ap_mso: Adobe Pass multiple-system operator identifier.
	179	ap_username: Multiple-system operator account username.
	180	ap_password: Multiple-system operator account password.
	181	usenetrc: Use netrc for authentication instead.
	182	verbose: Print additional info to stdout.
	183	quiet: Do not print messages to stdout.
	184	no_warnings: Do not print out anything for warnings.
	185	forceprint: A list of templates to force print
	186	forceurl: Force printing final URL. (Deprecated)
	187	forcetitle: Force printing title. (Deprecated)
	188	forceid: Force printing ID. (Deprecated)
	189	forcethumbnail: Force printing thumbnail URL. (Deprecated)
	190	forcedescription: Force printing description. (Deprecated)
	191	forcefilename: Force printing final filename. (Deprecated)
	192	forceduration: Force printing duration. (Deprecated)
	193	forcejson: Force printing info_dict as JSON.
	194	dump_single_json: Force printing the info_dict of the whole playlist
	195	(or video) as a single JSON line.
	196	force_write_download_archive: Force writing download archive regardless
	197	of 'skip_download' or 'simulate'.
	198	simulate: Do not download the video files.
	199	format: Video format code. see "FORMAT SELECTION" for more details.
	200	allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
	201	ignore_no_formats_error: Ignore "No video formats" error. Useful for
	202	extracting metadata even if the video is not actually
	203	available for download (experimental)
	204	format_sort: How to sort the video formats. see "Sorting Formats"
	205	for more details.
	206	format_sort_force: Force the given format_sort. see "Sorting Formats"
	207	for more details.
	208	allow_multiple_video_streams: Allow multiple video streams to be merged
	209	into a single file
	210	allow_multiple_audio_streams: Allow multiple audio streams to be merged
	211	into a single file
	212	paths: Dictionary of output paths. The allowed keys are 'home',
	213	'temp' and the keys of OUTTMPL_TYPES (in utils.py)
	214	outtmpl: Dictionary of templates for output names. Allowed keys
	215	are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
	216	A string is also accepted for backward compatibility
	217	outtmpl_na_placeholder: Placeholder for unavailable meta fields.
	218	restrictfilenames: Do not allow "&" and spaces in file names
	219	trim_file_name: Limit length of filename (extension excluded)
	220	windowsfilenames: Force the filenames to be windows compatible
	221	ignoreerrors: Do not stop on download errors
	222	(Default True when running yt-dlp,
	223	but False when directly accessing YoutubeDL class)
	224	skip_playlist_after_errors: Number of allowed failures until the rest of
	225	the playlist is skipped
	226	force_generic_extractor: Force downloader to use the generic extractor
	227	overwrites: Overwrite all video and metadata files if True,
	228	overwrite only non-video files if None
	229	and don't overwrite any file if False
	230	playliststart: Playlist item to start at.
	231	playlistend: Playlist item to end at.
	232	playlist_items: Specific indices of playlist to download.
	233	playlistreverse: Download playlist items in reverse order.
	234	playlistrandom: Download playlist items in random order.
	235	matchtitle: Download only matching titles.
	236	rejecttitle: Reject downloads for matching titles.
	237	logger: Log messages to a logging.Logger instance.
	238	logtostderr: Log messages to stderr instead of stdout.
	239	writedescription: Write the video description to a .description file
	240	writeinfojson: Write the video metadata to a .info.json file
	241	clean_infojson: Remove private fields from the infojson
	242	writecomments: Extract video comments. This will not be written to disk
	243	unless writeinfojson is also given
	244	writeannotations: Write the video annotations to a .annotations.xml file
	245	writethumbnail: Write the thumbnail image to a file
	246	allow_playlist_files: Whether to write playlists' description, infojson etc
	247	also to disk when using the 'write*' options
	248	write_all_thumbnails: Write all thumbnail formats to files
	249	writelink: Write an internet shortcut file, depending on the
	250	current platform (.url/.webloc/.desktop)
	251	writeurllink: Write a Windows internet shortcut file (.url)
	252	writewebloclink: Write a macOS internet shortcut file (.webloc)
	253	writedesktoplink: Write a Linux internet shortcut file (.desktop)
	254	writesubtitles: Write the video subtitles to a file
	255	writeautomaticsub: Write the automatically generated subtitles to a file
	256	allsubtitles: Deprecated - Use subtitleslangs = ['all']
	257	Downloads all the subtitles of the video
	258	(requires writesubtitles or writeautomaticsub)
	259	listsubtitles: Lists all available subtitles for the video
	260	subtitlesformat: The format code for subtitles
	261	subtitleslangs: List of languages of the subtitles to download (can be regex).
	262	The list may contain "all" to refer to all the available
	263	subtitles. The language can be prefixed with a "-" to
	264	exclude it from the requested languages. Eg: ['all', '-live_chat']
	265	keepvideo: Keep the video file after post-processing
	266	daterange: A DateRange object, download only if the upload_date is in the range.
	267	skip_download: Skip the actual download of the video file
	268	cachedir: Location of the cache files in the filesystem.
	269	False to disable filesystem cache.
	270	noplaylist: Download single video instead of a playlist if in doubt.
	271	age_limit: An integer representing the user's age in years.
	272	Unsuitable videos for the given age are skipped.
	273	min_views: An integer representing the minimum view count the video
	274	must have in order to not be skipped.
	275	Videos without view count information are always
	276	downloaded. None for no limit.
	277	max_views: An integer representing the maximum view count.
	278	Videos that are more popular than that are not
	279	downloaded.
	280	Videos without view count information are always
	281	downloaded. None for no limit.
	282	download_archive: File name of a file where all downloads are recorded.
	283	Videos already present in the file are not downloaded
	284	again.
	285	break_on_existing: Stop the download process after attempting to download a
	286	file that is in the archive.
	287	break_on_reject: Stop the download process when encountering a video that
	288	has been filtered out.
	289	cookiefile: File name where cookies should be read from and dumped to
	290	nocheckcertificate:Do not verify SSL certificates
	291	prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
	292	At the moment, this is only supported by YouTube.
	293	proxy: URL of the proxy server to use
	294	geo_verification_proxy: URL of the proxy to use for IP address verification
	295	on geo-restricted sites.
	296	socket_timeout: Time to wait for unresponsive hosts, in seconds
	297	bidi_workaround: Work around buggy terminals without bidirectional text
	298	support, using fribidi
	299	debug_printtraffic:Print out sent and received HTTP traffic
	300	include_ads: Download ads as well
	301	default_search: Prepend this string if an input url is not valid.
	302	'auto' for elaborate guessing
	303	encoding: Use this encoding instead of the system-specified.
	304	extract_flat: Do not resolve URLs, return the immediate result.
	305	Pass in 'in_playlist' to only show this behavior for
	306	playlist items.
	307	postprocessors: A list of dictionaries, each with an entry
	308	* key: The name of the postprocessor. See
	309	yt_dlp/postprocessor/__init__.py for a list.
	310	* when: When to run the postprocessor. Can be one of
	311	pre_process|before_dl|post_process|after_move.
	312	Assumed to be 'post_process' if not given
	313	post_hooks: A list of functions that get called as the final step
	314	for each video file, after all postprocessors have been
	315	called. The filename will be passed as the only argument.
	316	progress_hooks: A list of functions that get called on download
	317	progress, with a dictionary with the entries
	318	* status: One of "downloading", "error", or "finished".
	319	Check this first and ignore unknown values.
	320
	321	If status is one of "downloading", or "finished", the
	322	following properties may also be present:
	323	* filename: The final filename (always present)
	324	* tmpfilename: The filename we're currently writing to
	325	* downloaded_bytes: Bytes on disk
	326	* total_bytes: Size of the whole file, None if unknown
	327	* total_bytes_estimate: Guess of the eventual file size,
	328	None if unavailable.
	329	* elapsed: The number of seconds since download started.
	330	* eta: The estimated time in seconds, None if unknown
	331	* speed: The download speed in bytes/second, None if
	332	unknown
	333	* fragment_index: The counter of the currently
	334	downloaded video fragment.
	335	* fragment_count: The number of fragments (= individual
	336	files that will be merged)
	337
	338	Progress hooks are guaranteed to be called at least once
	339	(with status "finished") if the download is successful.
	340	merge_output_format: Extension to use when merging formats.
	341	final_ext: Expected final extension; used to detect when the file was
	342	already downloaded and converted. "merge_output_format" is
	343	replaced by this extension when given
	344	fixup: Automatically correct known faults of the file.
	345	One of:
	346	- "never": do nothing
	347	- "warn": only emit a warning
	348	- "detect_or_warn": check whether we can do anything
	349	about it, warn otherwise (default)
	350	source_address: Client-side IP address to bind to.
	351	call_home: Boolean, true iff we are allowed to contact the
	352	yt-dlp servers for debugging. (BROKEN)
	353	sleep_interval_requests: Number of seconds to sleep between requests
	354	during extraction
	355	sleep_interval: Number of seconds to sleep before each download when
	356	used alone or a lower bound of a range for randomized
	357	sleep before each download (minimum possible number
	358	of seconds to sleep) when used along with
	359	max_sleep_interval.
	360	max_sleep_interval:Upper bound of a range for randomized sleep before each
	361	download (maximum possible number of seconds to sleep).
	362	Must only be used along with sleep_interval.
	363	Actual sleep time will be a random float from range
	364	[sleep_interval; max_sleep_interval].
	365	sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
	366	listformats: Print an overview of available video formats and exit.
	367	list_thumbnails: Print a table of all thumbnails and exit.
	368	match_filter: A function that gets called with the info_dict of
	369	every video.
	370	If it returns a message, the video is ignored.
	371	If it returns None, the video is downloaded.
	372	match_filter_func in utils.py is one example for this.
	373	no_color: Do not emit color codes in output.
	374	geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
	375	HTTP header
	376	geo_bypass_country:
	377	Two-letter ISO 3166-1 alpha-2 country code that will be used for
	378	explicit geographic restriction bypassing via faking
	379	X-Forwarded-For HTTP header
	380	geo_bypass_ip_block:
	381	IP range in CIDR notation that will be used similarly to
	382	geo_bypass_country
	383
	384	The following options determine which downloader is picked:
	385	external_downloader: A dictionary of protocol keys and the executable of the
	386	external downloader to use for it. The allowed protocols
	387	are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
	388	Set the value to 'native' to use the native downloader
	389	hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
	390	or {'m3u8': 'ffmpeg'} instead.
	391	Use the native HLS downloader instead of ffmpeg/avconv
	392	if True, otherwise use ffmpeg/avconv if False, otherwise
	393	use downloader suggested by extractor if None.
	394	compat_opts: Compatibility options. See "Differences in default behavior".
	395	Note that only format-sort, format-spec, no-live-chat,
	396	no-attach-info-json, playlist-index, list-formats,
	397	no-direct-merge, embed-thumbnail-atomicparsley,
	398	no-youtube-unavailable-videos, no-youtube-channel-redirect,
	399	work when used via the API
	400
	401	The following parameters are not used by YoutubeDL itself, they are used by
	402	the downloader (see yt_dlp/downloader/common.py):
	403	nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
	404	max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
	405	xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
	406
	407	The following options are used by the post processors:
	408	prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
	409	otherwise prefer ffmpeg. (avconv support is deprecated)
	410	ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
	411	to the binary or its containing directory.
	412	postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
	413	and a list of additional command-line arguments for the
	414	postprocessor/executable. The dict can also have "PP+EXE" keys
	415	which are used when the given exe is used by the given PP.
	416	Use 'default' as the name for arguments to be passed to all PP
	417
	418	The following options are used by the extractors:
	419	extractor_retries: Number of times to retry for known errors
	420	dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
	421	hls_split_discontinuity: Split HLS playlists to different formats at
	422	discontinuities such as ad breaks (default: False)
	423	extractor_args: A dictionary of arguments to be passed to the extractors.
	424	See "EXTRACTOR ARGUMENTS" for details.
	425	Eg: {'youtube': {'skip': ['dash', 'hls']}}
	426	youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
	427	If True (default), DASH manifests and related
	428	data will be downloaded and processed by extractor.
	429	You can reduce network I/O by disabling it if you don't
	430	care about DASH. (only for youtube)
	431	youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
	432	If True (default), HLS manifests and related
	433	data will be downloaded and processed by extractor.
	434	You can reduce network I/O by disabling it if you don't
	435	care about HLS. (only for youtube)
	436	"""
	437
	438	_NUMERIC_FIELDS = set((
	439	'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
	440	'timestamp', 'upload_year', 'upload_month', 'upload_day',
	441	'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
	442	'average_rating', 'comment_count', 'age_limit',
	443	'start_time', 'end_time',
	444	'chapter_number', 'season_number', 'episode_number',
	445	'track_number', 'disc_number', 'release_year',
	446	'playlist_index',
	447	))
	448
	449	params = None
	450	_ies = []
	451	_pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
	452	__prepare_filename_warned = False
	453	_first_webpage_request = True
	454	_download_retcode = None
	455	_num_downloads = None
	456	_playlist_level = 0
	457	_playlist_urls = set()
	458	_screen_file = None
	459
	460	def __init__(self, params=None, auto_init=True):
	461	"""Create a YoutubeDL object with the given options."""
	462	if params is None:
	463	params = {}
	464	self._ies = []
	465	self._ies_instances = {}
	466	self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
	467	self.__prepare_filename_warned = False
	468	self._first_webpage_request = True
	469	self._post_hooks = []
	470	self._progress_hooks = []
	471	self._download_retcode = 0
	472	self._num_downloads = 0
	473	self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
	474	self._err_file = sys.stderr
	475	self.params = {
	476	# Default parameters
	477	'nocheckcertificate': False,
	478	}
	479	self.params.update(params)
	480	self.cache = Cache(self)
	481
	482	if sys.version_info < (3, 6):
	483	self.report_warning(
	484	'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
	485
	486	def check_deprecated(param, option, suggestion):
	487	if self.params.get(param) is not None:
	488	self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
	489	return True
	490	return False
	491
	492	if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
	493	if self.params.get('geo_verification_proxy') is None:
	494	self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
	495
	496	check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
	497	check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
	498	check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
	499
	500	for msg in self.params.get('warnings', []):

1

#!/usr/bin/env python3

2

# coding: utf-8

3

4

from __future__ import absolute_import, unicode_literals

import collections

import contextlib

import copy

import datetime

import errno

import fileinput

import io

import itertools

import json

import locale

import operator

import os

import platform

import re

import shutil

import subprocess

import sys

import tempfile

import time

import tokenize

import traceback

import random

from string import ascii_letters

30

from zipimport import zipimporter

31

32

from .compat import (

33

compat_basestring,

34

compat_cookiejar,

35

compat_get_terminal_size,

36

compat_kwargs,

37

compat_numeric_types,

38

compat_os_name,

39

compat_str,

40

compat_tokenize_tokenize,

41

compat_urllib_error,

42

compat_urllib_request,

43

compat_urllib_request_DataHandler,

)

from .utils import (

age_restricted,

args_to_str,

ContentTooShortError,

date_from_str,

DateRange,

DEFAULT_OUTTMPL,

determine_ext,

determine_protocol,

DOT_DESKTOP_LINK_TEMPLATE,

55

DOT_URL_LINK_TEMPLATE,

56

DOT_WEBLOC_LINK_TEMPLATE,

DownloadError,

encode_compat_str,

encodeFilename,

EntryNotInPlaylist,

error_to_compat_str,

ExistingVideoReached,

expand_path,

ExtractorError,

float_or_none,

format_bytes,

format_field,

STR_FORMAT_RE,

formatSeconds,

GeoRestrictedError,

HEADRequest,

int_or_none,

iri_to_uri,

ISO3166Utils,

LazyList,

locked_file,

make_dir,

make_HTTPS_handler,

MaxDownloadsReached,

network_exceptions,

orderedSet,

OUTTMPL_TYPES,

PagedList,

parse_filesize,

PerRequestProxyHandler,

platform_name,

PostProcessingError,

preferredencoding,

prepend_extension,

process_communicate_or_kill,

91

register_socks_protocols,

92

RejectedVideoReached,

render_table,

replace_extension,

SameFileError,

sanitize_filename,

sanitize_path,

sanitize_url,

sanitized_Request,

std_headers,

str_or_none,

strftime_or_none,

subtitles_filename,

ThrottledDownload,

to_high_limit_path,

traverse_obj,

try_get,

UnavailableVideoError,

url_basename,

version_tuple,

write_json_file,

write_string,

YoutubeDLCookieJar,

YoutubeDLCookieProcessor,

115

YoutubeDLHandler,

116

YoutubeDLRedirectHandler,

117

)

118

from .cache import Cache

119

from .extractor import (

120

gen_extractor_classes,

get_info_extractor,

_LAZY_LOADER,

_PLUGIN_CLASSES

)

from .extractor.openload import PhantomJSwrapper

126

from .downloader import (

127

get_suitable_downloader,

128

shorten_protocol_name

129

)

130

from .downloader.rtmp import rtmpdump_version

131

from .postprocessor import (

132

get_postprocessor,

133

FFmpegFixupDurationPP,

134

FFmpegFixupM3u8PP,

135

FFmpegFixupM4aPP,

136

FFmpegFixupStretchedPP,

137

FFmpegFixupTimestampPP,

138

FFmpegMergerPP,

139

FFmpegPostProcessor,

140

MoveFilesAfterDownloadPP,

141

)

142

from .version import __version__

143

144

if compat_os_name == 'nt':

import ctypes

class YoutubeDL(object):

149

"""YoutubeDL class.

150

151

YoutubeDL objects are the ones responsible for downloading the

152

actual video file and writing it to disk if the user has requested

153

it, among some other tasks. In most cases there should be one per

154

program. As, given a video URL, the downloader doesn't know how to

155

extract all the needed information, task that InfoExtractors do, it

156

has to pass the URL to one of them.

157

158

For this, YoutubeDL objects have a method that allows

159

InfoExtractors to be registered in a given order. When it is passed

160

a URL, the YoutubeDL object hands it to the first InfoExtractor it

161

finds that reports being able to handle it. The InfoExtractor extracts

162

all the information about the video or videos the URL refers to, and

163

YoutubeDL processes the extracted information, possibly using a File

164

Downloader to download the video.

165

166

YoutubeDL objects accept a lot of parameters. In order not to saturate

167

the object constructor with arguments, it receives a dictionary of

168

options instead. These options are available through the params

169

attribute for the InfoExtractors to use. The YoutubeDL also

170

registers itself as the downloader in charge for the InfoExtractors

171

that are added to it, so this is a "mutual registration".

Available options:

username: Username for authentication purposes.

176

password: Password for authentication purposes.

177

videopassword: Password for accessing a video.

178

ap_mso: Adobe Pass multiple-system operator identifier.

179

ap_username: Multiple-system operator account username.

180

ap_password: Multiple-system operator account password.

181

usenetrc: Use netrc for authentication instead.

182

verbose: Print additional info to stdout.

183

quiet: Do not print messages to stdout.

184

no_warnings: Do not print out anything for warnings.

185

forceprint: A list of templates to force print

186

forceurl: Force printing final URL. (Deprecated)

187

forcetitle: Force printing title. (Deprecated)

188

forceid: Force printing ID. (Deprecated)

189

forcethumbnail: Force printing thumbnail URL. (Deprecated)

190

forcedescription: Force printing description. (Deprecated)

191

forcefilename: Force printing final filename. (Deprecated)

192

forceduration: Force printing duration. (Deprecated)

193

forcejson: Force printing info_dict as JSON.

194

dump_single_json: Force printing the info_dict of the whole playlist

195

(or video) as a single JSON line.

196

force_write_download_archive: Force writing download archive regardless

197

of 'skip_download' or 'simulate'.

198

simulate: Do not download the video files.

199

format: Video format code. see "FORMAT SELECTION" for more details.

200

allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.

201

ignore_no_formats_error: Ignore "No video formats" error. Useful for

202

extracting metadata even if the video is not actually

203

available for download (experimental)

204

format_sort: How to sort the video formats. see "Sorting Formats"

205

for more details.

206

format_sort_force: Force the given format_sort. see "Sorting Formats"

207

for more details.

208

allow_multiple_video_streams: Allow multiple video streams to be merged

209

into a single file

210

allow_multiple_audio_streams: Allow multiple audio streams to be merged

211

into a single file

212

paths: Dictionary of output paths. The allowed keys are 'home',

213

'temp' and the keys of OUTTMPL_TYPES (in utils.py)

214

outtmpl: Dictionary of templates for output names. Allowed keys

215

are 'default' and the keys of OUTTMPL_TYPES (in utils.py).

216

A string is also accepted for backward compatibility

217

outtmpl_na_placeholder: Placeholder for unavailable meta fields.

218

restrictfilenames: Do not allow "&" and spaces in file names

219

trim_file_name: Limit length of filename (extension excluded)

220

windowsfilenames: Force the filenames to be windows compatible

221

ignoreerrors: Do not stop on download errors

222

(Default True when running yt-dlp,

223

but False when directly accessing YoutubeDL class)

224

skip_playlist_after_errors: Number of allowed failures until the rest of

225

the playlist is skipped

226

force_generic_extractor: Force downloader to use the generic extractor

227

overwrites: Overwrite all video and metadata files if True,

228

overwrite only non-video files if None

229

and don't overwrite any file if False

230

playliststart: Playlist item to start at.

231

playlistend: Playlist item to end at.

232

playlist_items: Specific indices of playlist to download.

233

playlistreverse: Download playlist items in reverse order.

234

playlistrandom: Download playlist items in random order.

235

matchtitle: Download only matching titles.

236

rejecttitle: Reject downloads for matching titles.

237

logger: Log messages to a logging.Logger instance.

238

logtostderr: Log messages to stderr instead of stdout.

239

writedescription: Write the video description to a .description file

240

writeinfojson: Write the video metadata to a .info.json file

241

clean_infojson: Remove private fields from the infojson

242

writecomments: Extract video comments. This will not be written to disk

243

unless writeinfojson is also given

244

writeannotations: Write the video annotations to a .annotations.xml file

245

writethumbnail: Write the thumbnail image to a file

246

allow_playlist_files: Whether to write playlists' description, infojson etc

247

also to disk when using the 'write*' options

248

write_all_thumbnails: Write all thumbnail formats to files

249

writelink: Write an internet shortcut file, depending on the

250

current platform (.url/.webloc/.desktop)

251

writeurllink: Write a Windows internet shortcut file (.url)

252

writewebloclink: Write a macOS internet shortcut file (.webloc)

253

writedesktoplink: Write a Linux internet shortcut file (.desktop)

254

writesubtitles: Write the video subtitles to a file

255

writeautomaticsub: Write the automatically generated subtitles to a file

256

allsubtitles: Deprecated - Use subtitleslangs = ['all']

257

Downloads all the subtitles of the video

258

(requires writesubtitles or writeautomaticsub)

259

listsubtitles: Lists all available subtitles for the video

260

subtitlesformat: The format code for subtitles

261

subtitleslangs: List of languages of the subtitles to download (can be regex).

262

The list may contain "all" to refer to all the available

263

subtitles. The language can be prefixed with a "-" to

264

exclude it from the requested languages. Eg: ['all', '-live_chat']

265

keepvideo: Keep the video file after post-processing

266

daterange: A DateRange object, download only if the upload_date is in the range.

267

skip_download: Skip the actual download of the video file

268

cachedir: Location of the cache files in the filesystem.

269

False to disable filesystem cache.

270

noplaylist: Download single video instead of a playlist if in doubt.

271

age_limit: An integer representing the user's age in years.

272

Unsuitable videos for the given age are skipped.

273

min_views: An integer representing the minimum view count the video

274

must have in order to not be skipped.

275

Videos without view count information are always

276

downloaded. None for no limit.

277

max_views: An integer representing the maximum view count.

278

Videos that are more popular than that are not

279

downloaded.

280

Videos without view count information are always

281

downloaded. None for no limit.

282

download_archive: File name of a file where all downloads are recorded.

283

Videos already present in the file are not downloaded

284

again.

285

break_on_existing: Stop the download process after attempting to download a

286

file that is in the archive.

287

break_on_reject: Stop the download process when encountering a video that

288

has been filtered out.

289

cookiefile: File name where cookies should be read from and dumped to

290

nocheckcertificate:Do not verify SSL certificates

291

prefer_insecure: Use HTTP instead of HTTPS to retrieve information.

292

At the moment, this is only supported by YouTube.

293

proxy: URL of the proxy server to use

294

geo_verification_proxy: URL of the proxy to use for IP address verification

295

on geo-restricted sites.

296

socket_timeout: Time to wait for unresponsive hosts, in seconds

297

bidi_workaround: Work around buggy terminals without bidirectional text

298

support, using fribidi

299

debug_printtraffic:Print out sent and received HTTP traffic

300

include_ads: Download ads as well

301

default_search: Prepend this string if an input url is not valid.

302

'auto' for elaborate guessing

303

encoding: Use this encoding instead of the system-specified.

304

extract_flat: Do not resolve URLs, return the immediate result.

305

Pass in 'in_playlist' to only show this behavior for

306

playlist items.

307

postprocessors: A list of dictionaries, each with an entry

308

* key: The name of the postprocessor. See

309

yt_dlp/postprocessor/__init__.py for a list.

310

* when: When to run the postprocessor. Can be one of

311

pre_process|before_dl|post_process|after_move.

312

Assumed to be 'post_process' if not given

313

post_hooks: A list of functions that get called as the final step

314

for each video file, after all postprocessors have been

315

called. The filename will be passed as the only argument.

316

progress_hooks: A list of functions that get called on download

317

progress, with a dictionary with the entries

318

* status: One of "downloading", "error", or "finished".

319

Check this first and ignore unknown values.

320

321

If status is one of "downloading", or "finished", the

322

following properties may also be present:

323

* filename: The final filename (always present)

324

* tmpfilename: The filename we're currently writing to

325

* downloaded_bytes: Bytes on disk

326

* total_bytes: Size of the whole file, None if unknown

327

* total_bytes_estimate: Guess of the eventual file size,

328

None if unavailable.

329

* elapsed: The number of seconds since download started.

330

* eta: The estimated time in seconds, None if unknown

331

* speed: The download speed in bytes/second, None if

332

unknown

333

* fragment_index: The counter of the currently

334

downloaded video fragment.

335

* fragment_count: The number of fragments (= individual

336

files that will be merged)

337

338

Progress hooks are guaranteed to be called at least once

339

(with status "finished") if the download is successful.

340

merge_output_format: Extension to use when merging formats.

341

final_ext: Expected final extension; used to detect when the file was

342

already downloaded and converted. "merge_output_format" is

343

replaced by this extension when given

344

fixup: Automatically correct known faults of the file.

345

One of:

346

- "never": do nothing

347

- "warn": only emit a warning

348

- "detect_or_warn": check whether we can do anything

349

about it, warn otherwise (default)

350

source_address: Client-side IP address to bind to.

351

call_home: Boolean, true iff we are allowed to contact the

352

yt-dlp servers for debugging. (BROKEN)

353

sleep_interval_requests: Number of seconds to sleep between requests

354

during extraction

355

sleep_interval: Number of seconds to sleep before each download when

356

used alone or a lower bound of a range for randomized

357

sleep before each download (minimum possible number

358

of seconds to sleep) when used along with

359

max_sleep_interval.

360

max_sleep_interval:Upper bound of a range for randomized sleep before each

361

download (maximum possible number of seconds to sleep).

362

Must only be used along with sleep_interval.

363

Actual sleep time will be a random float from range

364

[sleep_interval; max_sleep_interval].

365

sleep_interval_subtitles: Number of seconds to sleep before each subtitle download

366

listformats: Print an overview of available video formats and exit.

367

list_thumbnails: Print a table of all thumbnails and exit.

368

match_filter: A function that gets called with the info_dict of

369

every video.

370

If it returns a message, the video is ignored.

371

If it returns None, the video is downloaded.

372

match_filter_func in utils.py is one example for this.

373

no_color: Do not emit color codes in output.

374

geo_bypass: Bypass geographic restriction via faking X-Forwarded-For

375

HTTP header

376

geo_bypass_country:

377

Two-letter ISO 3166-1 alpha-2 country code that will be used for

378

explicit geographic restriction bypassing via faking

379

X-Forwarded-For HTTP header

380

geo_bypass_ip_block:

381

IP range in CIDR notation that will be used similarly to

382

geo_bypass_country

383

384

The following options determine which downloader is picked:

385

external_downloader: A dictionary of protocol keys and the executable of the

386

external downloader to use for it. The allowed protocols
are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.

387

388

Set the value to 'native' to use the native downloader

389

hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}

390

or {'m3u8': 'ffmpeg'} instead.

391

Use the native HLS downloader instead of ffmpeg/avconv

392

if True, otherwise use ffmpeg/avconv if False, otherwise

393

use downloader suggested by extractor if None.

394

compat_opts: Compatibility options. See "Differences in default behavior".

395

Note that only format-sort, format-spec, no-live-chat,

396

no-attach-info-json, playlist-index, list-formats,

397

no-direct-merge, embed-thumbnail-atomicparsley,

398

no-youtube-unavailable-videos, no-youtube-channel-redirect,

399

work when used via the API

400

401

The following parameters are not used by YoutubeDL itself, they are used by

402

the downloader (see yt_dlp/downloader/common.py):

403

nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,

404

max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,

405

xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.

406

407

The following options are used by the post processors:

408

prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,

409

otherwise prefer ffmpeg. (avconv support is deprecated)

410

ffmpeg_location: Location of the ffmpeg/avconv binary; either the path

411

to the binary or its containing directory.

412

postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)

413

and a list of additional command-line arguments for the

414

postprocessor/executable. The dict can also have "PP+EXE" keys

415

which are used when the given exe is used by the given PP.

416

Use 'default' as the name for arguments to be passed to all PP

417

418

The following options are used by the extractors:

419

extractor_retries: Number of times to retry for known errors

420

dynamic_mpd: Whether to process dynamic DASH manifests (default: True)

421

hls_split_discontinuity: Split HLS playlists to different formats at

422

discontinuities such as ad breaks (default: False)

423

extractor_args: A dictionary of arguments to be passed to the extractors.

424

See "EXTRACTOR ARGUMENTS" for details.

425

Eg: {'youtube': {'skip': ['dash', 'hls']}}

426

youtube_include_dash_manifest: Deprecated - Use extractor_args instead.

427

If True (default), DASH manifests and related

428

data will be downloaded and processed by extractor.

429

You can reduce network I/O by disabling it if you don't

430

care about DASH. (only for youtube)

431

youtube_include_hls_manifest: Deprecated - Use extractor_args instead.

432

If True (default), HLS manifests and related

433

data will be downloaded and processed by extractor.

434

You can reduce network I/O by disabling it if you don't

435

care about HLS. (only for youtube)

436

"""

437

438

# Names of the info_dict fields that hold numeric values
_NUMERIC_FIELDS = {
    'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
    'timestamp', 'upload_year', 'upload_month', 'upload_day',
    'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
    'average_rating', 'comment_count', 'age_limit',
    'start_time', 'end_time',
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number', 'release_year',
    'playlist_index',
}

# Class-level defaults. __init__ rebinds params, _ies, _pps,
# __prepare_filename_warned, _first_webpage_request, _download_retcode,
# _num_downloads and _screen_file on every instance.
params = None
_ies = []
_pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
__prepare_filename_warned = False
_first_webpage_request = True
_download_retcode = None
_num_downloads = None
_playlist_level = 0
_playlist_urls = set()
_screen_file = None

459

460

def __init__(self, params=None, auto_init=True):
    """Create a FileDownloader object with the given options.

    params    -- dictionary of options (see the class docstring); None is
                 treated as an empty dictionary.
    auto_init -- when true, print the debug header and register the
                 default info extractors.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
    self.__prepare_filename_warned = False
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Any truthy 'logtostderr' selects stderr; None/missing selects stdout
    # (indexing a list with None would raise TypeError).
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)

    if sys.version_info < (3, 6):
        self.report_warning(
            'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

    def check_deprecated(param, option, suggestion):
        """Warn if the deprecated `param` is set; return whether it was."""
        if self.params.get(param) is not None:
            self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
            return True
        return False

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the deprecated value over unless the new option is set
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    for msg in self.params.get('warnings', []):
        self.report_warning(msg)

    if self.params.get('final_ext'):
        if self.params.get('merge_output_format'):
            self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
        self.params['merge_output_format'] = self.params['final_ext']

    if 'overwrites' in self.params and self.params['overwrites'] is None:
        del self.params['overwrites']

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Try bidiv first and fall back to fribidi
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    # Creating format selector here allows us to catch syntax errors before the extraction
    self.format_selector = (
        None if self.params.get('format') is None
        else self.build_format_selector(self.params['format']))

    self._setup_opener()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified"""
        if fn is None:
            return False
        # write_debug() terminates the line itself, so no trailing newline here
        self.write_debug('Loading archive file %r' % fn)
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file is not an error
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    self.archive = set()
    preload_download_archive(self.params.get('download_archive'))

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_def_raw in self.params.get('postprocessors', []):
        # Copy so that popping 'when'/'key' does not alter the caller's dict
        pp_def = dict(pp_def_raw)
        when = pp_def.pop('when', 'post_process')
        pp_class = get_postprocessor(pp_def.pop('key'))
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp, when=when)

    for ph in self.params.get('post_hooks', []):
        self.add_post_hook(ph)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)

    register_socks_protocols()

593

594

def warn_if_short_id(self, argv):
    """Warn when an argument is a dash followed by exactly ten ID characters.

    The warning suggests a command line that moves such arguments
    behind a `--` separator.
    """
    # short YouTube ID starting with dash?
    dash_id_re = re.compile(r'^-[0-9A-Za-z_-]{10}$')
    suspects = [pos for pos, arg in enumerate(argv) if dash_id_re.match(arg)]
    if not suspects:
        return
    kept = [arg for pos, arg in enumerate(argv) if pos not in suspects]
    moved = [argv[pos] for pos in suspects]
    correct_argv = ['yt-dlp'] + kept + ['--'] + moved
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))

609

610

def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list."""
    self._ies.append(ie)
    if isinstance(ie, type):
        # Classes are only listed; instances are also indexed and bound
        return
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)

616

617

def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key. When none has been
    registered yet, a new one is created, added to the extractor list
    and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created

628

629

def add_default_info_extractors(self):
    """
    Append every extractor class yielded by gen_extractor_classes()
    to the end of the list
    """
    for extractor_cls in gen_extractor_classes():
        self.add_info_extractor(extractor_cls)

635

636

def add_post_processor(self, pp, when='post_process'):
    """Append a PostProcessor object to the chain selected by `when`."""
    chain = self._pps[when]
    chain.append(pp)
    pp.set_downloader(self)

640

641

def add_post_hook(self, ph):
    """Register `ph` at the end of the list of post hooks"""
    hooks = self._post_hooks
    hooks.append(ph)

644

645

def add_progress_hook(self, ph):
    """Register `ph` at the end of the list of progress hooks (currently only for the file downloader)"""
    hooks = self._progress_hooks
    hooks.append(ph)

648

649

def _bidi_workaround(self, message):
    """Pass `message` through the helper process started for bidi_workaround.

    The message is returned unchanged when no output channel was set up.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)
    expected_lines = message.count('\n') + 1
    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + '\n').encode('utf-8'))
    helper_stdin.flush()
    # Read back as many lines as were sent
    received = []
    for _ in range(expected_lines):
        received.append(self._output_channel.readline().decode('utf-8'))
    res = ''.join(received)
    # Drop the trailing newline appended above
    return res[:-len('\n')]

661

662

def _write_string(self, s, out=None):
    """Write `s` to `out` via write_string(), using the 'encoding' param."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)

664

665

def to_stdout(self, message, skip_eol=False, quiet=False):
    """Print message to stdout"""
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if quiet and not self.params.get('verbose'):
        return
    line_end = '' if skip_eol else '\n'
    # Quiet-but-verbose output goes to the error stream instead
    self._write_string(
        '%s%s' % (self._bidi_workaround(message), line_end),
        self._err_file if quiet else self._screen_file)

673

674

def to_stderr(self, message):
    """Print message to stderr"""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if not logger:
        self._write_string('%s\n' % self._bidi_workaround(message), self._err_file)
        return
    logger.error(message)

681

682

def to_console_title(self, message):
    """Set the console title to `message` when 'consoletitle' is enabled."""
    if not self.params.get('consoletitle', False):
        return
    if compat_os_name != 'nt':
        if 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)
        return
    kernel32 = ctypes.windll.kernel32
    if kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))

692

693

def save_console_title(self):
    """Write the save-title escape sequence to the screen file.

    Nothing happens unless 'consoletitle' is set, 'simulate' is unset,
    the OS is not 'nt' and TERM is present in the environment.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Save the title on stack
    self._write_string('\033[22;0t', self._screen_file)

701

702

def restore_console_title(self):
    """Write the restore-title escape sequence to the screen file.

    Nothing happens unless 'consoletitle' is set, 'simulate' is unset,
    the OS is not 'nt' and TERM is present in the environment.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Restore the title from stack
    self._write_string('\033[23;0t', self._screen_file)

710

711

def __enter__(self):
    """Save the console title and return this object as the context value."""
    self.save_console_title()
    return self

714

715

def __exit__(self, *args):
    """Restore the console title and save the cookie jar if 'cookiefile' is set."""
    self.restore_console_title()

    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save(ignore_discard=True, ignore_expires=True)

720

721

def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message, if any, is printed first. Unless the 'ignoreerrors'
    param is set, a DownloadError is then raised; otherwise the return
    code is set to 1.

    tb, if given, is additional traceback information. It is only
    printed in verbose mode, where a traceback is generated if none
    was passed.
    """
    if message is not None:
        self.to_stderr(message)

    current = sys.exc_info()
    if self.params.get('verbose'):
        if tb is None:
            if current[0]:  # if .trouble has been called from an except block
                pieces = []
                if hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
                    pieces.extend(traceback.format_exception(*current[1].exc_info))
                pieces.append(encode_compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    # Prefer the exc_info carried by the active exception, if it has one
    if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
        exc_info = current[1].exc_info
    else:
        exc_info = current
    raise DownloadError(message, exc_info)

751

752

def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode"""
    is_quiet = self.params.get('quiet', False)
    self.to_stdout(message, skip_eol, quiet=is_quiet)

756

757

def report_warning(self, message):
    '''
    Send the message to the configured logger, or print it to stderr
    prefixed with 'WARNING:' unless 'no_warnings' is set.
    The prefix is colored when stderr is a tty, colors are enabled
    and the OS is not 'nt'.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    use_color = not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt'
    _msg_header = '\033[0;33mWARNING:\033[0m' if use_color else 'WARNING:'
    self.to_stderr('%s %s' % (_msg_header, message))

773

774

def report_error(self, message, tb=None):
    '''
    Prefix the message with 'ERROR:' and hand it to trouble(). The prefix
    is colored in red when stderr is a tty, colors are enabled and the
    OS is not 'nt'.
    '''
    use_color = not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt'
    _msg_header = '\033[0;31mERROR:\033[0m' if use_color else 'ERROR:'
    self.trouble('%s %s' % (_msg_header, message), tb)

785

786

def write_debug(self, message):
    '''Emit a "[debug] "-prefixed message in verbose mode (logger if set, else _write_string)'''
    if not self.params.get('verbose', False):
        return
    message = '[debug] %s' % message
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
    else:
        self._write_string('%s\n' % message)

795

796

def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    generic = '[download] The file has already been downloaded'
    try:
        self.to_screen('[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        # Fall back to a message without the file name
        self.to_screen(generic)

802

803

def report_file_delete(self, file_name):
    """Report that existing file will be deleted."""
    generic = 'Deleting existing file'
    try:
        self.to_screen('Deleting existing file %s' % file_name)
    except UnicodeEncodeError:
        # Fall back to a message without the file name
        self.to_screen(generic)

809

810

def parse_outtmpl(self):
    """Return the 'outtmpl' param as a dict with DEFAULT_OUTTMPL filled in.

    A non-dict value is treated as the 'default' template. Keys that are
    missing or falsy receive the value from DEFAULT_OUTTMPL. A warning
    is reported for every template given as bytes.
    """
    templates = self.params.get('outtmpl', {})
    if not isinstance(templates, dict):
        templates = {'default': templates}
    for name, fallback in DEFAULT_OUTTMPL.items():
        if not templates.get(name):
            templates[name] = fallback
    for tmpl in templates.values():
        if isinstance(tmpl, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
    return templates

823

824

def get_output_path(self, dir_type='', filename=None):
    """Join the 'home' path, the path configured for dir_type and filename.

    The directories come from the 'paths' param; the joined result is
    passed through sanitize_path().
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    path = os.path.join(home_dir, type_dir, filename or '')

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    if sys.version_info < (3, 0) and sys.platform == 'win32':
        path = encodeFilename(path, True).decode(preferredencoding())
    return sanitize_path(path, force=self.params.get('windowsfilenames'))

838

839

@staticmethod
def validate_outtmpl(tmpl):
    ''' @return None or Exception object '''
    def escape_keyless(mobj):
        # Double the '%' of conversions that carry no key
        prefix = '' if mobj.group('has_key') else '%'
        return prefix + mobj.group(0)

    try:
        escaped = re.sub(STR_FORMAT_RE.format(''), escape_keyless, tmpl)
        # Trial substitution; every key resolves to 0
        escaped % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None

851

852

def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
    """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)

    outtmpl   -- output template string
    info_dict -- info dictionary; it is copied, never modified
    sanitize  -- optional callable (field_name, value) applied to values
                 formatted with the c, s or r conversions

    Returns a tuple (template, dict) meant to be combined as
    `template % dict`.
    """
    info_dict = dict(info_dict)
    na = self.params.get('outtmpl_na_placeholder', 'NA')

    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['epoch'] = int(time.time())
    info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE.format('[^)]*'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w+(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
    # Grouped, so that embedding it after an operator yields
    # "op (field|num)" rather than "(op field)|num", and with the decimal
    # point escaped so that a number cannot swallow the next operator
    MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:\.\d+)?')
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{field})
        (?P<maths>(?:{math_op}{math_field})*)
        (?:>(?P<strf_format>.+?))?
        (?:\|(?P<default>.*?))?
        $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

    get_key = lambda k: traverse_obj(
        info_dict, k.split('.'), is_user_input=True, traverse_string=True)

    def get_value(mdict):
        """Evaluate one parsed template key against info_dict."""
        # Object traversal
        value = get_key(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            operation = None
            # Consume the maths string as alternating operator/operand tokens
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if operation else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if operation is None:
                    operation = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                # The operand is either a literal number or another field
                offset = float_or_none(item)
                if offset is None:
                    offset = float_or_none(get_key(item))
                try:
                    value = operation(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                operation = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'])

        return value

    def create_key(outer_mobj):
        """Rewrite one conversion and record its value in TMPL_DICT."""
        if not outer_mobj.group('has_key'):
            return '%{}'.format(outer_mobj.group(0))

        key = outer_mobj.group('key')
        fmt = outer_mobj.group('format')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        if mobj is None:
            value, default, mobj = None, na, {'fields': ''}
        else:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else na
            value = get_value(mobj)

        if fmt == 's' and value is not None and key in field_size_compat_map.keys():
            fmt = '0{:d}d'.format(field_size_compat_map[key])

        value = default if value is None else value

        if fmt == 'c':
            value = compat_str(value)
            if value:
                value = value[0]
            else:
                # An empty string has no first character to format
                value, fmt = default, 's'
        elif fmt[-1] not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'
        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), '%ss' % fmt[:-1]
            if fmt[-1] in 'csr':
                value = sanitize(mobj['fields'].split('.')[-1], value)
        # The format is appended to the key, so the same field used with
        # two different formats gets two separate entries
        key += '\0%s' % fmt
        TMPL_DICT[key] = value
        return '%({key}){fmt}'.format(key=key, fmt=fmt)

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

972

973

def _prepare_filename(self, info_dict, tmpl_type='default'):
    """Evaluate the output template of type tmpl_type for info_dict.

    Returns the resulting filename, or None (after reporting an error)
    when the template raises ValueError.
    """
    try:
        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id' or k.endswith('_id')))
        outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
        outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)

        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        filename = expand_path(outtmpl).replace(sep, '') % template_dict

        force_ext = OUTTMPL_TYPES.get(tmpl_type)
        if force_ext is not None:
            filename = replace_extension(filename, force_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # Split off at most two trailing extensions, so that dots
            # inside the name itself are kept and a name without any dot
            # is not duplicated as its own "extension"
            parts = filename.rsplit('.', 2)
            stem, exts = parts[0], parts[1:]
            filename = '.'.join(filter(None, [stem[:trim_file_name]] + exts))

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None

1013

1014

def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename.

    info_dict -- info dictionary passed on to _prepare_filename()
    dir_type  -- template type; also selects the directory in
                 get_output_path(). Empty means 'default'.
    warn      -- when true, check once whether the 'paths' param is being
                 ignored and warn if so

    Returns '-' or a falsy filename unchanged, otherwise the result of
    get_output_path() for the filename.
    """
    filename = self._prepare_filename(info_dict, dir_type or 'default')

    if warn and not self.__prepare_filename_warned:
        if not self.params.get('paths'):
            pass
        elif filename == '-':
            self.report_warning('--paths is ignored when outputting to stdout')
        elif os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template')
        # The check is performed only once, whether or not it warned
        self.__prepare_filename_warned = True
    if filename == '-' or not filename:
        return filename

    return self.get_output_path(dir_type, filename)

1031

1032

def _match_entry(self, info_dict, incomplete=False, silent=False):
    """Return None if the file should be downloaded, else the reason to skip it.

    An entry already present in the download archive is skipped first;
    otherwise the title, upload date, view count, age limit and (unless
    ``incomplete``) the ``match_filter`` callable are checked in that order.
    The reason is printed unless ``silent``. If the matching
    ``break_on_existing`` / ``break_on_reject`` param is set, the
    corresponding exception is raised instead of returning.
    """
    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def rejection_reason():
        # The title may be missing when only the playlist is being evaluated
        if 'title' in info_dict:
            title = info_dict['title']
            wanted = self.params.get('matchtitle', False)
            if wanted and not re.search(wanted, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + wanted + '"'
            unwanted = self.params.get('rejecttitle', False)
            if unwanted and re.search(unwanted, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + unwanted + '"'

        upload_date = info_dict.get('upload_date')
        if upload_date is not None:
            allowed_dates = self.params.get('daterange', DateRange())
            if upload_date not in allowed_dates:
                return '%s upload date is not in range %s' % (
                    date_from_str(upload_date).isoformat(), allowed_dates)

        views = info_dict.get('view_count')
        if views is not None:
            lower_bound = self.params.get('min_views')
            if lower_bound is not None and views < lower_bound:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (
                    video_title, views, lower_bound)
            upper_bound = self.params.get('max_views')
            if upper_bound is not None and views > upper_bound:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (
                    video_title, views, upper_bound)

        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        # The user supplied filter is only consulted for complete info dicts
        if incomplete:
            return None
        user_filter = self.params.get('match_filter')
        if user_filter is None:
            return None
        return user_filter(info_dict)

    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = rejection_reason()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return None
    if not silent:
        self.to_screen('[download] ' + reason)
    if self.params.get(break_opt, False):
        raise break_err()
    return reason

@staticmethod

def add_extra_info(info_dict, extra_info):

1088

'''Set the keys from extra_info in info dict if they are missing'''

1089

for key, value in extra_info.items():

1090

info_dict.setdefault(key, value)

1091

1092

def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract information from the given URL using the first suitable extractor.

    Returns whatever the internal extraction step returns for that
    extractor. Returns None when the video id is already recorded in the
    download archive or when no suitable extractor exists (an error is
    reported in the latter case).

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
                  (None is treated as an empty dictionary)
    process -- whether to resolve all unresolved references (URLs, playlist items),
        must be True for download to work.
    force_generic_extractor -- force using the generic extractor
    """
    # Avoid a shared mutable default: the dict is handed on to the
    # processing pipeline, so each call gets its own instance.
    if extra_info is None:
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        ie_key = ie.ie_key()
        ie = self.get_info_extractor(ie_key)
        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Try to learn the video id up front so that archived videos can be
        # skipped without running the extractor.
        try:
            temp_id = str_or_none(
                ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
                else ie._match_id(url))
        except (AssertionError, IndexError, AttributeError):
            temp_id = None
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                ie_key, temp_id))
            # break (rather than continue) so the for-else error is not reported
            break
        return self.__extract_info(url, ie, download, extra_info, process)
    else:
        self.report_error('no suitable InfoExtractor for URL %s' % url)

1140

1141

def __handle_extraction_exceptions(func):

1142

def wrapper(self, *args, **kwargs):

1143

try:

1144

return func(self, *args, **kwargs)

1145

except GeoRestrictedError as e:

1146

msg = e.msg

1147

if e.countries:

1148

msg += '\nThis video is available in %s.' % ', '.join(

1149

map(ISO3166Utils.short2full, e.countries))

1150

msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'

1151

self.report_error(msg)

1152

except ExtractorError as e: # An error we somewhat expected

1153

self.report_error(compat_str(e), e.format_traceback())

1154

except ThrottledDownload:

1155

self.to_stderr('\r')

1156

self.report_warning('The download speed is below throttle limit. Re-extracting data')

1157

return wrapper(self, *args, **kwargs)

1158

except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):

1159

raise

1160

except Exception as e:

1161

if self.params.get('ignoreerrors', False):

1162

self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))

else:

raise

return wrapper

@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run extractor ie on url and optionally process its result.

    Returns None if the extractor returned None, the raw result when
    ``process`` is false, and the processed result otherwise.
    """
    extracted = ie.extract(url)
    # None means the extractor finished already (backwards compatibility;
    # listformats and friends should be moved here)
    if extracted is None:
        return None
    if isinstance(extracted, list):
        # Backwards compatibility: wrap the old list-style IE result
        extracted = {'_type': 'compat_list', 'entries': extracted}
    self.add_default_extra_info(extracted, ie, url)
    if not process:
        return extracted
    return self.process_ie_result(extracted, download, extra_info)

def add_default_extra_info(self, ie_result, ie, url):
    """Add URL and extractor fields to ie_result via add_extra_info.

    The URL fields are added only when url is not None and the extractor
    fields only when ie is not None.
    """
    if url is not None:
        url_fields = {
            'webpage_url': url,
            'original_url': url,
            'webpage_url_basename': url_basename(url),
        }
        self.add_extra_info(ie_result, url_fields)
    if ie is not None:
        extractor_fields = {
            'extractor': ie.IE_NAME,
            'extractor_key': ie.ie_key(),
        }
        self.add_extra_info(ie_result, extractor_fields)

1196

1197

def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    extra_info is a dictionary of extra values to add to the result; None is
    treated as an empty dictionary. Raises Exception for an unknown '_type'.
    """
    # Avoid a shared mutable default argument
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: print what was requested from a copy and
            # return the unresolved result untouched
            info_copy = ie_result.copy()
            self.add_extra_info(info_copy, extra_info)
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataFromFieldPP to allow setting a list
            if isinstance(additional_urls, compat_str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            # extra_info must be passed by keyword: the third positional
            # parameter of extract_info is ie_key, not extra_info.
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # Parenthesised so that the id is used when there is no title;
            # '%' binds tighter than 'or'.
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Forget the visited URLs once the outermost playlist is done
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)

1317

1318

def _ensure_dir_exists(self, path):
    """Call make_dir for path with self.report_error as its callback and return the result."""
    outcome = make_dir(path, self.report_error)
    return outcome

1320

1321

def __process_playlist(self, ie_result, download):
    """Select, filter and process the entries of a playlist result.

    Honours the playliststart/playlistend/playlist_items params, optionally
    writes the playlist level files (info JSON, thumbnails, description),
    then processes every selected entry. Returns ie_result with 'entries'
    replaced by the processed results, or None if a playlist file could not
    be prepared/written.

    Raises EntryNotInPlaylist if ie_result has no 'entries' or a requested
    entry is missing from an incomplete playlist.
    """
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if 'entries' not in ie_result:
        raise EntryNotInPlaylist()
    incomplete_entries = bool(ie_result.get('requested_entries'))
    if incomplete_entries:
        def fill_missing_entries(entries, indexes):
            # Place each entry at its 1-based playlist index, padding with None.
            # max(indexes) rather than max(*indexes): the latter fails with
            # TypeError when exactly one entry was requested.
            ret = [None] * max(indexes)
            for i, entry in zip(indexes, entries):
                ret[i - 1] = entry
            return ret
        ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

    playlist_results = []

    playliststart = self.params.get('playliststart', 1)
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(spec):
            # Expand e.g. '1-3,7' into 1, 2, 3, 7
            for string_segment in spec.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    for item in range(int(start), int(end) + 1):
                        yield int(item)
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']
    # The second template keeps a literal %d to be filled with n_entries later
    msg = (
        'Downloading %d videos' if not isinstance(ie_entries, list)
        else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
    if not isinstance(ie_entries, (list, PagedList)):
        ie_entries = LazyList(ie_entries)

    entries = []
    for i in playlistitems or itertools.count(playliststart):
        if playlistitems is None and playlistend is not None and playlistend < i:
            break
        entry = None
        try:
            entry = ie_entries[i - 1]
            if entry is None:
                raise EntryNotInPlaylist()
        except (IndexError, EntryNotInPlaylist):
            if incomplete_entries:
                raise EntryNotInPlaylist()
            elif not playlistitems:
                break
        entries.append(entry)
        try:
            # Only called so that break_on_existing/break_on_reject can
            # stop the collection early; the returned reason is ignored
            if entry is not None:
                self._match_entry(entry, incomplete=True, silent=True)
        except (ExistingVideoReached, RejectedVideoReached):
            break
    ie_result['entries'] = entries

    # Save playlist_index before re-ordering
    entries = [
        ((playlistitems[i - 1] if playlistitems else i), entry)
        for i, entry in enumerate(entries, 1)
        if entry is not None]
    n_entries = len(entries)

    if not playlistitems and (playliststart or playlistend):
        playlistitems = list(range(playliststart, playliststart + n_entries))
    ie_result['requested_entries'] = playlistitems

    if self.params.get('allow_playlist_files', True):
        ie_copy = {
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'playlist_index': 0,
        }
        ie_copy.update(dict(ie_result))

        if self.params.get('writeinfojson', False):
            infofn = self.prepare_filename(ie_copy, 'pl_infojson')
            if not self._ensure_dir_exists(encodeFilename(infofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Playlist metadata is already present')
            else:
                self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write playlist metadata to JSON file ' + infofn)

        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

        if self.params.get('writedescription', False):
            descfn = self.prepare_filename(ie_copy, 'pl_description')
            if not self._ensure_dir_exists(encodeFilename(descfn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Playlist description is already present')
            elif ie_result.get('description') is None:
                self.report_warning('There\'s no playlist description to write.')
            else:
                try:
                    self.to_screen('[info] Writing playlist description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(ie_result['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write playlist description file ' + descfn)
                    return

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]
    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    # Compat options are read from the 'compat_opts' param elsewhere in this
    # file and use hyphenated names.
    # NOTE(review): 'playlist-index' is assumed to be the option spelling - confirm
    legacy_playlist_index = 'playlist-index' in self.params.get('compat_opts', [])
    for i, entry_tuple in enumerate(entries, 1):
        playlist_index, entry = entry_tuple
        if legacy_playlist_index:
            # Index by position after re-ordering instead of the saved index
            playlist_index = playlistitems[i - 1] if playlistitems else i
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': i,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
            break
        # TODO: skip failed (empty) entries?
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results
    self.to_screen('[download] Finished downloading playlist: %s' % playlist)
    return ie_result

1492

1493

@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process one playlist entry through process_ie_result and return its result."""
    processed = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return processed

1497

1498

def _build_format_filter(self, filter_spec):

1499

" Returns a function to filter the formats according to the filter_spec "

OPERATORS = {

'<': operator.lt,

'<=': operator.le,

'>': operator.gt,

'>=': operator.ge,

'=': operator.eq,

'!=': operator.ne,

}

operator_rex = re.compile(r'''(?x)\s*

1510

(?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*

1511

(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*

1512

(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*

1513

''' % '|'.join(map(re.escape, OPERATORS.keys())))

1514

m = operator_rex.fullmatch(filter_spec)

1515

if m:

1516

try:

1517

comparison_value = int(m.group('value'))

1518

except ValueError:

1519

comparison_value = parse_filesize(m.group('value'))

1520

if comparison_value is None:

1521

comparison_value = parse_filesize(m.group('value') + 'B')

1522

if comparison_value is None:

1523

raise ValueError(

1524

'Invalid value %r in format specification %r' % (

1525

m.group('value'), filter_spec))

1526

op = OPERATORS[m.group('op')]

if not m:

STR_OPERATORS = {

'=': operator.eq,

'^=': lambda attr, value: attr.startswith(value),

1532

'$=': lambda attr, value: attr.endswith(value),

1533

'*=': lambda attr, value: value in attr,

1534

}

1535

str_operator_rex = re.compile(r'''(?x)\s*

1536

(?P<key>[a-zA-Z0-9._-]+)\s*

1537

(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*

1538

(?P<value>[a-zA-Z0-9._-]+)\s*

1539

''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))

1540

m = str_operator_rex.fullmatch(filter_spec)

1541

if m:

1542

comparison_value = m.group('value')

1543

str_op = STR_OPERATORS[m.group('op')]

1544

if m.group('negation'):

1545

op = lambda attr, value: not str_op(attr, value)

else:

op = str_op

if not m:

raise SyntaxError('Invalid filter specification %r' % filter_spec)

1551

1552

def _filter(f):

1553

actual_value = f.get(m.group('key'))

1554

if actual_value is None:

1555

return m.group('none_inclusive')

1556

return op(actual_value, comparison_value)

1557

return _filter

1558

1559

def _default_format_spec(self, info_dict, download=True):

1560

1561

def can_merge():

1562

merger = FFmpegMergerPP(self)

1563

return merger.available and merger.can_merge()

1564

1565

prefer_best = (

1566

not self.params.get('simulate', False)

and download

and (

not can_merge()

or info_dict.get('is_live', False)

1571

or self.outtmpl_dict['default'] == '-'))

1572

compat = (

1573

prefer_best

1574

or self.params.get('allow_multiple_audio_streams', False)

1575

or 'format-spec' in self.params.get('compat_opts', []))

1576

1577

return (

1578

'best/bestvideo+bestaudio' if prefer_best

1579

else 'bestvideo*+bestaudio/best' if not compat

1580

else 'bestvideo+bestaudio/best')

1581

1582

def build_format_selector(self, format_spec):

1583

def syntax_error(note, start):

1584

message = (

1585

'Invalid format specification: '

1586

'{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))

1587

return SyntaxError(message)

1588

1589

PICKFIRST = 'PICKFIRST'

MERGE = 'MERGE'

SINGLE = 'SINGLE'

GROUP = 'GROUP'

FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

1594

1595

allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),

1596

'video': self.params.get('allow_multiple_video_streams', False)}

1597

1598

check_formats = self.params.get('check_formats')

1599

1600

def _parse_filter(tokens):

1601

filter_parts = []

1602

for type, string, start, _, _ in tokens:

1603

if type == tokenize.OP and string == ']':

1604

return ''.join(filter_parts)

1605

else:

1606

filter_parts.append(string)

1607

1608

def _remove_unused_ops(tokens):

1609

# Remove operators that we don't use and join them with the surrounding strings

1610

# for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'

1611

ALLOWED_OPS = ('/', '+', ',', '(', ')')

1612

last_string, last_start, last_end, last_line = None, None, None, None

1613

for type, string, start, end, line in tokens:

1614

if type == tokenize.OP and string == '[':

1615

if last_string:

1616

yield tokenize.NAME, last_string, last_start, last_end, last_line

1617

last_string = None

1618

yield type, string, start, end, line

1619

# everything inside brackets will be handled by _parse_filter

1620

for type, string, start, end, line in tokens:

1621

yield type, string, start, end, line

1622

if type == tokenize.OP and string == ']':

1623

break

1624

elif type == tokenize.OP and string in ALLOWED_OPS:

1625

if last_string:

1626

yield tokenize.NAME, last_string, last_start, last_end, last_line

1627

last_string = None

1628

yield type, string, start, end, line

1629

elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:

if not last_string:

last_string = string

last_start = start

last_end = end

else:

last_string += string

1636

if last_string:

1637

yield tokenize.NAME, last_string, last_start, last_end, last_line

1638

1639

def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):

1640

selectors = []

1641

current_selector = None

1642

for type, string, start, _, _ in tokens:

1643

# ENCODING is only defined in python 3.x

1644

if type == getattr(tokenize, 'ENCODING', None):

1645

continue

1646

elif type in [tokenize.NAME, tokenize.NUMBER]:

1647

current_selector = FormatSelector(SINGLE, string, [])

1648

elif type == tokenize.OP:

1649

if string == ')':

1650

if not inside_group:

1651

# ')' will be handled by the parentheses group

1652

tokens.restore_last_token()

1653

break

1654

elif inside_merge and string in ['/', ',']:

1655

tokens.restore_last_token()

1656

break

1657

elif inside_choice and string == ',':

1658

tokens.restore_last_token()

1659

break

1660

elif string == ',':

1661

if not current_selector:

1662

raise syntax_error('"," must follow a format selector', start)

1663

selectors.append(current_selector)

1664

current_selector = None

1665

elif string == '/':

1666

if not current_selector:

1667

raise syntax_error('"/" must follow a format selector', start)

1668

first_choice = current_selector

1669

second_choice = _parse_format_selection(tokens, inside_choice=True)

1670

current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])

1671

elif string == '[':

1672

if not current_selector:

1673

current_selector = FormatSelector(SINGLE, 'best', [])

1674

format_filter = _parse_filter(tokens)

1675

current_selector.filters.append(format_filter)

1676

elif string == '(':

1677

if current_selector:

1678

raise syntax_error('Unexpected "("', start)

1679

group = _parse_format_selection(tokens, inside_group=True)

1680

current_selector = FormatSelector(GROUP, group, [])

1681

elif string == '+':

1682

if not current_selector:

1683

raise syntax_error('Unexpected "+"', start)

1684

selector_1 = current_selector

1685

selector_2 = _parse_format_selection(tokens, inside_merge=True)

1686

if not selector_2:

1687

raise syntax_error('Expected a selector', start)

1688

current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])

1689

else:

1690

raise syntax_error('Operator not recognized: "{0}"'.format(string), start)

1691

elif type == tokenize.ENDMARKER:

1692

break

1693

if current_selector:

1694

selectors.append(current_selector)

1695

return selectors

1696

1697

def _merge(formats_pair):

1698

format_1, format_2 = formats_pair

1699

1700

formats_info = []

1701

formats_info.extend(format_1.get('requested_formats', (format_1,)))

1702

formats_info.extend(format_2.get('requested_formats', (format_2,)))

1703

1704

if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:

1705

get_no_more = {'video': False, 'audio': False}

1706

for (i, fmt_info) in enumerate(formats_info):

1707

if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':

1708

formats_info.pop(i)

1709

continue

1710

for aud_vid in ['audio', 'video']:

1711

if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':

1712

if get_no_more[aud_vid]:

1713

formats_info.pop(i)

1714

get_no_more[aud_vid] = True

1715

1716

if len(formats_info) == 1:

1717

return formats_info[0]

1718

1719

video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']

1720

audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

1721

1722

the_only_video = video_fmts[0] if len(video_fmts) == 1 else None

1723

the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

1724

1725

output_ext = self.params.get('merge_output_format')

1726

if not output_ext:

1727

if the_only_video:

1728

output_ext = the_only_video['ext']

1729

elif the_only_audio and not video_fmts:

1730

output_ext = the_only_audio['ext']

else:

output_ext = 'mkv'

new_dict = {

'requested_formats': formats_info,

1736

'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),

1737

'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),

'ext': output_ext,

}

if the_only_video:

new_dict.update({

'width': the_only_video.get('width'),

1744

'height': the_only_video.get('height'),

1745

'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),

1746

'fps': the_only_video.get('fps'),

1747

'vcodec': the_only_video.get('vcodec'),

1748

'vbr': the_only_video.get('vbr'),

1749

'stretched_ratio': the_only_video.get('stretched_ratio'),

})

if the_only_audio:

new_dict.update({

'acodec': the_only_audio.get('acodec'),

1755

'abr': the_only_audio.get('abr'),

})

return new_dict

def _check_formats(formats):

1761

for f in formats:

1762

self.to_screen('[info] Testing format %s' % f['format_id'])

1763

temp_file = tempfile.NamedTemporaryFile(

1764

suffix='.tmp', delete=False,

1765

dir=self.get_output_path('temp') or None)

1766

temp_file.close()

1767

try:

1768

dl, _ = self.dl(temp_file.name, f, test=True)

1769

except (ExtractorError, IOError, OSError, ValueError) + network_exceptions:

1770

dl = False

1771

finally:

1772

if os.path.exists(temp_file.name):

1773

try:

1774

os.remove(temp_file.name)

1775

except OSError:

1776

self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)

if dl:

yield f

else:

self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])

1781

1782

def _build_selector_function(selector):

1783

if isinstance(selector, list): # ,

1784

fs = [_build_selector_function(s) for s in selector]

1785

1786

def selector_function(ctx):

1787

for f in fs:

1788

for format in f(ctx):

1789

yield format

1790

return selector_function

1791

1792

elif selector.type == GROUP: # ()

1793

selector_function = _build_selector_function(selector.selector)

1794

1795

elif selector.type == PICKFIRST: # /

1796

fs = [_build_selector_function(s) for s in selector.selector]

1797

1798

def selector_function(ctx):

1799

for f in fs:

1800

picked_formats = list(f(ctx))

1801

if picked_formats:

1802

return picked_formats

1803

return []

1804

1805

elif selector.type == SINGLE: # atom

1806

format_spec = selector.selector or 'best'

1807

1808

# TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector

1809

if format_spec == 'all':

1810

def selector_function(ctx):

1811

formats = list(ctx['formats'])

1812

if check_formats:

1813

formats = _check_formats(formats)

1814

for f in formats:

1815

yield f

1816

elif format_spec == 'mergeall':

1817

def selector_function(ctx):

1818

formats = ctx['formats']

1819

if check_formats:

1820

formats = list(_check_formats(formats))

1821

if not formats:

1822

return

1823

merged_format = formats[-1]

1824

for f in formats[-2::-1]:

1825

merged_format = _merge((merged_format, f))

yield merged_format

else:

format_fallback, format_reverse, format_idx = False, True, 1

mobj = re.match(

format_spec)

if mobj is not None:

format_idx = int_or_none(mobj.group('n'), default=1)

1835

format_reverse = mobj.group('bw')[0] == 'b'

1836

format_type = (mobj.group('type') or [None])[0]

1837

not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)

1838

format_modified = mobj.group('mod') is not None

1839

1840

format_fallback = not format_type and not format_modified # for b, w

1841

_filter_f = (

1842

(lambda f: f.get('%scodec' % format_type) != 'none')

1843

if format_type and format_modified # bv*, ba*, wv*, wa*

1844

else (lambda f: f.get('%scodec' % not_format_type) == 'none')

1845

if format_type # bv, ba, wv, wa

1846

else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')

1847

if not format_modified # b, w

1848

else lambda f: True) # b*, w*

1849

filter_f = lambda f: _filter_f(f) and (

1850

f.get('vcodec') != 'none' or f.get('acodec') != 'none')

1851

else:

1852

filter_f = ((lambda f: f.get('ext') == format_spec)

1853

if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension

1854

else (lambda f: f.get('format_id') == format_spec)) # id

1855

1856

def selector_function(ctx):

1857

formats = list(ctx['formats'])

1858

if not formats:

1859

return

1860

matches = list(filter(filter_f, formats)) if filter_f is not None else formats

1861

if format_fallback and ctx['incomplete_formats'] and not matches:

1862

# for extractors with incomplete formats (audio only (soundcloud)

1863

# or video only (imgur)) best/worst will fallback to

1864

# best/worst {video,audio}-only format

1865

matches = formats

1866

if format_reverse:

1867

matches = matches[::-1]

1868

if check_formats:

1869

matches = list(itertools.islice(_check_formats(matches), format_idx))

1870

n = len(matches)

1871

if -n <= format_idx - 1 < n:

1872

yield matches[format_idx - 1]

1873

1874

elif selector.type == MERGE: # +

1875

selector_1, selector_2 = map(_build_selector_function, selector.selector)

1876

1877

def selector_function(ctx):

1878

for pair in itertools.product(

1879

selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):

1880

yield _merge(pair)

1881

1882

filters = [self._build_format_filter(f) for f in selector.filters]

1883

1884

def final_selector(ctx):

1885

ctx_copy = copy.deepcopy(ctx)

1886

for _filter in filters:

1887

ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))

1888

return selector_function(ctx_copy)

1889

return final_selector

1890

1891

stream = io.BytesIO(format_spec.encode('utf-8'))

1892

try:

1893

tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))

1894

except tokenize.TokenError:

1895

raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

1896

1897

class TokenIterator(object):
    """Iterator over a token sequence that can step back by one token."""

    def __init__(self, tokens):
        self.tokens = tokens
        # Index of the token that the next call to __next__ will return
        self.counter = 0

    def __iter__(self):
        return self

    def __next__(self):
        position = self.counter
        if position < len(self.tokens):
            token = self.tokens[position]
            self.counter = position + 1
            return token
        raise StopIteration()

    # Python 2 name of the iterator protocol method
    next = __next__

    def restore_last_token(self):
        """Move the cursor back one position so the last token is returned again."""
        self.counter -= 1

1916

1917

parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))

1918

return _build_selector_function(parsed_selector)

1919

1920

def _calc_headers(self, info_dict):
    """Return the HTTP headers to use for info_dict.

    Starts from a copy of std_headers, overlays info_dict['http_headers'] (if
    any), sets 'Cookie' from self._calc_cookies() when it returns a value, and
    adds 'X-Forwarded-For' from info_dict['__x_forwarded_for_ip'] unless that
    header is already present.
    """
    headers = std_headers.copy()
    headers.update(info_dict.get('http_headers') or {})

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    # An explicitly supplied X-Forwarded-For header always wins
    if 'X-Forwarded-For' in headers:
        return headers
    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip:
        headers['X-Forwarded-For'] = forwarded_ip
    return headers

def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header self.cookiejar adds to a request for info_dict['url']."""
    request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(request)
    return request.get_header('Cookie')

1942

1943

def _sanitize_thumbnails(self, info_dict):
    """Normalize the thumbnails of info_dict in place.

    If info_dict has no 'thumbnails' but has a 'thumbnail' URL, a one-element
    'thumbnails' list is created from it.  The list is sorted in place by
    (preference, width, height, id, url), where a missing preference, width
    or height counts as -1 and a missing id as ''.  Every entry then gets an
    'id' (its index after sorting, when missing), a 'resolution' string when
    both width and height are set, and a sanitized 'url'.

    When the 'check_formats' param is set, every thumbnail URL is probed with
    a HEAD request and info_dict['thumbnails'] is replaced by a list holding
    only the thumbnails that could be reached, in the same order.
    """
    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    if not thumbnails:
        return

    def sort_key(t):
        def value_or(field, default):
            value = t.get(field)
            return default if value is None else value

        return (
            value_or('preference', -1),
            value_or('width', -1),
            value_or('height', -1),
            value_or('id', ''),
            t.get('url'))

    thumbnails.sort(key=sort_key)

    def test_thumbnail(t):
        self.to_screen('[info] Testing thumbnail %s' % t['id'])
        try:
            self.urlopen(HEADRequest(t['url']))
        except network_exceptions as err:
            self.to_screen('[info] Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
                t['id'], t['url'], error_to_compat_str(err)))
            return False
        return True

    for i, t in enumerate(thumbnails):
        if t.get('id') is None:
            t['id'] = '%d' % i
        if t.get('width') and t.get('height'):
            t['resolution'] = '%dx%d' % (t['width'], t['height'])
        t['url'] = sanitize_url(t['url'])

    if self.params.get('check_formats'):
        # Probe from the last (highest sorted) thumbnail down, then restore the
        # sorted order.  A real list is stored instead of the iterator that
        # reversed() returns: callers index the value (thumbnails[-1]) and test
        # its truthiness, neither of which works on a one-shot iterator.
        # Note that this probes every thumbnail up front.
        reachable = [t for t in reversed(thumbnails) if test_thumbnail(t)]
        reachable.reverse()
        info_dict['thumbnails'] = reachable

1975

1976

def process_video_result(self, info_dict, download=True):
    """Sanitize a single-video result, select its formats and process them.

    info_dict is validated and normalized in place: id and numeric fields,
    playlist keys, thumbnails, dates derived from timestamps, chapter/season/
    episode titles, subtitle and format entries.  The 'list_thumbnails',
    'listsubtitles' and 'listformats' params each list the respective data and
    return None.  Otherwise the format selector is run and, when download is
    true, every selected format is passed to process_info().

    Returns info_dict, updated with the last selected format.
    Raises ExtractorError when "id" or "title" is missing, when
    'listformats' is set and there are no formats, and (unless the
    'ignore_no_formats_error' param is set) when there are no formats or the
    requested format is not available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    def report_force_conversion(field, field_not, conversion):
        message = '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
        self.report_warning(message % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        value = info.get(string_field)
        if value is not None and not isinstance(value, compat_str):
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(value)

    def sanitize_numeric_fields(info):
        for numeric_field in self._NUMERIC_FIELDS:
            value = info.get(numeric_field)
            if value is not None and not isinstance(value, compat_numeric_types):
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(value)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = info_dict['playlist_index'] = None

    self._sanitize_thumbnails(info_dict)

    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    thumbnail = info_dict.get('thumbnail')
    thumbnails = info_dict.get('thumbnails')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        # Thumbnails are sorted, so the last one is the preferred one
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    timestamp_fields = (
        ('timestamp', 'upload_date'),
        ('release_timestamp', 'release_date'),
    )
    for ts_key, date_key in timestamp_fields:
        if info_dict.get(date_key) is not None or info_dict.get(ts_key) is None:
            continue
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        try:
            derived_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
            info_dict[date_key] = derived_date.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            pass

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        number = info_dict.get('%s_number' % field)
        if number is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), number)

    for cc_kind in ('subtitles', 'automatic_captions'):
        for sub_formats in (info_dict.get(cc_kind) or {}).values():
            for sub_format in sub_formats:
                if sub_format.get('url'):
                    sub_format['url'] = sanitize_url(sub_format['url'])
                if sub_format.get('ext') is None:
                    sub_format['ext'] = determine_ext(sub_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        return

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    # We now pick which formats have to be downloaded
    formats = info_dict.get('formats')
    if formats is None:
        # There's only one format available
        formats = [info_dict]

    if not formats:
        if self.params.get('ignore_no_formats_error'):
            self.report_warning('No video formats found!')
        else:
            raise ExtractorError('No video formats found!')

    def is_wellformed(f):
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = [f for f in formats if is_wellformed(f)]

    # Maps each format_id to the list of formats that carry it
    formats_dict = {}

    # We check that all the formats have the format and format_id fields
    for index, fmt in enumerate(formats):
        sanitize_string_field(fmt, 'format_id')
        sanitize_numeric_fields(fmt)
        fmt['url'] = sanitize_url(fmt['url'])
        if fmt.get('format_id'):
            # Sanitize format_id from characters used in format selector expression
            fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id'])
        else:
            fmt['format_id'] = compat_str(index)
        formats_dict.setdefault(fmt['format_id'], []).append(fmt)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_dict.items():
        if len(ambiguous_formats) > 1:
            for index, fmt in enumerate(ambiguous_formats):
                fmt['format_id'] = '%s-%d' % (format_id, index)

    for fmt in formats:
        if fmt.get('format') is None:
            note = ' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else ''
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=note,
            )
        # Automatically determine file extension if missing
        if fmt.get('ext') is None:
            fmt['ext'] = determine_ext(fmt['url']).lower()
        # Automatically determine protocol if missing (useful for format
        # selection purposes)
        if fmt.get('protocol') is None:
            fmt['protocol'] = determine_protocol(fmt)
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(fmt)
        fmt['http_headers'] = self._calc_headers(full_format_info)
    # Remove private housekeeping stuff
    if '__x_forwarded_for_ip' in info_dict:
        del info_dict['__x_forwarded_for_ip']

    # TODO Central sorting goes here

    if formats and formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats

    info_dict, _ = self.pre_process(info_dict)

    if self.params.get('listformats'):
        if not info_dict.get('formats'):
            raise ExtractorError('No video formats found', expected=True)
        self.list_formats(info_dict)
        return

    format_selector = self.format_selector
    if format_selector is None:
        req_format = self._default_format_spec(info_dict, download=download)
        self.write_debug('Default format spec: %s' % req_format)
        format_selector = self.build_format_selector(req_format)

    # While in format selection we may need to have an access to the original
    # format set in order to calculate some metrics or do some processing.
    # For now we need to be able to guess whether original formats provided
    # by extractor are incomplete or not (i.e. whether extractor provides only
    # video-only or audio-only formats) for proper formats selection for
    # extractors with such incomplete formats (see
    # https://github.com/ytdl-org/youtube-dl/pull/5556).
    # Since formats may be filtered during format selection and may not match
    # the original formats the results may be incorrect. Thus original formats
    # or pre-calculated metrics should be passed to format selection routines
    # as well.
    # We will pass a context object containing all necessary additional data
    # instead of just formats.
    # This fixes incorrect format selection issue (see
    # https://github.com/ytdl-org/youtube-dl/issues/10083).
    incomplete_formats = (
        # All formats are video-only or
        all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
        # all formats are audio-only
        or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

    ctx = {
        'formats': formats,
        'incomplete_formats': incomplete_formats,
    }

    formats_to_download = list(format_selector(ctx))
    if not formats_to_download:
        if not self.params.get('ignore_no_formats_error'):
            raise ExtractorError('Requested format is not available', expected=True)
        self.report_warning('Requested format is not available')
        # Process what we can, even without any available formats.
        self.process_info(dict(info_dict))
    elif download:
        selected_ids = ", ".join([f['format_id'] for f in formats_to_download])
        self.to_screen(
            '[info] %s: Downloading %d format(s): %s' % (
                info_dict['id'], len(formats_to_download), selected_ids))
        for selected in formats_to_download:
            new_info = dict(info_dict)
            # Save a reference to the original info_dict so that it can be modified in process_info if needed
            new_info['__original_infodict'] = info_dict
            new_info.update(selected)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    if formats_to_download:
        info_dict.update(formats_to_download[-1])
    return info_dict

2215

2216

def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    normal_subtitles and automatic_captions map a language code to a list of
    subtitle format dicts (each with an 'ext' key).  They are considered only
    when the 'writesubtitles' / 'writeautomaticsub' params are set, and a
    normal subtitle takes precedence over an automatic caption of the same
    language.

    Languages are chosen by the 'allsubtitles' and 'subtitleslangs' params.
    Each 'subtitleslangs' entry is a regex matched against the available
    languages, 'all' selects every language and a leading '-' removes the
    matching languages again.  Without either param 'en' is used if
    available, else the first available language.  For each language the
    format is picked according to the '/'-separated 'subtitlesformat' param.

    Returns a dict mapping language to the chosen format dict, ordered as the
    languages were requested, or None if no subtitles are available/wanted.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            available_subs.setdefault(lang, cap_info)

    # available_subs is only populated when one of the write* params is set,
    # so an empty dict also covers the "nothing was requested" case
    if not available_subs:
        return None

    all_sub_langs = list(available_subs)
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        # A list (rather than a set) keeps the languages in the order in which
        # they were requested, so the result is deterministic
        requested_langs = []
        for lang_re in self.params.get('subtitleslangs'):
            discard = False
            if lang_re == 'all':
                matched = all_sub_langs
            else:
                # startswith() instead of lang_re[0] tolerates an empty entry
                discard = lang_re.startswith('-')
                if discard:
                    lang_re = lang_re[1:]
                matcher = re.compile(lang_re + '$').match
                matched = [sub_lang for sub_lang in all_sub_langs if matcher(sub_lang)]
            if discard:
                requested_langs = [
                    sub_lang for sub_lang in requested_langs if sub_lang not in matched]
            else:
                for sub_lang in matched:
                    if sub_lang not in requested_langs:
                        requested_langs.append(sub_lang)
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [all_sub_langs[0]]
    self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty format list is treated like a missing language instead of
        # failing on formats[-1] below
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            matches = [f for f in formats if f['ext'] == ext]
            if matches:
                chosen = matches[-1]
                break
        else:
            # None of the preferred formats exists: fall back to the last one
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs

def __forced_printings(self, info_dict, filename, incomplete):
    """Write the fields selected by the force* params to stdout.

    Works on a shallow copy of info_dict extended with 'filename' (when given)
    and 'urls'.  Every 'forceprint' template is rendered first, followed by
    title, id, url, thumbnail, description, filename, duration and format
    (each only if its 'force<field>' param is set), and finally the whole
    dict as JSON when 'forcejson' is set.  When incomplete is true, mandatory
    fields that are missing are skipped instead of raising KeyError.
    """
    info = info_dict.copy()
    if filename is not None:
        info['filename'] = filename
    if info.get('requested_formats') is not None:
        # For RTMP URLs, also include the playpath
        info['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info['requested_formats'])
    elif 'url' in info:
        info['urls'] = info['url'] + info.get('play_path', '')

    def is_forced(field):
        return self.params.get('force%s' % field, False)

    def print_mandatory(field, actual_field=None):
        key = field if actual_field is None else actual_field
        if not is_forced(field):
            return
        if incomplete and info.get(key) is None:
            return
        self.to_stdout(info[key])

    def print_optional(field):
        if is_forced(field) and info.get(field) is not None:
            self.to_stdout(info[field])

    for template in self.params.get('forceprint', []):
        # A bare field name is shorthand for the template "%(name)s"
        if re.match(r'\w+$', template):
            template = '%({})s'.format(template)
        template, template_values = self.prepare_outtmpl(template, info)
        self.to_stdout(template % template_values)

    print_mandatory('title')
    print_mandatory('id')
    print_mandatory('url', 'urls')
    print_optional('thumbnail')
    print_optional('description')
    print_optional('filename')
    if self.params.get('forceduration', False) and info.get('duration') is not None:
        self.to_stdout(formatSeconds(info['duration']))
    print_mandatory('format')

    if self.params.get('forcejson', False):
        self.post_extract(info)
        self.to_stdout(json.dumps(info, default=repr))

2320

2321

def dl(self, name, info, subtitle=False, test=False):
    """Download info to the file name with a suitable downloader.

    With test set, a fixed set of downloader params is used instead of
    self.params and neither the progress hooks nor the debug message are
    used.  'http_headers' is computed with _calc_headers() when info has
    none.  Returns whatever the downloader's download() returns.
    """
    if not test:
        params = self.params
    else:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader = get_suitable_downloader(info, params)(self, params)
    if not test:
        for hook in self._progress_hooks:
            downloader.add_progress_hook(hook)
        # Prefer the URLs of the requested formats; fall back to the single URL
        format_urls = [f['url'] for f in info.get('requested_formats', [])]
        if not format_urls:
            format_urls = [info['url']]
        self.write_debug('Invoking downloader on "%s"' % '", "'.join(format_urls))

    # Copy so that the headers added here don't modify the caller's dict
    download_info = dict(info)
    if download_info.get('http_headers') is None:
        download_info['http_headers'] = self._calc_headers(download_info)
    return downloader.download(name, download_info, subtitle)

2348

2349

def process_info(self, info_dict):

2350

"""Process a single resolved IE result."""

2351

2352

assert info_dict.get('_type', 'video') == 'video'

2353

2354

info_dict.setdefault('__postprocessors', [])

2355

2356

max_downloads = self.params.get('max_downloads')

2357

if max_downloads is not None:

2358

if self._num_downloads >= int(max_downloads):

2359

raise MaxDownloadsReached()

2360

2361

# TODO: backward compatibility, to be removed

2362

info_dict['fulltitle'] = info_dict['title']

2363

2364

if 'format' not in info_dict and 'ext' in info_dict:

2365

info_dict['format'] = info_dict['ext']

2366

2367

if self._match_entry(info_dict) is not None:

2368

return

2369

2370

self.post_extract(info_dict)

2371

self._num_downloads += 1

2372

2373

# info_dict['_filename'] needs to be set for backward compatibility

2374

info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)

2375

temp_filename = self.prepare_filename(info_dict, 'temp')

files_to_move = {}

# Forced printings

self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

2380

2381

if self.params.get('simulate', False):

2382

if self.params.get('force_write_download_archive', False):

2383

self.record_download_archive(info_dict)

2384

2385

# Do nothing else if in simulate mode

2386

return

2387

2388

if full_filename is None:

2389

return

2390

2391

if not self._ensure_dir_exists(encodeFilename(full_filename)):

2392

return

2393

if not self._ensure_dir_exists(encodeFilename(temp_filename)):

2394

return

2395

2396

if self.params.get('writedescription', False):

2397

descfn = self.prepare_filename(info_dict, 'description')

2398

if not self._ensure_dir_exists(encodeFilename(descfn)):

2399

return

2400

if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):

2401

self.to_screen('[info] Video description is already present')

2402

elif info_dict.get('description') is None:

2403

self.report_warning('There\'s no description to write.')

2404

else:

2405

try:

2406

self.to_screen('[info] Writing video description to: ' + descfn)

2407

with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:

2408

descfile.write(info_dict['description'])

2409

except (OSError, IOError):

2410

self.report_error('Cannot write description file ' + descfn)

2411

return

2412

2413

if self.params.get('writeannotations', False):

2414

annofn = self.prepare_filename(info_dict, 'annotation')

2415

if not self._ensure_dir_exists(encodeFilename(annofn)):

2416

return

2417

if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):

2418

self.to_screen('[info] Video annotations are already present')

2419

elif not info_dict.get('annotations'):

2420

self.report_warning('There are no annotations to write.')

2421

else:

2422

try:

2423

self.to_screen('[info] Writing video annotations to: ' + annofn)

2424

with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:

2425

annofile.write(info_dict['annotations'])

2426

except (KeyError, TypeError):

2427

self.report_warning('There are no annotations to write.')

2428

except (OSError, IOError):

2429

self.report_error('Cannot write annotations file: ' + annofn)

2430

return

2431

2432

subtitles_are_requested = any([self.params.get('writesubtitles', False),

2433

self.params.get('writeautomaticsub')])

2434

2435

if subtitles_are_requested and info_dict.get('requested_subtitles'):

2436

# subtitles download errors are already managed as troubles in relevant IE

2437

# that way it will silently go on when used with unsupporting IE

2438

subtitles = info_dict['requested_subtitles']

2439

# ie = self.get_info_extractor(info_dict['extractor_key'])

2440

for sub_lang, sub_info in subtitles.items():

2441

sub_format = sub_info['ext']

2442

sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))

2443

sub_filename_final = subtitles_filename(

2444

self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))

2445

if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):

2446

self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))

2447

sub_info['filepath'] = sub_filename

2448

files_to_move[sub_filename] = sub_filename_final

2449

else:

2450

self.to_screen('[info] Writing video subtitles to: ' + sub_filename)

2451

if sub_info.get('data') is not None:

2452

try:

2453

# Use newline='' to prevent conversion of newline characters

2454

# See https://github.com/ytdl-org/youtube-dl/issues/10268

2455

with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:

2456

subfile.write(sub_info['data'])

2457

sub_info['filepath'] = sub_filename

2458

files_to_move[sub_filename] = sub_filename_final

2459

except (OSError, IOError):

2460

self.report_error('Cannot write subtitles file ' + sub_filename)

return

else:

try:

self.dl(sub_filename, sub_info.copy(), subtitle=True)

2465

sub_info['filepath'] = sub_filename

2466

files_to_move[sub_filename] = sub_filename_final

2467

except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:

2468

self.report_warning('Unable to download subtitle for "%s": %s' %

2469

(sub_lang, error_to_compat_str(err)))

2470

continue

2471

2472

if self.params.get('writeinfojson', False):

2473

infofn = self.prepare_filename(info_dict, 'infojson')

2474

if not self._ensure_dir_exists(encodeFilename(infofn)):

2475

return

2476

if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):

2477

self.to_screen('[info] Video metadata is already present')

2478

else:

2479

self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)

2480

try:

2481

write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)

2482

except (OSError, IOError):

2483

self.report_error('Cannot write video metadata to JSON file ' + infofn)

2484

return

2485

info_dict['__infojson_filename'] = infofn

2486

2487

for thumb_ext in self._write_thumbnails(info_dict, temp_filename):

2488

thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))

2489

thumb_filename = replace_extension(

2490

self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))

2491

files_to_move[thumb_filename_temp] = thumb_filename

2492

2493

# Write internet shortcut files

2494

url_link = webloc_link = desktop_link = False

2495

if self.params.get('writelink', False):

2496

if sys.platform == "darwin": # macOS.

2497

webloc_link = True

2498

elif sys.platform.startswith("linux"):

2499

desktop_link = True

2500

else: # if sys.platform in ['win32', 'cygwin']:

2501

url_link = True

2502

if self.params.get('writeurllink', False):

2503

url_link = True

2504

if self.params.get('writewebloclink', False):

2505

webloc_link = True

2506

if self.params.get('writedesktoplink', False):

2507

desktop_link = True

2508

2509

if url_link or webloc_link or desktop_link:

2510

if 'webpage_url' not in info_dict:

2511

self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')

2512

return

2513

ascii_url = iri_to_uri(info_dict['webpage_url'])

2514

2515

def _write_link_file(extension, template, newline, embed_filename):

2516

linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))

2517

if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):

2518

self.to_screen('[info] Internet shortcut is already present')

2519

else:

2520

try:

2521

self.to_screen('[info] Writing internet shortcut to: ' + linkfn)

2522

with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:

2523

template_vars = {'url': ascii_url}

2524

if embed_filename:

2525

template_vars['filename'] = linkfn[:-(len(extension) + 1)]

2526

linkfile.write(template % template_vars)

2527

except (OSError, IOError):

2528

self.report_error('Cannot write internet shortcut ' + linkfn)

return False

return True

if url_link:

if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):

2534

return

2535

if webloc_link:

2536

if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):

2537

return

2538

if desktop_link:

2539

if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):

return

try:

info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)

2544

except PostProcessingError as err:

2545

self.report_error('Preprocessing: %s' % str(err))

2546

return

2547

2548

must_record_download_archive = False

2549

if self.params.get('skip_download', False):

2550

info_dict['filepath'] = temp_filename

2551

info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

2552

info_dict['__files_to_move'] = files_to_move

2553

info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)

else:

# Download

try:

def existing_file(*filepaths):

2559

ext = info_dict.get('ext')

2560

final_ext = self.params.get('final_ext', ext)

2561

existing_files = []

2562

for file in orderedSet(filepaths):

2563

if final_ext != ext:

2564

converted = replace_extension(file, final_ext, ext)

2565

if os.path.exists(encodeFilename(converted)):

2566

existing_files.append(converted)

2567

if os.path.exists(encodeFilename(file)):

2568

existing_files.append(file)

2569

2570

if not existing_files or self.params.get('overwrites', False):

2571

for file in orderedSet(existing_files):

2572

self.report_file_delete(file)

2573

os.remove(encodeFilename(file))

2574

return None

2575

2576

self.report_file_already_downloaded(existing_files[0])

2577

info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]

2578

return existing_files[0]

2579

2580

success = True

2581

if info_dict.get('requested_formats') is not None:

2582

2583

def compatible_formats(formats):

2584

# TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.

2585

video_formats = [format for format in formats if format.get('vcodec') != 'none']

2586

audio_formats = [format for format in formats if format.get('acodec') != 'none']

2587

if len(video_formats) > 2 or len(audio_formats) > 2:

return False

# Check extension

exts = set(format.get('ext') for format in formats)

2592

COMPATIBLE_EXTS = (

2593

set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),

2594

set(('webm',)),

2595

)

2596

for ext_sets in COMPATIBLE_EXTS:

2597

if ext_sets.issuperset(exts):

2598

return True

2599

# TODO: Check acodec/vcodec

2600

return False

2601

2602

requested_formats = info_dict['requested_formats']

2603

old_ext = info_dict['ext']

2604

if self.params.get('merge_output_format') is None:

2605

if not compatible_formats(requested_formats):

2606

info_dict['ext'] = 'mkv'

2607

self.report_warning(

2608

'Requested formats are incompatible for merge and will be merged into mkv.')

2609

if (info_dict['ext'] == 'webm'

2610

and self.params.get('writethumbnail', False)

2611

and info_dict.get('thumbnails')):

2612

info_dict['ext'] = 'mkv'

2613

self.report_warning(

2614

'webm doesn\'t support embedding a thumbnail, mkv will be used.')

2615

2616

def correct_ext(filename):

2617

filename_real_ext = os.path.splitext(filename)[1][1:]

2618

filename_wo_ext = (

2619

os.path.splitext(filename)[0]

2620

if filename_real_ext == old_ext

2621

else filename)

2622

return '%s.%s' % (filename_wo_ext, info_dict['ext'])

2623

2624

# Ensure filename always has a correct extension for successful merge

2625

full_filename = correct_ext(full_filename)

2626

temp_filename = correct_ext(temp_filename)

2627

dl_filename = existing_file(full_filename, temp_filename)

2628

info_dict['__real_download'] = False

2629

2630

_protocols = set(determine_protocol(f) for f in requested_formats)

2631

if len(_protocols) == 1:

2632

info_dict['protocol'] = _protocols.pop()

2633

directly_mergable = (

2634

'no-direct-merge' not in self.params.get('compat_opts', [])

2635

and info_dict.get('protocol') is not None # All requested formats have same protocol

2636

and not self.params.get('allow_unplayable_formats')

2637

and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')

2638

if directly_mergable:

2639

info_dict['url'] = requested_formats[0]['url']

2640

# Treat it as a single download

2641

dl_filename = existing_file(full_filename, temp_filename)

2642

if dl_filename is None:

2643

success, real_download = self.dl(temp_filename, info_dict)

2644

info_dict['__real_download'] = real_download

2645

else:

2646

downloaded = []

2647

merger = FFmpegMergerPP(self)

2648

if self.params.get('allow_unplayable_formats'):

2649

self.report_warning(

2650

'You have requested merging of multiple formats '

2651

'while also allowing unplayable formats to be downloaded. '

2652

'The formats won\'t be merged to prevent data corruption.')

2653

elif not merger.available:

2654

self.report_warning(

2655

'You have requested merging of multiple formats but ffmpeg is not installed. '

2656

'The formats won\'t be merged.')

2657

2658

if dl_filename is None:

2659

for f in requested_formats:

2660

new_info = dict(info_dict)

2661

del new_info['requested_formats']

2662

new_info.update(f)

2663

fname = prepend_extension(

2664

self.prepare_filename(new_info, 'temp'),

2665

'f%s' % f['format_id'], new_info['ext'])

2666

if not self._ensure_dir_exists(fname):

2667

return

2668

downloaded.append(fname)

2669

partial_success, real_download = self.dl(fname, new_info)

2670

info_dict['__real_download'] = info_dict['__real_download'] or real_download

2671

success = success and partial_success

2672

if merger.available and not self.params.get('allow_unplayable_formats'):

2673

info_dict['__postprocessors'].append(merger)

2674

info_dict['__files_to_merge'] = downloaded

2675

# Even if there were no downloads, it is being merged only now

2676

info_dict['__real_download'] = True

2677

else:

2678

for file in downloaded:

2679

files_to_move[file] = None

2680

else:

2681

# Just a single file

2682

dl_filename = existing_file(full_filename, temp_filename)

2683

if dl_filename is None:

2684

success, real_download = self.dl(temp_filename, info_dict)

2685

info_dict['__real_download'] = real_download

2686

2687

dl_filename = dl_filename or temp_filename

2688

info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

2689

2690

except network_exceptions as err:

2691

self.report_error('unable to download video data: %s' % error_to_compat_str(err))

2692

return

2693

except (OSError, IOError) as err:

2694

raise UnavailableVideoError(err)

2695

except (ContentTooShortError, ) as err:

2696

self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))

2697

return

2698

2699

if success and full_filename != '-':

def fixup():

do_fixup = True

fixup_policy = self.params.get('fixup')

2704

vid = info_dict['id']

2705

2706

if fixup_policy in ('ignore', 'never'):

2707

return

2708

elif fixup_policy == 'warn':

2709

do_fixup = False

2710

elif fixup_policy != 'force':

2711

assert fixup_policy in ('detect_or_warn', None)

2712

if not info_dict.get('__real_download'):

2713

do_fixup = False

2714

2715

def ffmpeg_fixup(cndn, msg, cls):

if not cndn:

return

if not do_fixup:

self.report_warning(f'{vid}: {msg}')

return

pp = cls(self)

if pp.available:

info_dict['__postprocessors'].append(pp)

2724

else:

2725

self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

2726

2727

stretched_ratio = info_dict.get('stretched_ratio')

2728

ffmpeg_fixup(

2729

stretched_ratio not in (1, None),

2730

f'Non-uniform pixel ratio {stretched_ratio}',

2731

FFmpegFixupStretchedPP)

2732

2733

ffmpeg_fixup(

2734

(info_dict.get('requested_formats') is None

2735

and info_dict.get('container') == 'm4a_dash'

2736

and info_dict.get('ext') == 'm4a'),

2737

'writing DASH m4a. Only some players support this container',

2738

FFmpegFixupM4aPP)

2739

2740

downloader = (get_suitable_downloader(info_dict, self.params).__name__

2741

if 'protocol' in info_dict else None)

2742

ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)

2743

ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)

2744

ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)

fixup()

try:

info_dict = self.post_process(dl_filename, info_dict, files_to_move)

2749

except PostProcessingError as err:

2750

self.report_error('Postprocessing: %s' % str(err))

2751

return

2752

try:

2753

for ph in self._post_hooks:

2754

ph(info_dict['filepath'])

2755

except Exception as err:

2756

self.report_error('post hooks: %s' % str(err))

2757

return

2758

must_record_download_archive = True

2759

2760

if must_record_download_archive or self.params.get('force_write_download_archive', False):

2761

self.record_download_archive(info_dict)

2762

max_downloads = self.params.get('max_downloads')

2763

if max_downloads is not None and self._num_downloads >= int(max_downloads):

2764

raise MaxDownloadsReached()

2765

2766

def download(self, url_list):
    """Download every URL in url_list and return the download return code.

    Raises SameFileError when more than one URL is given but the default
    output template is a fixed filename (no '%' field, not '-') and
    max_downloads is not 1. MaxDownloadsReached, ExistingVideoReached and
    RejectedVideoReached are announced on screen and then re-raised.
    """
    default_template = self.outtmpl_dict['default']
    if len(url_list) > 1:
        fixed_filename = default_template != '-' and '%' not in default_template
        if fixed_filename and self.params.get('max_downloads') != 1:
            raise SameFileError(default_template)

    for url in url_list:
        try:
            # extract_info also performs the actual download
            ie_result = self.extract_info(
                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached')
            raise
        except ExistingVideoReached:
            self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
            raise
        except RejectedVideoReached:
            self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
            raise

        # Only reached when extraction finished without raising
        if self.params.get('dump_single_json', False):
            self.post_extract(ie_result)
            self.to_stdout(json.dumps(ie_result, default=repr))

    return self._download_retcode

2797

2798

def download_with_info_file(self, info_filename):
    """Download using the info JSON stored in info_filename.

    The file is decoded as UTF-8, passed through filter_requested_info
    (controlled by the 'clean_infojson' param) and then processed. If that
    fails with DownloadError or EntryNotInPlaylist and the info has a
    'webpage_url', the download is retried from that URL; otherwise the
    error is re-raised.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as info_lines:
        # FileInput doesn't have a read method, we can't call json.load
        raw_info = json.loads('\n'.join(info_lines))
    info = self.filter_requested_info(raw_info, self.params.get('clean_infojson', True))

    try:
        self.process_ie_result(info, download=True)
    except (DownloadError, EntryNotInPlaylist):
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode

2814

2815

@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    """Return a cleaned copy of info_dict suitable for writing out.

    '__original_infodict' is always dropped. With actually_filter=True the
    request-specific keys, every key starting with '_' (except '_type') and
    every empty value are dropped as well, recursively. With
    actually_filter=False an 'epoch' timestamp is written into info_dict
    itself before it is copied. Lists, tuples, sets and LazyLists are
    returned as plain lists.
    """
    # Always remove this since this may contain a copy of the entire dict
    remove_keys = ['__original_infodict']
    # Always keep this to facilitate load-info-json.
    # This must be a plain list: with a trailing comma it becomes a tuple
    # holding a list, and '_type' would be removed as a '_'-prefixed key.
    keep_keys = ['_type']

    if actually_filter:
        remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
        empty_values = (None, {}, [], set(), tuple())

        def reject(key, value):
            return key not in keep_keys and (
                key.startswith('_') or key in remove_keys or value in empty_values)
    else:
        info_dict['epoch'] = int(time.time())

        def reject(key, value):
            return key in remove_keys

    def filter_fn(obj):
        if isinstance(obj, (LazyList, list, tuple, set)):
            return list(map(filter_fn, obj))
        if not isinstance(obj, dict):
            return obj
        return dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v))

    return filter_fn(info_dict)

2832

2833

def run_pp(self, pp, infodict):
    """Run one postprocessor and deal with the files it reports as obsolete.

    pp.run(infodict) returns (files_to_delete, infodict). With the
    'keepvideo' param those files are registered in
    infodict['__files_to_move'] (mapped to ''); otherwise they are removed
    from disk and dropped from '__files_to_move'. Returns the info dict
    produced by the postprocessor.
    """
    infodict.setdefault('__files_to_move', {})
    obsolete_files, infodict = pp.run(infodict)
    if not obsolete_files:
        return infodict

    if self.params.get('keepvideo', False):
        for kept in obsolete_files:
            infodict['__files_to_move'].setdefault(kept, '')
        return infodict

    for old_filename in set(obsolete_files):
        self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
        try:
            os.remove(encodeFilename(old_filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded original file')
        # A deleted file must no longer be scheduled for moving
        infodict['__files_to_move'].pop(old_filename, None)
    return infodict

@staticmethod
def post_extract(info_dict):
    """Resolve deferred '__post_extractor' callables in place.

    For 'playlist' and 'multi_video' results every entry is handled
    recursively. For any other result the callable stored under
    '__post_extractor' (if any) is called, its items are merged into the
    dict and into its '__original_infodict' (when present), and the
    '__post_extractor' key is removed from both.
    """
    def resolve(entry):
        if entry.get('_type') in ('playlist', 'multi_video'):
            for child in entry.get('entries', {}):
                resolve(child or {})
            return

        deferred = entry.get('__post_extractor')
        extra = (deferred() if deferred else {}).items()
        entry.update(extra)
        entry.pop('__post_extractor', None)

        pristine = entry.get('__original_infodict') or {}
        pristine.update(extra)
        pristine.pop('__post_extractor', None)

    resolve(info_dict or {})

2873

2874

def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered under key on a copy of ie_info.

    Returns a tuple (info, files_to_move): the processed info dict with
    its '__files_to_move' entry removed, and that removed entry.
    """
    info = dict(ie_info)
    info['__files_to_move'] = files_to_move or {}
    for processor in self._pps[key]:
        info = self.run_pp(processor, info)
    pending_moves = info.pop('__files_to_move', None)
    return info, pending_moves

2880

2881

def post_process(self, filename, ie_info, files_to_move=None):
    """Run every postprocessor on the given file and return the final info dict.

    The order is: the postprocessors listed in ie_info['__postprocessors'],
    the registered 'post_process' ones, MoveFilesAfterDownloadPP, and
    finally the registered 'after_move' ones.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move or {}

    before_move = ie_info.get('__postprocessors', []) + self._pps['post_process']
    for processor in before_move:
        info = self.run_pp(processor, info)

    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    # The move bookkeeping is no longer needed once the files have been moved
    del info['__files_to_move']

    for processor in self._pps['after_move']:
        info = self.run_pp(processor, info)
    return info

2894

2895

def _make_archive_id(self, info_dict):
    """Build the download-archive entry '<extractor key in lower case> <id>'.

    Returns None when the info has no id, or when no extractor key is
    present and none of self._ies is suitable for the info's url.
    """
    video_id = info_dict.get('id')
    if not video_id:
        return None
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        url = str_or_none(info_dict.get('url'))
        if not url:
            return None
        # Try to find matching extractor for the URL and take its ie_key
        matching_ie = next((ie for ie in self._ies if ie.suitable(url)), None)
        if matching_ie is None:
            return None
        extractor = matching_ie.ie_key()
    return '%s %s' % (extractor.lower(), video_id)

2914

2915

def in_download_archive(self, info_dict):
    """Return True when the video's archive id is already in self.archive.

    Always False when no 'download_archive' param is set or when no
    archive id can be built for info_dict.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    if archive_id:
        return archive_id in self.archive
    return False  # Incomplete video information

2925

2926

def record_download_archive(self, info_dict):
    """Append the video's archive id to the download archive file.

    Does nothing when the 'download_archive' param is unset. The id is
    also added to self.archive.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)

2935

2936

@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short resolution description for a format dict.

    Formats without video are reported as 'audio only', or 'images' when
    they have no audio either. Otherwise an explicit 'resolution' wins,
    then 'WxH', 'Hp' or 'Wx?' depending on which dimensions are set, and
    finally default.
    """
    if format.get('vcodec') == 'none':
        return 'images' if format.get('acodec') == 'none' else 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width, height = format.get('width'), format.get('height')
    if width and height:
        return '%dx%d' % (width, height)
    if height:
        return '%sp' % height
    if width:
        return '%dx?' % width
    return default

def _format_note(self, fdict):
    """Build the free-text note column for a format dict.

    The note is assembled from, in order: an unsupported marker, language,
    format_note, tbr, container, video codec and bitrate, fps, audio codec
    and bitrate, sample rate and (approximate) file size.
    """
    def with_separator(text):
        # A ', ' separator is only needed once something has been written
        return text + ', ' if text else text

    note = ''
    if fdict.get('ext') in ['f4f', 'f4m']:
        note += '(unsupported) '
    if fdict.get('language'):
        if note:
            note += ' '
        note += '[%s] ' % fdict['language']
    if fdict.get('format_note') is not None:
        note += fdict['format_note'] + ' '
    if fdict.get('tbr') is not None:
        note += '%4dk ' % fdict['tbr']
    if fdict.get('container') is not None:
        note = with_separator(note) + '%s container' % fdict['container']

    vcodec, vbr, abr = fdict.get('vcodec'), fdict.get('vbr'), fdict.get('abr')
    if vcodec is not None and vcodec != 'none':
        note = with_separator(note) + vcodec
        if vbr is not None:
            note += '@'
    elif vbr is not None and abr is not None:
        note += 'video@'
    if vbr is not None:
        note += '%4dk' % vbr
    if fdict.get('fps') is not None:
        note = with_separator(note) + '%sfps' % fdict['fps']

    acodec = fdict.get('acodec')
    if acodec is not None:
        note = with_separator(note) + ('video only' if acodec == 'none' else '%-5s' % acodec)
    elif abr is not None:
        note = with_separator(note) + 'audio'
    if abr is not None:
        note += '@%3dk' % abr
    if fdict.get('asr') is not None:
        note += ' (%5dHz)' % fdict['asr']

    if fdict.get('filesize') is not None:
        note = with_separator(note) + format_bytes(fdict['filesize'])
    elif fdict.get('filesize_approx') is not None:
        note = with_separator(note) + '~' + format_bytes(fdict['filesize_approx'])
    return note

3009

3010

def _format_note_table(self, f):
    """Build the NOTE column of the formats table for format dict f.

    The non-empty fields are joined with ', '.
    """
    fields = (
        'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
        format_field(f, 'language', '[%s]'),
        format_field(f, 'format_note'),
        format_field(f, 'container', ignore=(None, f.get('ext'))),
        format_field(f, 'asr', '%5dHz'),
    )
    return ', '.join(field for field in fields if field != '')

3020

3021

def list_formats(self, info_dict):
    """Print the table of available formats for info_dict.

    The detailed table layout is used unless the 'list-formats' compat
    option is set or the 'list_formats_as_table' param is False. Formats
    with a preference below -1000 are not listed.
    """
    formats = info_dict.get('formats', [info_dict])
    new_format = (
        'list-formats' not in self.params.get('compat_opts', [])
        and self.params.get('list_formats_as_table', True) is not False)

    listed = [
        f for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]

    if new_format:
        header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
                       '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                format_field(f, 'fps', '%d'),
                '|',
                format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                format_field(f, 'tbr', '%4dk'),
                shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
                '|',
                format_field(f, 'vcodec', default='unknown').replace('none', ''),
                format_field(f, 'vbr', '%4dk'),
                format_field(f, 'acodec', default='unknown').replace('none', ''),
                format_field(f, 'abr', '%3dk'),
                format_field(f, 'asr', '%5dHz'),
                self._format_note_table(f)]
            for f in listed]
    else:
        header_line = ['format code', 'extension', 'resolution', 'note']
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)]
            for f in listed]

    rendered = render_table(
        header_line,
        table,
        delim=new_format,
        extraGap=(0 if new_format else 1),
        hideEmpty=new_format)
    self.to_screen(
        '[info] Available formats for %s:\n%s' % (info_dict['id'], rendered))

3066

3067

def list_thumbnails(self, info_dict):
    """Print a table of the thumbnails in info_dict.

    A missing, None or empty 'thumbnails' entry prints a "no thumbnails"
    message instead of a table.
    """
    # `or []` guards against a missing/None entry: list(None) would raise
    # TypeError before the emptiness check below could run
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
        return

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    self.to_screen(render_table(
        ['ID', 'width', 'height', 'URL'],
        [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))

3078

3079

def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of the available subtitles (or other tracks called name).

    subtitles maps a language to a list of format dicts, each with an
    'ext' and an optional 'name'. When subtitles is empty only a
    "has no ..." message is printed.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return
    self.to_screen(
        'Available %s for %s:' % (name, video_id))

    def _row(lang, formats):
        # Built as two lists rather than zip(*...) so that a language with
        # no formats yields an empty row instead of an unpacking error
        ordered = list(reversed(formats))
        exts = [f['ext'] for f in ordered]
        # A name that is present but None counts as unknown; join() would
        # otherwise raise TypeError on it
        names = [f.get('name') or 'unknown' for f in ordered]
        if len(set(names)) == 1:
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    self.to_screen(render_table(
        ['Language', 'Name', 'Formats'],
        [_row(lang, formats) for lang, formats in subtitles.items()],
        hideEmpty=True))

3096

3097

def urlopen(self, req):
    """Start an HTTP download.

    req may be a URL string, which is first wrapped with
    sanitized_Request, or a request object. It is opened through
    self._opener using the configured socket timeout.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)

3102

3103

def print_debug_header(self):
    """Write the "[debug]" header lines describing the runtime environment.

    Does nothing unless the 'verbose' param is set. The header covers the
    encodings, program version and source kind, lazy loading, plugin
    extractors, compat options, git HEAD, Python version, external
    program versions and the proxy map. With the 'call_home' param the
    public IP address is also fetched and written.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    # Falls back to a 'missing (<type name>)' marker when sys.stdout has no
    # `encoding` attribute
    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    # How the program is being run: frozen executable, zip import or
    # __main__.py; empty when none of these is detected
    source = (
        '(exe)' if hasattr(sys, 'frozen')
        else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
        else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
        else '')
    self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled\n')
    if _PLUGIN_CLASSES:
        self._write_string(
            '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
    if self.params.get('compat_opts'):
        self._write_string(
            '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
    # Best effort: any failure while querying git is swallowed and the
    # "Git HEAD" line is simply not written
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = process_communicate_or_kill(sp)
        out = out.decode().strip()
        # re.match only anchors at the start, so the output merely has to
        # begin with hex digits
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: %s\n' % out)
    except Exception:
        try:
            # NOTE(review): sys.exc_clear looks like a Python 2 only API; on
            # interpreters without it the AttributeError is swallowed below
            sys.exc_clear()
        except Exception:
            pass

    def python_implementation():
        # Implementation name, with the PyPy version appended when available
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    # Versions of external programs; entries with a falsy version are left
    # out of the printed list
    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    # Merge the `proxies` mappings of every opener handler that has one
    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        # Everything after this return is unreachable, so the version check
        # below never runs
        return
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)

3192

3193

def _setup_opener(self):
    """Create the URL opener and cookie jar used for all requests.

    Sets self._socket_timeout (600 when no 'socket_timeout' param is
    given), self.cookiejar and self._opener.
    """
    configured_timeout = self.params.get('socket_timeout')
    self._socket_timeout = float(configured_timeout) if configured_timeout is not None else 600

    cookie_path = self.params.get('cookiefile')
    explicit_proxy = self.params.get('proxy')

    if cookie_path is not None:
        cookie_path = expand_path(cookie_path)
        self.cookiejar = YoutubeDLCookieJar(cookie_path)
        # Only load the cookie file when it is readable
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load(ignore_discard=True, ignore_expires=True)
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if explicit_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif explicit_proxy == '':
        # An empty proxy setting means no proxies at all
        proxies = {}
    else:
        proxies = {'http': explicit_proxy, 'https': explicit_proxy}
    proxy_handler = PerRequestProxyHandler(proxies)

    traffic_debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=traffic_debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=traffic_debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def refuse_file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
    file_handler.file_open = refuse_file_open

    opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

3245

3246

def encode(self, s):
    """Encode s with the configured encoding; bytes are returned unchanged.

    A UnicodeEncodeError is re-raised with a hint about the --encoding
    option appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s  # Already encoded

3255

3256

def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured

3261

3262

def _write_thumbnails(self, info_dict, filename):  # return the extensions
    """Download the video's thumbnails next to filename.

    Nothing is written unless the 'writethumbnail' or
    'write_all_thumbnails' param is set. Without 'write_all_thumbnails'
    the thumbnails are tried from the last one backwards and the loop
    stops after the first one that is written or already present. Every
    written or already present thumbnail dict gets a 'filepath' entry.

    Returns the list of filename suffixes (extension, prefixed with
    '<id>.' when several thumbnails are written) that are now on disk.
    """
    write_all = self.params.get('write_all_thumbnails', False)
    thumbnails = []
    if write_all or self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails') or []
    multiple = write_all and len(thumbnails) > 1

    ret = []
    for t in thumbnails[::1 if write_all else -1]:
        thumb_ext = determine_ext(t['url'], 'jpg')
        suffix = '%s.' % t['id'] if multiple else ''
        thumb_display_id = '%s ' % t['id'] if multiple else ''
        thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))

        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
            ret.append(suffix + thumb_ext)
            t['filepath'] = thumb_filename
            self.to_screen('[%s] %s: Thumbnail %sis already present' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id))
        else:
            self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
                           (info_dict['extractor'], info_dict['id'], thumb_display_id))
            try:
                # Close the response once it has been copied (or copying
                # failed) instead of leaving the connection open
                with contextlib.closing(self.urlopen(t['url'])) as uf:
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                ret.append(suffix + thumb_ext)
                self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                t['filepath'] = thumb_filename
            except network_exceptions as err:
                self.report_warning('Unable to download thumbnail "%s": %s' %
                                    (t['url'], error_to_compat_str(err)))
        # When only one thumbnail is wanted, stop at the first success
        if ret and not write_all:
            break
    return ret