jfr.im git - yt-dlp.git/blame_incremental

Commit	Line	Data
	1	#!/usr/bin/env python
	2	# coding: utf-8
	3
	4	from __future__ import absolute_import, unicode_literals
	5
	6	import collections
	7	import contextlib
	8	import copy
	9	import datetime
	10	import errno
	11	import fileinput
	12	import io
	13	import itertools
	14	import json
	15	import locale
	16	import operator
	17	import os
	18	import platform
	19	import re
	20	import shutil
	21	import subprocess
	22	import socket
	23	import sys
	24	import time
	25	import tokenize
	26	import traceback
	27	import random
	28
	29	from string import ascii_letters
	30
	31	from .compat import (
	32	compat_basestring,
	33	compat_cookiejar,
	34	compat_get_terminal_size,
	35	compat_http_client,
	36	compat_kwargs,
	37	compat_numeric_types,
	38	compat_os_name,
	39	compat_str,
	40	compat_tokenize_tokenize,
	41	compat_urllib_error,
	42	compat_urllib_request,
	43	compat_urllib_request_DataHandler,
	44	)
	45	from .utils import (
	46	age_restricted,
	47	args_to_str,
	48	ContentTooShortError,
	49	date_from_str,
	50	DateRange,
	51	DEFAULT_OUTTMPL,
	52	determine_ext,
	53	determine_protocol,
	54	DOT_DESKTOP_LINK_TEMPLATE,
	55	DOT_URL_LINK_TEMPLATE,
	56	DOT_WEBLOC_LINK_TEMPLATE,
	57	DownloadError,
	58	encode_compat_str,
	59	encodeFilename,
	60	error_to_compat_str,
	61	ExistingVideoReached,
	62	expand_path,
	63	ExtractorError,
	64	format_bytes,
	65	format_field,
	66	formatSeconds,
	67	GeoRestrictedError,
	68	int_or_none,
	69	iri_to_uri,
	70	ISO3166Utils,
	71	locked_file,
	72	make_HTTPS_handler,
	73	MaxDownloadsReached,
	74	orderedSet,
	75	PagedList,
	76	parse_filesize,
	77	PerRequestProxyHandler,
	78	platform_name,
	79	PostProcessingError,
	80	preferredencoding,
	81	prepend_extension,
	82	register_socks_protocols,
	83	render_table,
	84	replace_extension,
	85	RejectedVideoReached,
	86	SameFileError,
	87	sanitize_filename,
	88	sanitize_path,
	89	sanitize_url,
	90	sanitized_Request,
	91	std_headers,
	92	str_or_none,
	93	subtitles_filename,
	94	to_high_limit_path,
	95	UnavailableVideoError,
	96	url_basename,
	97	version_tuple,
	98	write_json_file,
	99	write_string,
	100	YoutubeDLCookieJar,
	101	YoutubeDLCookieProcessor,
	102	YoutubeDLHandler,
	103	YoutubeDLRedirectHandler,
	104	process_communicate_or_kill,
	105	)
	106	from .cache import Cache
	107	from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
	108	from .extractor.openload import PhantomJSwrapper
	109	from .downloader import get_suitable_downloader
	110	from .downloader.rtmp import rtmpdump_version
	111	from .postprocessor import (
	112	FFmpegFixupM3u8PP,
	113	FFmpegFixupM4aPP,
	114	FFmpegFixupStretchedPP,
	115	FFmpegMergerPP,
	116	FFmpegPostProcessor,
	117	FFmpegSubtitlesConvertorPP,
	118	get_postprocessor,
	119	)
	120	from .version import __version__
	121
	122	if compat_os_name == 'nt':
	123	import ctypes
	124
	125
	126	class YoutubeDL(object):
	127	"""YoutubeDL class.
	128
	129	YoutubeDL objects are the ones responsible for downloading the
	130	actual video file and writing it to disk if the user has requested
	131	it, among some other tasks. In most cases there should be one per
	132	program. As, given a video URL, the downloader doesn't know how to
	133	extract all the needed information, task that InfoExtractors do, it
	134	has to pass the URL to one of them.
	135
	136	For this, YoutubeDL objects have a method that allows
	137	InfoExtractors to be registered in a given order. When it is passed
	138	a URL, the YoutubeDL object hands it to the first InfoExtractor it
	139	finds that reports being able to handle it. The InfoExtractor extracts
	140	all the information about the video or videos the URL refers to, and
	141	YoutubeDL processes the extracted information, possibly using a File
	142	Downloader to download the video.
	143
	144	YoutubeDL objects accept a lot of parameters. In order not to saturate
	145	the object constructor with arguments, it receives a dictionary of
	146	options instead. These options are available through the params
	147	attribute for the InfoExtractors to use. The YoutubeDL also
	148	registers itself as the downloader in charge for the InfoExtractors
	149	that are added to it, so this is a "mutual registration".
	150
	151	Available options:
	152
	153	username: Username for authentication purposes.
	154	password: Password for authentication purposes.
	155	videopassword: Password for accessing a video.
	156	ap_mso: Adobe Pass multiple-system operator identifier.
	157	ap_username: Multiple-system operator account username.
	158	ap_password: Multiple-system operator account password.
	159	usenetrc: Use netrc for authentication instead.
	160	verbose: Print additional info to stdout.
	161	quiet: Do not print messages to stdout.
	162	no_warnings: Do not print out anything for warnings.
	163	forceurl: Force printing final URL.
	164	forcetitle: Force printing title.
	165	forceid: Force printing ID.
	166	forcethumbnail: Force printing thumbnail URL.
	167	forcedescription: Force printing description.
	168	forcefilename: Force printing final filename.
	169	forceduration: Force printing duration.
	170	forcejson: Force printing info_dict as JSON.
	171	dump_single_json: Force printing the info_dict of the whole playlist
	172	(or video) as a single JSON line.
	173	force_write_download_archive: Force writing download archive regardless of
	174	'skip_download' or 'simulate'.
	175	simulate: Do not download the video files.
	176	format: Video format code. see "FORMAT SELECTION" for more details.
	177	format_sort: How to sort the video formats. see "Sorting Formats" for more details.
	178	format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.
	179	allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
	180	allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
	181	outtmpl: Template for output names.
	182	restrictfilenames: Do not allow "&" and spaces in file names.
	183	trim_file_name: Limit length of filename (extension excluded).
	184	ignoreerrors: Do not stop on download errors. (Default True when running youtube-dlc, but False when directly accessing YoutubeDL class)
	185	force_generic_extractor: Force downloader to use the generic extractor
	186	overwrites: Overwrite all video and metadata files if True,
	187	overwrite only non-video files if None
	188	and don't overwrite any file if False
	189	playliststart: Playlist item to start at.
	190	playlistend: Playlist item to end at.
	191	playlist_items: Specific indices of playlist to download.
	192	playlistreverse: Download playlist items in reverse order.
	193	playlistrandom: Download playlist items in random order.
	194	matchtitle: Download only matching titles.
	195	rejecttitle: Reject downloads for matching titles.
	196	logger: Log messages to a logging.Logger instance.
	197	logtostderr: Log messages to stderr instead of stdout.
	198	writedescription: Write the video description to a .description file
	199	writeinfojson: Write the video description to a .info.json file
	200	writeannotations: Write the video annotations to a .annotations.xml file
	201	writethumbnail: Write the thumbnail image to a file
	202	write_all_thumbnails: Write all thumbnail formats to files
	203	writelink: Write an internet shortcut file, depending on the
	204	current platform (.url/.webloc/.desktop)
	205	writeurllink: Write a Windows internet shortcut file (.url)
	206	writewebloclink: Write a macOS internet shortcut file (.webloc)
	207	writedesktoplink: Write a Linux internet shortcut file (.desktop)
	208	writesubtitles: Write the video subtitles to a file
	209	writeautomaticsub: Write the automatically generated subtitles to a file
	210	allsubtitles: Downloads all the subtitles of the video
	211	(requires writesubtitles or writeautomaticsub)
	212	listsubtitles: Lists all available subtitles for the video
	213	subtitlesformat: The format code for subtitles
	214	subtitleslangs: List of languages of the subtitles to download
	215	keepvideo: Keep the video file after post-processing
	216	daterange: A DateRange object, download only if the upload_date is in the range.
	217	skip_download: Skip the actual download of the video file
	218	cachedir: Location of the cache files in the filesystem.
	219	False to disable filesystem cache.
	220	noplaylist: Download single video instead of a playlist if in doubt.
	221	age_limit: An integer representing the user's age in years.
	222	Unsuitable videos for the given age are skipped.
	223	min_views: An integer representing the minimum view count the video
	224	must have in order to not be skipped.
	225	Videos without view count information are always
	226	downloaded. None for no limit.
	227	max_views: An integer representing the maximum view count.
	228	Videos that are more popular than that are not
	229	downloaded.
	230	Videos without view count information are always
	231	downloaded. None for no limit.
	232	download_archive: File name of a file where all downloads are recorded.
	233	Videos already present in the file are not downloaded
	234	again.
	235	break_on_existing: Stop the download process after attempting to download a
	236	file that is in the archive.
	237	break_on_reject: Stop the download process when encountering a video that
	238	has been filtered out.
	239	cookiefile: File name where cookies should be read from and dumped to
	240	nocheckcertificate:Do not verify SSL certificates
	241	prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
	242	At the moment, this is only supported by YouTube.
	243	proxy: URL of the proxy server to use
	244	geo_verification_proxy: URL of the proxy to use for IP address verification
	245	on geo-restricted sites.
	246	socket_timeout: Time to wait for unresponsive hosts, in seconds
	247	bidi_workaround: Work around buggy terminals without bidirectional text
	248	support, using fribidi
	249	debug_printtraffic:Print out sent and received HTTP traffic
	250	include_ads: Download ads as well
	251	default_search: Prepend this string if an input url is not valid.
	252	'auto' for elaborate guessing
	253	encoding: Use this encoding instead of the system-specified.
	254	extract_flat: Do not resolve URLs, return the immediate result.
	255	Pass in 'in_playlist' to only show this behavior for
	256	playlist items.
	257	postprocessors: A list of dictionaries, each with an entry
	258	* key: The name of the postprocessor. See
	259	youtube_dlc/postprocessor/__init__.py for a list.
	260	as well as any further keyword arguments for the
	261	postprocessor.
	262	post_hooks: A list of functions that get called as the final step
	263	for each video file, after all postprocessors have been
	264	called. The filename will be passed as the only argument.
	265	progress_hooks: A list of functions that get called on download
	266	progress, with a dictionary with the entries
	267	* status: One of "downloading", "error", or "finished".
	268	Check this first and ignore unknown values.
	269
	270	If status is one of "downloading", or "finished", the
	271	following properties may also be present:
	272	* filename: The final filename (always present)
	273	* tmpfilename: The filename we're currently writing to
	274	* downloaded_bytes: Bytes on disk
	275	* total_bytes: Size of the whole file, None if unknown
	276	* total_bytes_estimate: Guess of the eventual file size,
	277	None if unavailable.
	278	* elapsed: The number of seconds since download started.
	279	* eta: The estimated time in seconds, None if unknown
	280	* speed: The download speed in bytes/second, None if
	281	unknown
	282	* fragment_index: The counter of the currently
	283	downloaded video fragment.
	284	* fragment_count: The number of fragments (= individual
	285	files that will be merged)
	286
	287	Progress hooks are guaranteed to be called at least once
	288	(with status "finished") if the download is successful.
	289	merge_output_format: Extension to use when merging formats.
	290	fixup: Automatically correct known faults of the file.
	291	One of:
	292	- "never": do nothing
	293	- "warn": only emit a warning
	294	- "detect_or_warn": check whether we can do anything
	295	about it, warn otherwise (default)
	296	source_address: Client-side IP address to bind to.
	297	call_home: Boolean, true iff we are allowed to contact the
	298	youtube-dlc servers for debugging.
	299	sleep_interval: Number of seconds to sleep before each download when
	300	used alone or a lower bound of a range for randomized
	301	sleep before each download (minimum possible number
	302	of seconds to sleep) when used along with
	303	max_sleep_interval.
	304	max_sleep_interval:Upper bound of a range for randomized sleep before each
	305	download (maximum possible number of seconds to sleep).
	306	Must only be used along with sleep_interval.
	307	Actual sleep time will be a random float from range
	308	[sleep_interval; max_sleep_interval].
	309	listformats: Print an overview of available video formats and exit.
	310	list_thumbnails: Print a table of all thumbnails and exit.
	311	match_filter: A function that gets called with the info_dict of
	312	every video.
	313	If it returns a message, the video is ignored.
	314	If it returns None, the video is downloaded.
	315	match_filter_func in utils.py is one example for this.
	316	no_color: Do not emit color codes in output.
	317	geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
	318	HTTP header
	319	geo_bypass_country:
	320	Two-letter ISO 3166-1 alpha-2 country code that will be used for
	321	explicit geographic restriction bypassing via faking
	322	X-Forwarded-For HTTP header
	323	geo_bypass_ip_block:
	324	IP range in CIDR notation that will be used similarly to
	325	geo_bypass_country
	326
	327	The following options determine which downloader is picked:
	328	external_downloader: Executable of the external downloader to call.
	329	None or unset for standard (built-in) downloader.
	330	hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
	331	if True, otherwise use ffmpeg/avconv if False, otherwise
	332	use downloader suggested by extractor if None.
	333
	334	The following parameters are not used by YoutubeDL itself, they are used by
	335	the downloader (see youtube_dlc/downloader/common.py):
	336	nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
	337	noresizebuffer, retries, continuedl, noprogress, consoletitle,
	338	xattr_set_filesize, external_downloader_args, hls_use_mpegts,
	339	http_chunk_size.
	340
	341	The following options are used by the post processors:
	342	prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
	343	otherwise prefer ffmpeg.
	344	ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
	345	to the binary or its containing directory.
	346	postprocessor_args: A dictionary of postprocessor names (in lower case) and a list
	347	of additional command-line arguments for the postprocessor.
	348	Use 'default' as the name for arguments to be passed to all PP.
	349
	350	The following options are used by the Youtube extractor:
	351	youtube_include_dash_manifest: If True (default), DASH manifests and related
	352	data will be downloaded and processed by extractor.
	353	You can reduce network I/O by disabling it if you don't
	354	care about DASH.
	355	"""
	356
    # Field names this class groups as "numeric" (per the constant's name).
    # NOTE(review): the code that consumes this set is outside the visible part
    # of the file - confirm how it is used before relying on it.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level defaults. __init__ assigns per-instance values for params,
    # _ies, _pps, _download_retcode, _num_downloads and _screen_file.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None
	376
	377	def __init__(self, params=None, auto_init=True):
	378	"""Create a FileDownloader object with the given options."""
	379	if params is None:
	380	params = {}
	381	self._ies = []
	382	self._ies_instances = {}
	383	self._pps = []
	384	self._post_hooks = []
	385	self._progress_hooks = []
	386	self._download_retcode = 0
	387	self._num_downloads = 0
	388	self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
	389	self._err_file = sys.stderr
	390	self.params = {
	391	# Default parameters
	392	'nocheckcertificate': False,
	393	}
	394	self.params.update(params)
	395	self.cache = Cache(self)
	396	self.archive = set()
	397
	398	"""Preload the archive, if any is specified"""
	399	def preload_download_archive(self):
	400	fn = self.params.get('download_archive')
	401	if fn is None:
	402	return False
	403	try:
	404	with locked_file(fn, 'r', encoding='utf-8') as archive_file:
	405	for line in archive_file:
	406	self.archive.add(line.strip())
	407	except IOError as ioe:
	408	if ioe.errno != errno.ENOENT:
	409	raise
	410	return False
	411	return True
	412
	413	def check_deprecated(param, option, suggestion):
	414	if self.params.get(param) is not None:
	415	self.report_warning(
	416	'%s is deprecated. Use %s instead.' % (option, suggestion))
	417	return True
	418	return False
	419
	420	if self.params.get('verbose'):
	421	self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))
	422
	423	preload_download_archive(self)
	424
	425	if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
	426	if self.params.get('geo_verification_proxy') is None:
	427	self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
	428
	429	check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
	430	check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
	431	check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
	432
	433	if params.get('bidi_workaround', False):
	434	try:
	435	import pty
	436	master, slave = pty.openpty()
	437	width = compat_get_terminal_size().columns
	438	if width is None:
	439	width_args = []
	440	else:
	441	width_args = ['-w', str(width)]
	442	sp_kwargs = dict(
	443	stdin=subprocess.PIPE,
	444	stdout=slave,
	445	stderr=self._err_file)
	446	try:
	447	self._output_process = subprocess.Popen(
	448	['bidiv'] + width_args, **sp_kwargs
	449	)
	450	except OSError:
	451	self._output_process = subprocess.Popen(
	452	['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
	453	self._output_channel = os.fdopen(master, 'rb')
	454	except OSError as ose:
	455	if ose.errno == errno.ENOENT:
	456	self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
	457	else:
	458	raise
	459
	460	if (sys.platform != 'win32'
	461	and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
	462	and not params.get('restrictfilenames', False)):
	463	# Unicode filesystem API will throw errors (#1474, #13027)
	464	self.report_warning(
	465	'Assuming --restrict-filenames since file system encoding '
	466	'cannot encode all characters. '
	467	'Set the LC_ALL environment variable to fix this.')
	468	self.params['restrictfilenames'] = True
	469
	470	if isinstance(params.get('outtmpl'), bytes):
	471	self.report_warning(
	472	'Parameter outtmpl is bytes, but should be a unicode string. '
	473	'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
	474
	475	self._setup_opener()
	476
	477	if auto_init:
	478	self.print_debug_header()
	479	self.add_default_info_extractors()
	480
	481	for pp_def_raw in self.params.get('postprocessors', []):
	482	pp_class = get_postprocessor(pp_def_raw['key'])
	483	pp_def = dict(pp_def_raw)
	484	del pp_def['key']
	485	pp = pp_class(self, **compat_kwargs(pp_def))
	486	self.add_post_processor(pp)
	487
	488	for ph in self.params.get('post_hooks', []):
	489	self.add_post_hook(ph)
	490
	491	for ph in self.params.get('progress_hooks', []):
	492	self.add_progress_hook(ph)
	493
	494	register_socks_protocols()
	495
	496	def warn_if_short_id(self, argv):
	497	# short YouTube ID starting with dash?
	498	idxs = [
	499	i for i, a in enumerate(argv)
	500	if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]

1

#!/usr/bin/env python

2

# coding: utf-8

3

4

from __future__ import absolute_import, unicode_literals

import collections

import contextlib

import copy

import datetime

import errno

import fileinput

import io

import itertools

import json

import locale

import operator

import os

import platform

import re

import shutil

import subprocess

import socket

import sys

import time

import tokenize

import traceback

import random

from string import ascii_letters

30

31

from .compat import (

32

compat_basestring,

33

compat_cookiejar,

34

compat_get_terminal_size,

35

compat_http_client,

36

compat_kwargs,

37

compat_numeric_types,

38

compat_os_name,

39

compat_str,

40

compat_tokenize_tokenize,

41

compat_urllib_error,

42

compat_urllib_request,

43

compat_urllib_request_DataHandler,

)

from .utils import (

age_restricted,

args_to_str,

ContentTooShortError,

date_from_str,

DateRange,

DEFAULT_OUTTMPL,

determine_ext,

determine_protocol,

DOT_DESKTOP_LINK_TEMPLATE,

55

DOT_URL_LINK_TEMPLATE,

56

DOT_WEBLOC_LINK_TEMPLATE,

DownloadError,

encode_compat_str,

encodeFilename,

error_to_compat_str,

ExistingVideoReached,

expand_path,

ExtractorError,

format_bytes,

format_field,

formatSeconds,

GeoRestrictedError,

int_or_none,

iri_to_uri,

ISO3166Utils,

locked_file,

make_HTTPS_handler,

MaxDownloadsReached,

orderedSet,

PagedList,

parse_filesize,

PerRequestProxyHandler,

platform_name,

PostProcessingError,

preferredencoding,

prepend_extension,

register_socks_protocols,

83

render_table,

84

replace_extension,

85

RejectedVideoReached,

SameFileError,

sanitize_filename,

sanitize_path,

sanitize_url,

sanitized_Request,

std_headers,

str_or_none,

subtitles_filename,

to_high_limit_path,

UnavailableVideoError,

url_basename,

version_tuple,

write_json_file,

write_string,

YoutubeDLCookieJar,

YoutubeDLCookieProcessor,

102

YoutubeDLHandler,

103

YoutubeDLRedirectHandler,

104

process_communicate_or_kill,

105

)

106

from .cache import Cache

107

from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER

108

from .extractor.openload import PhantomJSwrapper

109

from .downloader import get_suitable_downloader

110

from .downloader.rtmp import rtmpdump_version

111

from .postprocessor import (

112

FFmpegFixupM3u8PP,

113

FFmpegFixupM4aPP,

114

FFmpegFixupStretchedPP,

115

FFmpegMergerPP,

116

FFmpegPostProcessor,

117

FFmpegSubtitlesConvertorPP,

118

get_postprocessor,

119

)

120

from .version import __version__

121

122

if compat_os_name == 'nt':

import ctypes

class YoutubeDL(object):

127

"""YoutubeDL class.

128

129

YoutubeDL objects are the ones responsible for downloading the

130

actual video file and writing it to disk if the user has requested

131

it, among some other tasks. In most cases there should be one per

132

program. As, given a video URL, the downloader doesn't know how to

133

extract all the needed information, task that InfoExtractors do, it

134

has to pass the URL to one of them.

135

136

For this, YoutubeDL objects have a method that allows

137

InfoExtractors to be registered in a given order. When it is passed

138

a URL, the YoutubeDL object hands it to the first InfoExtractor it

139

finds that reports being able to handle it. The InfoExtractor extracts

140

all the information about the video or videos the URL refers to, and

141

YoutubeDL processes the extracted information, possibly using a File

142

Downloader to download the video.

143

144

YoutubeDL objects accept a lot of parameters. In order not to saturate

145

the object constructor with arguments, it receives a dictionary of

146

options instead. These options are available through the params

147

attribute for the InfoExtractors to use. The YoutubeDL also

148

registers itself as the downloader in charge for the InfoExtractors

149

that are added to it, so this is a "mutual registration".

Available options:

username: Username for authentication purposes.

154

password: Password for authentication purposes.

155

videopassword: Password for accessing a video.

156

ap_mso: Adobe Pass multiple-system operator identifier.

157

ap_username: Multiple-system operator account username.

158

ap_password: Multiple-system operator account password.

159

usenetrc: Use netrc for authentication instead.

160

verbose: Print additional info to stdout.

161

quiet: Do not print messages to stdout.

162

no_warnings: Do not print out anything for warnings.

163

forceurl: Force printing final URL.

164

forcetitle: Force printing title.

165

forceid: Force printing ID.

166

forcethumbnail: Force printing thumbnail URL.

167

forcedescription: Force printing description.

168

forcefilename: Force printing final filename.

169

forceduration: Force printing duration.

170

forcejson: Force printing info_dict as JSON.

171

dump_single_json: Force printing the info_dict of the whole playlist

172

(or video) as a single JSON line.

173

force_write_download_archive: Force writing download archive regardless of

174

'skip_download' or 'simulate'.

175

simulate: Do not download the video files.

176

format: Video format code. see "FORMAT SELECTION" for more details.

177

format_sort: How to sort the video formats. see "Sorting Formats" for more details.

178

format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.

179

allow_multiple_video_streams: Allow multiple video streams to be merged into a single file

180

allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file

181

outtmpl: Template for output names.

182

restrictfilenames: Do not allow "&" and spaces in file names.

183

trim_file_name: Limit length of filename (extension excluded).

184

ignoreerrors: Do not stop on download errors. (Default True when running youtube-dlc, but False when directly accessing YoutubeDL class)

185

force_generic_extractor: Force downloader to use the generic extractor

186

overwrites: Overwrite all video and metadata files if True,

187

overwrite only non-video files if None

188

and don't overwrite any file if False

189

playliststart: Playlist item to start at.

190

playlistend: Playlist item to end at.

191

playlist_items: Specific indices of playlist to download.

192

playlistreverse: Download playlist items in reverse order.

193

playlistrandom: Download playlist items in random order.

194

matchtitle: Download only matching titles.

195

rejecttitle: Reject downloads for matching titles.

196

logger: Log messages to a logging.Logger instance.

197

logtostderr: Log messages to stderr instead of stdout.

198

writedescription: Write the video description to a .description file

199

writeinfojson: Write the video description to a .info.json file

200

writeannotations: Write the video annotations to a .annotations.xml file

201

writethumbnail: Write the thumbnail image to a file

202

write_all_thumbnails: Write all thumbnail formats to files

203

writelink: Write an internet shortcut file, depending on the

204

current platform (.url/.webloc/.desktop)

205

writeurllink: Write a Windows internet shortcut file (.url)

206

writewebloclink: Write a macOS internet shortcut file (.webloc)

207

writedesktoplink: Write a Linux internet shortcut file (.desktop)

208

writesubtitles: Write the video subtitles to a file

209

writeautomaticsub: Write the automatically generated subtitles to a file

210

allsubtitles: Downloads all the subtitles of the video

211

(requires writesubtitles or writeautomaticsub)

212

listsubtitles: Lists all available subtitles for the video

213

subtitlesformat: The format code for subtitles

214

subtitleslangs: List of languages of the subtitles to download

215

keepvideo: Keep the video file after post-processing

216

daterange: A DateRange object, download only if the upload_date is in the range.

217

skip_download: Skip the actual download of the video file

218

cachedir: Location of the cache files in the filesystem.

219

False to disable filesystem cache.

220

noplaylist: Download single video instead of a playlist if in doubt.

221

age_limit: An integer representing the user's age in years.

222

Unsuitable videos for the given age are skipped.

223

min_views: An integer representing the minimum view count the video

224

must have in order to not be skipped.

225

Videos without view count information are always

226

downloaded. None for no limit.

227

max_views: An integer representing the maximum view count.

228

Videos that are more popular than that are not

229

downloaded.

230

Videos without view count information are always

231

downloaded. None for no limit.

232

download_archive: File name of a file where all downloads are recorded.

233

Videos already present in the file are not downloaded

234

again.

235

break_on_existing: Stop the download process after attempting to download a

236

file that is in the archive.

237

break_on_reject: Stop the download process when encountering a video that

238

has been filtered out.

239

cookiefile: File name where cookies should be read from and dumped to

240

nocheckcertificate:Do not verify SSL certificates

241

prefer_insecure: Use HTTP instead of HTTPS to retrieve information.

242

At the moment, this is only supported by YouTube.

243

proxy: URL of the proxy server to use

244

geo_verification_proxy: URL of the proxy to use for IP address verification

245

on geo-restricted sites.

246

socket_timeout: Time to wait for unresponsive hosts, in seconds

247

bidi_workaround: Work around buggy terminals without bidirectional text

248

support, using fribidi

249

debug_printtraffic:Print out sent and received HTTP traffic

250

include_ads: Download ads as well

251

default_search: Prepend this string if an input url is not valid.

252

'auto' for elaborate guessing

253

encoding: Use this encoding instead of the system-specified.

254

extract_flat: Do not resolve URLs, return the immediate result.

255

Pass in 'in_playlist' to only show this behavior for

256

playlist items.

257

postprocessors: A list of dictionaries, each with an entry

258

* key: The name of the postprocessor. See

259

youtube_dlc/postprocessor/__init__.py for a list.

260

as well as any further keyword arguments for the

261

postprocessor.

262

post_hooks: A list of functions that get called as the final step

263

for each video file, after all postprocessors have been

264

called. The filename will be passed as the only argument.

265

progress_hooks: A list of functions that get called on download

266

progress, with a dictionary with the entries

267

* status: One of "downloading", "error", or "finished".

268

Check this first and ignore unknown values.

269

270

If status is one of "downloading", or "finished", the

271

following properties may also be present:

272

* filename: The final filename (always present)

273

* tmpfilename: The filename we're currently writing to

274

* downloaded_bytes: Bytes on disk

275

* total_bytes: Size of the whole file, None if unknown

276

* total_bytes_estimate: Guess of the eventual file size,

277

None if unavailable.

278

* elapsed: The number of seconds since download started.

279

* eta: The estimated time in seconds, None if unknown

280

* speed: The download speed in bytes/second, None if

281

unknown

282

* fragment_index: The counter of the currently

283

downloaded video fragment.

284

* fragment_count: The number of fragments (= individual

285

files that will be merged)

286

287

Progress hooks are guaranteed to be called at least once

288

(with status "finished") if the download is successful.

289

merge_output_format: Extension to use when merging formats.

290

fixup: Automatically correct known faults of the file.

291

One of:

292

- "never": do nothing

293

- "warn": only emit a warning

294

- "detect_or_warn": check whether we can do anything

295

about it, warn otherwise (default)

296

source_address: Client-side IP address to bind to.

297

call_home: Boolean, true iff we are allowed to contact the

298

youtube-dlc servers for debugging.

299

sleep_interval: Number of seconds to sleep before each download when

300

used alone or a lower bound of a range for randomized

301

sleep before each download (minimum possible number

302

of seconds to sleep) when used along with

303

max_sleep_interval.

304

max_sleep_interval:Upper bound of a range for randomized sleep before each

305

download (maximum possible number of seconds to sleep).

306

Must only be used along with sleep_interval.

307

Actual sleep time will be a random float from range

308

[sleep_interval; max_sleep_interval].

309

listformats: Print an overview of available video formats and exit.

310

list_thumbnails: Print a table of all thumbnails and exit.

311

match_filter: A function that gets called with the info_dict of

312

every video.

313

If it returns a message, the video is ignored.

314

If it returns None, the video is downloaded.

315

match_filter_func in utils.py is one example for this.

316

no_color: Do not emit color codes in output.

317

geo_bypass: Bypass geographic restriction via faking X-Forwarded-For

318

HTTP header

319

geo_bypass_country:

320

Two-letter ISO 3166-1 alpha-2 country code that will be used for

321

explicit geographic restriction bypassing via faking

322

X-Forwarded-For HTTP header

323

geo_bypass_ip_block:

324

IP range in CIDR notation that will be used similarly to

325

geo_bypass_country

326

327

The following options determine which downloader is picked:

328

external_downloader: Executable of the external downloader to call.

329

None or unset for standard (built-in) downloader.

330

hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv

331

if True, otherwise use ffmpeg/avconv if False, otherwise

332

use downloader suggested by extractor if None.

333

334

The following parameters are not used by YoutubeDL itself, they are used by

335

the downloader (see youtube_dlc/downloader/common.py):

336

nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,

337

noresizebuffer, retries, continuedl, noprogress, consoletitle,

338

xattr_set_filesize, external_downloader_args, hls_use_mpegts,

339

http_chunk_size.

340

341

The following options are used by the post processors:

342

prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,

343

otherwise prefer ffmpeg.

344

ffmpeg_location: Location of the ffmpeg/avconv binary; either the path

345

to the binary or its containing directory.

346

postprocessor_args: A dictionary of postprocessor names (in lower case) and a list

347

of additional command-line arguments for the postprocessor.

348

Use 'default' as the name for arguments to be passed to all PP.

349

350

The following options are used by the Youtube extractor:

351

youtube_include_dash_manifest: If True (default), DASH manifests and related

352

data will be downloaded and processed by extractor.

353

You can reduce network I/O by disabling it if you don't

care about DASH.

"""

_NUMERIC_FIELDS = set((

358

'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',

359

'timestamp', 'upload_year', 'upload_month', 'upload_day',

360

'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',

361

'average_rating', 'comment_count', 'age_limit',

362

'start_time', 'end_time',

363

'chapter_number', 'season_number', 'episode_number',

364

'track_number', 'disc_number', 'release_year',

'playlist_index',

))

params = None

_ies = []

_pps = []

_download_retcode = None

372

_num_downloads = None

373

_playlist_level = 0

374

_playlist_urls = set()

375

_screen_file = None

376

377

def __init__(self, params=None, auto_init=True):
    """Create a FileDownloader object with the given options.

    params    -- dict of options; it is copied on top of the built-in
                 defaults into self.params.
    auto_init -- when true, print the debug header and register the
                 default info extractors.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._post_hooks = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Keep the nested-playlist guard per instance instead of mutating the
    # class-level set that every instance would otherwise share.
    self._playlist_level = 0
    self._playlist_urls = set()
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)
    self.archive = set()

    def preload_download_archive(self):
        """Preload the archive, if any is specified"""
        fn = self.params.get('download_archive')
        if fn is None:
            return False
        # Only announce the load when there actually is an archive file;
        # previously "Loading archive file None" was printed unconditionally.
        if self.params.get('verbose'):
            self.to_stdout('[debug] Loading archive file %r' % fn)
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file is fine, anything else is a real error
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    def check_deprecated(param, option, suggestion):
        """Warn and return True if the deprecated param is set."""
        if self.params.get(param) is not None:
            self.report_warning(
                '%s is deprecated. Use %s instead.' % (option, suggestion))
            return True
        return False

    preload_download_archive(self)

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Prefer bidiv, fall back to fribidi if it cannot be started
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if isinstance(params.get('outtmpl'), bytes):
        self.report_warning(
            'Parameter outtmpl is bytes, but should be a unicode string. '
            'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        # 'key' only selects the class; the rest are constructor arguments
        pp_def = dict(pp_def_raw)
        del pp_def['key']
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp)

    for ph in self.params.get('post_hooks', []):
        self.add_post_hook(ph)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)

    register_socks_protocols()

495

496

def warn_if_short_id(self, argv):
    """Warn when an argument looks like a short ID that starts with a dash.

    Such an argument ('-' followed by ten ID characters) would be parsed as
    an option, so the warning suggests a command line that moves it behind
    a '--' separator.
    """
    # short YouTube ID starting with dash?
    short_id_re = r'^-[0-9A-Za-z_-]{10}$'
    dashed = [pos for pos, arg in enumerate(argv) if re.match(short_id_re, arg)]
    if not dashed:
        return
    regular = [arg for pos, arg in enumerate(argv) if pos not in dashed]
    moved = [argv[pos] for pos in dashed]
    correct_argv = ['youtube-dlc'] + regular + ['--'] + moved
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))

511

512

def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list."""
    self._ies.append(ie)
    if isinstance(ie, type):
        # A class is only listed; it gets no instance entry here.
        return
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)

518

519

def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key. When none exists yet,
    one is created, added to the extractor list and returned.
    """
    instance = self._ies_instances.get(ie_key)
    if instance is not None:
        return instance
    instance = get_info_extractor(ie_key)()
    self.add_info_extractor(instance)
    return instance

530

531

def add_default_info_extractors(self):
    """
    Append every extractor yielded by gen_extractor_classes() to the list
    """
    register = self.add_info_extractor
    for extractor in gen_extractor_classes():
        register(extractor)

537

538

def add_post_processor(self, pp):
    """Append a PostProcessor to the chain and bind it to this downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)

542

543

def add_post_hook(self, ph):
    """Register ph at the end of the post hook list"""
    hooks = self._post_hooks
    hooks.append(ph)

546

547

def add_progress_hook(self, ph):
    """Register ph as a progress hook (currently only for the file downloader)"""
    hooks = self._progress_hooks
    hooks.append(ph)

550

551

def _bidi_workaround(self, message):

552

if not hasattr(self, '_output_channel'):

553

return message

554

555

assert hasattr(self, '_output_process')

556

assert isinstance(message, compat_str)

557

line_count = message.count('\n') + 1

558

self._output_process.stdin.write((message + '\n').encode('utf-8'))

559

self._output_process.stdin.flush()

560

res = ''.join(self._output_channel.readline().decode('utf-8')

561

for _ in range(line_count))

562

return res[:-len('\n')]

563

564

def to_screen(self, message, skip_eol=False):
    """Forward message to to_stdout() with the quiet check enabled."""
    result = self.to_stdout(message, skip_eol, check_quiet=True)
    return result

567

568

def _write_string(self, s, out=None):
    """Write s to out via write_string(), using the 'encoding' param."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)

570

571

def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file, or hand it to the logger.

    A configured 'logger' param receives the message via debug(). Otherwise
    the message is written unless check_quiet is set and 'quiet' is enabled.
    skip_eol suppresses the trailing newline.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return
    line_end = ('\n', '')[skip_eol]
    text = self._bidi_workaround(message) + line_end
    self._write_string(text, self._screen_file)

581

582

def to_stderr(self, message):
    """Send message to the 'logger' param's error(), or write it to the error file."""
    assert isinstance(message, compat_str)
    if self.params.get('logger'):
        self.params['logger'].error(message)
        return
    text = self._bidi_workaround(message) + '\n'
    self._write_string(text, self._err_file)

591

592

def to_console_title(self, message):
    """Set the console title to message when 'consoletitle' is enabled."""
    if not self.params.get('consoletitle', False):
        return
    if compat_os_name != 'nt':
        if 'TERM' in os.environ:
            self._write_string('\033[0;%s\007' % message, self._screen_file)
        return
    kernel32 = ctypes.windll.kernel32
    if kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))

602

603

def save_console_title(self):
    """Push the terminal title onto the title stack.

    Does nothing unless 'consoletitle' is set, 'simulate' is unset, the OS
    is not 'nt' and TERM is present in the environment.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Save the title on stack
    self._write_string('\033[22;0t', self._screen_file)

611

612

def restore_console_title(self):
    """Pop the terminal title from the title stack.

    Does nothing unless 'consoletitle' is set, 'simulate' is unset, the OS
    is not 'nt' and TERM is present in the environment.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate', False):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Restore the title from stack
    self._write_string('\033[23;0t', self._screen_file)

620

621

def __enter__(self):

622

self.save_console_title()

623

return self

624

625

def __exit__(self, *args):

626

self.restore_console_title()

627

628

if self.params.get('cookiefile') is not None:

629

self.cookiejar.save(ignore_discard=True, ignore_expires=True)

630

631

def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message (if any) is printed to stderr; in verbose mode a traceback
    is printed as well. Unless 'ignoreerrors' is set a DownloadError is
    raised, otherwise the return code is set to 1.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            current = sys.exc_info()
            if current[0]:  # if .trouble has been called from an except block
                tb = ''
                if hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
                    # The exception wraps another one: show that one first
                    tb += ''.join(traceback.format_exception(*current[1].exc_info))
                tb += encode_compat_str(traceback.format_exc())
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    current = sys.exc_info()
    if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
        exc_info = current[1].exc_info
    else:
        exc_info = current
    raise DownloadError(message, exc_info)

660

661

def report_warning(self, message):
    '''
    Emit a warning. A configured 'logger' param receives the bare message;
    otherwise, unless 'no_warnings' is set, the message is printed to
    stderr prefixed with 'WARNING:' (colored if stderr is a tty file).
    '''
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
        return
    if self.params.get('no_warnings'):
        return
    use_color = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    _msg_header = '\033[0;33mWARNING:\033[0m' if use_color else 'WARNING:'
    self.to_stderr('%s %s' % (_msg_header, message))

677

678

def report_error(self, message, tb=None):
    '''
    Prefix the message with 'ERROR:' (colored in red if stderr is a tty
    file) and hand it, together with tb, to trouble().
    '''
    use_color = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    _msg_header = '\033[0;31mERROR:\033[0m' if use_color else 'ERROR:'
    self.trouble('%s %s' % (_msg_header, message), tb)

689

690

def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name has already been fully downloaded."""
    generic = '[download] The file has already been downloaded'
    try:
        self.to_screen('[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        # Fall back to a message that does not contain the file name
        self.to_screen(generic)

696

697

def report_file_delete(self, file_name):
    """Tell the user that the existing file_name is going to be deleted."""
    generic = 'Deleting already existent file'
    try:
        self.to_screen('Deleting already existent file %s' % file_name)
    except UnicodeEncodeError:
        # Fall back to a message that does not contain the file name
        self.to_screen(generic)

703

704

def prepare_filename(self, info_dict):
    """Generate the output filename.

    The 'outtmpl' param (DEFAULT_OUTTMPL when unset) is filled in with the
    sanitized fields of info_dict plus 'epoch', 'autonumber' and, when it
    can be derived, 'resolution'. Missing fields render as 'NA'.

    Returns the sanitized path, or None after reporting an error when the
    template cannot be evaluated (ValueError).
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                template_dict['resolution'] = '%dx?' % template_dict['width']

        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id' or k.endswith('_id')))
        # Numbers are kept as numbers so numeric format specs keep working;
        # None and container values are dropped altogether.
        template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                             for k, v in template_dict.items()
                             if v is not None and not isinstance(v, (list, tuple, dict)))
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

        # For fields playlist_index and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(template_dict['n_entries'])),
            'autonumber': autonumber_size,
        }
        FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
        # Use a replacement function so that every field gets its own width
        # rather than the width of whichever field happened to match first.
        outtmpl = re.sub(
            FIELD_SIZE_COMPAT_RE,
            lambda m: '%%(%s)0%dd' % (
                m.group('field'), field_size_compat_map[m.group('field')]),
            outtmpl)

        # Missing numeric fields used together with integer presentation types
        # in format specification will break the argument substitution since
        # string 'NA' is returned for missing fields. We will patch output
        # template for missing fields to meet string presentation type.
        # As of [1] format syntax is:
        #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
        # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
        FORMAT_RE = r'''(?x)
            (?<!%)
            %
            \({0}\)  # mapping key
            (?:[#0\-+ ]+)?  # conversion flags (optional)
            (?:\d+)?  # minimum field width (optional)
            (?:\.\d+)?  # precision (optional)
            [hlL]?  # length modifier (optional)
            [diouxXeEfFgGcrs%]  # conversion type
        '''
        for numeric_field in self._NUMERIC_FIELDS:
            if numeric_field not in template_dict:
                outtmpl = re.sub(
                    FORMAT_RE.format(numeric_field),
                    r'%({0})s'.format(numeric_field), outtmpl)

        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        filename = expand_path(outtmpl).replace(sep, '') % template_dict

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            fn_groups = filename.rsplit('.')
            ext = fn_groups[-1]
            sub_ext = ''
            if len(fn_groups) > 2:
                sub_ext = fn_groups[-2]
            filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            filename = encodeFilename(filename, True).decode(preferredencoding())
        return sanitize_path(filename)
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None

802

803

def _match_entry(self, info_dict, incomplete):
    """ Returns None if the file should be downloaded

    Otherwise the reason for skipping is printed and returned. When the
    entry is skipped because it is in the download archive and
    'break_on_existing' is set, ExistingVideoReached is raised; for any
    other skip with 'break_on_reject' set, RejectedVideoReached is raised.

    incomplete -- when true the 'match_filter' param is not consulted.
    """
    # Shared by the message and the check below so they cannot drift apart
    # (they used to differ, which kept break_on_existing from ever firing).
    archive_suffix = 'has already been recorded in archive'

    def check_filter():
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title
        if self.in_download_archive(info_dict):
            return '%s %s' % (video_title, archive_suffix)

        if not incomplete:
            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                ret = match_filter(info_dict)
                if ret is not None:
                    return ret
        return None

    reason = check_filter()
    if reason is not None:
        self.to_screen('[download] ' + reason)
        if reason.endswith(archive_suffix) and self.params.get('break_on_existing', False):
            raise ExistingVideoReached()
        elif self.params.get('break_on_reject', False):
            raise RejectedVideoReached()
    return reason

@staticmethod

def add_extra_info(info_dict, extra_info):

856

'''Set the keys from extra_info in info dict if they are missing'''

857

for key, value in extra_info.items():

858

info_dict.setdefault(key, value)

859

860

def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
                 process=True, force_generic_extractor=False):
    '''
    Run the first suitable info extractor on url and return its result.
    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result

    Nothing is returned when the video ID is already recorded in the
    download archive, or when no extractor is suitable (an error is
    reported in that case).
    '''

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    candidates = [self.get_info_extractor(ie_key)] if ie_key else self._ies

    for ie in candidates:
        if not ie.suitable(url):
            continue

        # Swap whatever is in the list for the registered instance
        ie_key = ie.ie_key()
        ie = self.get_info_extractor(ie_key)
        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            if callable(getattr(ie, 'extract_id', None)):
                temp_id = ie.extract_id(url)
            else:
                temp_id = ie._match_id(url)
        except (AssertionError, IndexError, AttributeError):
            temp_id = None
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                           ie_key, temp_id))
            break

        return self.__extract_info(url, ie, download, extra_info, process, info_dict)

    else:
        self.report_error('no suitable InfoExtractor for URL %s' % url)

899

900

def __handle_extraction_exceptions(func):
    """Decorator reporting extraction errors instead of propagating them.

    Geo restriction and extractor errors are passed to report_error().
    MaxDownloadsReached, ExistingVideoReached and RejectedVideoReached are
    always re-raised. Any other exception is reported only when
    'ignoreerrors' is set and re-raised otherwise.
    """
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except GeoRestrictedError as e:
            parts = [e.msg]
            if e.countries:
                parts.append('\nThis video is available in %s.' % ', '.join(
                    map(ISO3166Utils.short2full, e.countries)))
            parts.append('\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.')
            self.report_error(''.join(parts))
        except ExtractorError as e:  # An error we somewhat expected
            self.report_error(compat_str(e), e.format_traceback())
        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
            raise
        except Exception as e:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
    return wrapper

@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process, info_dict):
    """Extract url with ie and optionally process the result.

    Returns None when the extractor returns None, the processed result when
    process is true, and the raw extractor result otherwise.
    """
    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}
    if info_dict:
        # A non-empty id/title supplied by the caller overrides the extracted one
        for field in ('id', 'title'):
            if info_dict.get(field):
                ie_result[field] = info_dict[field]
    self.add_default_extra_info(ie_result, ie, url)
    if not process:
        return ie_result
    return self.process_ie_result(ie_result, download, extra_info)

def add_default_extra_info(self, ie_result, ie, url):
    """Fill in extractor and webpage fields that ie_result does not have yet."""
    duration = ie_result.get('duration', None)
    if duration is not None:
        duration_string = formatSeconds(ie_result['duration'], '-')
    else:
        duration_string = None
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'duration_string': duration_string,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)

955

956

def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    extra_info is never modified here; its values are only copied into
    results. Raises Exception for an unknown '_type'.
    """
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            self.__forced_printings(
                ie_result, self.prepare_filename(ie_result),
                incomplete=True)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download, info_dict=ie_result,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # The parentheses matter: '%' binds tighter than 'or', so without
            # them the id fallback was never used and "None" got printed.
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Forget the visited URLs once the outermost playlist is done
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)

1058

1059

def __process_playlist(self, ie_result, download):

1060

# We process each entry in the playlist

1061

playlist = ie_result.get('title') or ie_result.get('id')

1062

self.to_screen('[download] Downloading playlist: %s' % playlist)

1063

1064

playlist_results = []

1065

1066

playliststart = self.params.get('playliststart', 1) - 1

1067

playlistend = self.params.get('playlistend')

1068

# For backwards compatibility, interpret -1 as whole list

1069

if playlistend == -1:

1070

playlistend = None

1071

1072

playlistitems_str = self.params.get('playlist_items')

1073

playlistitems = None

1074

if playlistitems_str is not None:

1075

def iter_playlistitems(format):

1076

for string_segment in format.split(','):

1077

if '-' in string_segment:

1078

start, end = string_segment.split('-')

1079

for item in range(int(start), int(end) + 1):

1080

yield int(item)

1081

else:

1082

yield int(string_segment)

1083

playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

1084

1085

ie_entries = ie_result['entries']

1086

1087

def make_playlistitems_entries(list_ie_entries):

1088

num_entries = len(list_ie_entries)

1089

return [

1090

list_ie_entries[i - 1] for i in playlistitems

1091

if -num_entries <= i - 1 < num_entries]

1092

1093

def report_download(num_entries):

1094

self.to_screen(

1095

'[%s] playlist %s: Downloading %d videos' %

1096

(ie_result['extractor'], playlist, num_entries))

1097

1098

if isinstance(ie_entries, list):

1099

n_all_entries = len(ie_entries)

1100

if playlistitems:

1101

entries = make_playlistitems_entries(ie_entries)

1102

else:

1103

entries = ie_entries[playliststart:playlistend]

1104

n_entries = len(entries)

1105

self.to_screen(

1106

'[%s] playlist %s: Collected %d video ids (downloading %d of them)' %

1107

(ie_result['extractor'], playlist, n_all_entries, n_entries))

1108

elif isinstance(ie_entries, PagedList):

1109

if playlistitems:

1110

entries = []

1111

for item in playlistitems:

1112

entries.extend(ie_entries.getslice(

item - 1, item

))

else:

entries = ie_entries.getslice(

1117

playliststart, playlistend)

1118

n_entries = len(entries)

1119

report_download(n_entries)

1120

else: # iterable

1121

if playlistitems:

1122

entries = make_playlistitems_entries(list(itertools.islice(

1123

ie_entries, 0, max(playlistitems))))

1124

else:

1125

entries = list(itertools.islice(

1126

ie_entries, playliststart, playlistend))

1127

n_entries = len(entries)

1128

report_download(n_entries)

1129

1130

if self.params.get('playlistreverse', False):

1131

entries = entries[::-1]

1132

1133

if self.params.get('playlistrandom', False):

1134

random.shuffle(entries)

1135

1136

x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

1137

1138

for i, entry in enumerate(entries, 1):

1139

self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))

1140

# This __x_forwarded_for_ip thing is a bit ugly but requires

1141

# minimal changes

1142

if x_forwarded_for:

1143

entry['__x_forwarded_for_ip'] = x_forwarded_for

1144

extra = {

1145

'n_entries': n_entries,

1146

'playlist': playlist,

1147

'playlist_id': ie_result.get('id'),

1148

'playlist_title': ie_result.get('title'),

1149

'playlist_uploader': ie_result.get('uploader'),

1150

'playlist_uploader_id': ie_result.get('uploader_id'),

1151

'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,

1152

'extractor': ie_result['extractor'],

1153

'webpage_url': ie_result['webpage_url'],

1154

'webpage_url_basename': url_basename(ie_result['webpage_url']),

1155

'extractor_key': ie_result['extractor_key'],

1156

}

1157

1158

if self._match_entry(entry, incomplete=True) is not None:

1159

continue

1160

1161

entry_result = self.__process_iterable_entry(entry, download, extra)

1162

# TODO: skip failed (empty) entries?

1163

playlist_results.append(entry_result)

1164

ie_result['entries'] = playlist_results

1165

self.to_screen('[download] Finished downloading playlist: %s' % playlist)

1166

return ie_result

1167

1168

@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Resolve one playlist entry by delegating to ``process_ie_result``.

    entry      -- the entry taken from the playlist's entries
    download   -- forwarded unchanged as ``download``
    extra_info -- per-entry playlist metadata, forwarded as ``extra_info``

    Returns whatever ``process_ie_result`` returns for the entry.
    """
    # NOTE(review): __handle_extraction_exceptions is defined outside this
    # chunk; presumably it is what keeps one failing entry from aborting the
    # whole playlist -- confirm against its definition.
    entry_result = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return entry_result

1172

1173

def _build_format_filter(self, filter_spec):

1174

" Returns a function to filter the formats according to the filter_spec "

OPERATORS = {

'<': operator.lt,

'<=': operator.le,

'>': operator.gt,

'>=': operator.ge,

'=': operator.eq,

'!=': operator.ne,

}

operator_rex = re.compile(r'''(?x)\s*

1185

(?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)

1186

\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*

1187

(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)

1188

$

1189

''' % '|'.join(map(re.escape, OPERATORS.keys())))

1190

m = operator_rex.search(filter_spec)

1191

if m:

1192

try:

1193

comparison_value = int(m.group('value'))

1194

except ValueError:

1195

comparison_value = parse_filesize(m.group('value'))

1196

if comparison_value is None:

1197

comparison_value = parse_filesize(m.group('value') + 'B')

1198

if comparison_value is None:

1199

raise ValueError(

1200

'Invalid value %r in format specification %r' % (

1201

m.group('value'), filter_spec))

1202

op = OPERATORS[m.group('op')]

if not m:

STR_OPERATORS = {

'=': operator.eq,

'^=': lambda attr, value: attr.startswith(value),

1208

'$=': lambda attr, value: attr.endswith(value),

1209

'*=': lambda attr, value: value in attr,

1210

}

1211

str_operator_rex = re.compile(r'''(?x)

1212

\s*(?P<key>[a-zA-Z0-9._-]+)

1213

\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?

1214

\s*(?P<value>[a-zA-Z0-9._-]+)

1215

\s*$

1216

''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))

1217

m = str_operator_rex.search(filter_spec)

1218

if m:

1219

comparison_value = m.group('value')

1220

str_op = STR_OPERATORS[m.group('op')]

1221

if m.group('negation'):

1222

op = lambda attr, value: not str_op(attr, value)

else:

op = str_op

if not m:

raise ValueError('Invalid filter specification %r' % filter_spec)

1228

1229

def _filter(f):

1230

actual_value = f.get(m.group('key'))

1231

if actual_value is None:

1232

return m.group('none_inclusive')

1233

return op(actual_value, comparison_value)

1234

return _filter

1235

1236

def _default_format_spec(self, info_dict, download=True):

1237

1238

def can_merge():

1239

merger = FFmpegMergerPP(self)

1240

return merger.available and merger.can_merge()

1241

1242

prefer_best = (

1243

not self.params.get('simulate', False)

and download

and (

not can_merge()

or info_dict.get('is_live', False)

1248

or self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-'))

1249

1250

return (

1251

'best/bestvideo+bestaudio'

1252

if prefer_best

1253

else 'bestvideo*+bestaudio/best'

1254

if not self.params.get('allow_multiple_audio_streams', False)

1255

else 'bestvideo+bestaudio/best')

1256

1257

def build_format_selector(self, format_spec):
    """Compile a format selection expression into a selector function.

    format_spec is tokenized with the Python tokenizer and parsed into a
    tree of ``FormatSelector`` nodes:

    * ``a,b``   -- yield the formats picked by every operand
    * ``a/b``   -- use the first operand that picks anything
    * ``a+b``   -- merge every pair of formats picked by the two operands
    * ``(...)`` -- grouping
    * ``x[f]``  -- restrict x with a filter built by
      ``self._build_format_filter``; a bare ``[f]`` applies to ``best``

    An atom is ``all``, one of the best/worst forms (``b``, ``w``, ``bv``,
    ``ba*``, ``bestvideo`` ...), a known file extension, or a format_id.

    Returns a function that takes a context dict with the keys ``formats``
    and ``incomplete_formats`` and returns an iterable of format dicts.

    Raises SyntaxError when format_spec cannot be parsed.
    """
    def syntax_error(note, start):
        # start is a (row, column) token position; the column places the caret
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    def _parse_filter(tokens):
        # Concatenate everything up to the closing ']' into one filter string
        filter_parts = []
        for tok_type, string, start, _, _ in tokens:
            if tok_type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for tok_type, string, start, end, line in tokens:
            if tok_type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                # (the inner loop keeps consuming the same token iterator)
                for tok_type, string, start, end, line in tokens:
                    yield tok_type, string, start, end, line
                    if tok_type == tokenize.OP and string == ']':
                        break
            elif tok_type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
            elif tok_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive-descent parser; the inside_* flags tell a nested call
        # which operators terminate it and must be handed back to the caller.
        selectors = []
        current_selector = None
        for tok_type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if tok_type == getattr(tokenize, 'ENCODING', None):
                continue
            elif tok_type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif tok_type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif tok_type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _drop_surplus_streams(formats_info):
        # Keep, in order, only the formats that do not add a second stream of
        # a kind for which multiple streams are not allowed.  A new list is
        # built instead of popping from the list being iterated, which
        # skipped elements and could pop twice for a single format.
        taken = {'video': False, 'audio': False}
        kept = []
        for fmt_info in formats_info:
            restricted_kinds = [
                kind for kind in ('audio', 'video')
                if not allow_multiple_streams[kind]
                and fmt_info.get(kind[0] + 'codec') != 'none']
            if any(taken[kind] for kind in restricted_kinds):
                continue
            for kind in restricted_kinds:
                taken[kind] = True
            kept.append(fmt_info)
        return kept

    def _merge(formats_pair):
        # Combine two picked formats into one dict describing a merged download
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            formats_info = _drop_surplus_streams(formats_info)

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
            'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
            'ext': output_ext,
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution'),
                'fps': the_only_video.get('fps'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
            })

        return new_dict

    def _build_selector_function(selector):
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    for picked in f(ctx):
                        yield picked
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == SINGLE:  # atom
            single_spec = selector.selector if selector.selector is not None else 'best'

            if single_spec == 'all':
                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if formats:
                        for f in formats:
                            yield f
            else:
                format_fallback = False
                # group 1: best/worst, group 2: optional video/audio,
                # group 3: optional '*' modifier
                format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', single_spec)
                if format_spec_obj is not None:
                    format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
                    format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
                    not_format_type = 'v' if format_type == 'a' else 'a'
                    format_modified = format_spec_obj.group(3) is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    if format_type and format_modified:  # bv*, ba*, wv*, wa*
                        filter_f = lambda f: f.get(format_type + 'codec') != 'none'
                    elif format_type:  # bv, ba, wv, wa
                        filter_f = lambda f: f.get(not_format_type + 'codec') == 'none'
                    elif not format_modified:  # b, w
                        filter_f = lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none'
                    else:  # b*, w*
                        filter_f = None
                else:
                    format_idx = -1
                    if single_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']:  # extension
                        filter_f = lambda f: f.get('ext') == single_spec
                    else:  # id
                        filter_f = lambda f: f.get('format_id') == single_spec

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if matches:
                        yield matches[format_idx]
                    elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        yield formats[format_idx]

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(
                        selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                    yield _merge(pair)

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Filter a copy so the caller's context is left untouched
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        """Iterator over a token list that can step back by one token."""

        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)

1544

1545

def _calc_headers(self, info_dict):
    """Build the HTTP headers for the request described by info_dict.

    Starts from a copy of ``std_headers``, overlays the entry's own
    ``http_headers``, adds a ``Cookie`` header when ``_calc_cookies``
    yields one, and sets ``X-Forwarded-For`` from
    ``__x_forwarded_for_ip`` unless that header is already present.

    Returns the resulting header dict.
    """
    headers = std_headers.copy()

    extra_headers = info_dict.get('http_headers')
    if extra_headers:
        headers.update(extra_headers)

    cookie_value = self._calc_cookies(info_dict)
    if cookie_value:
        headers['Cookie'] = cookie_value

    # An explicitly supplied X-Forwarded-For always wins
    if 'X-Forwarded-For' in headers:
        return headers

    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip:
        headers['X-Forwarded-For'] = forwarded_ip
    return headers

def _calc_cookies(self, info_dict):
    """Return the ``Cookie`` header value for ``info_dict['url']``.

    A throwaway request for the URL is handed to
    ``self.cookiejar.add_cookie_header`` and the ``Cookie`` header found on
    it afterwards is returned.
    """
    probe = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe)
    cookie_header = probe.get_header('Cookie')
    return cookie_header

1567

1568

def process_video_result(self, info_dict, download=True):
    """Normalise a video info dict, select its formats and process them.

    info_dict is sanitised in place (field types, thumbnails, subtitles,
    per-format defaults and HTTP headers).  The requested format expression
    (or the default one) is then evaluated and, when ``download`` is truthy,
    ``process_info`` is called once per selected format.

    Returns info_dict updated with the last selected format, or None when
    the ``list_thumbnails``, ``listsubtitles`` or ``listformats`` parameter
    caused a listing to be printed instead.

    Raises ExtractorError if ``id`` or ``title`` is missing, if no usable
    format remains, or if the requested format is not available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    def report_force_conversion(field, field_not, conversion):
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        field = info.get(string_field)
        if field is None or isinstance(field, compat_str):
            return
        report_force_conversion(string_field, 'a string', 'string')
        info[string_field] = compat_str(field)

    def sanitize_numeric_fields(info):
        for numeric_field in self._NUMERIC_FIELDS:
            field = info.get(numeric_field)
            if field is None or isinstance(field, compat_numeric_types):
                continue
            report_force_conversion(numeric_field, 'numeric', 'int')
            info[numeric_field] = int_or_none(field)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    if thumbnails:
        # Ascending sort, so the last thumbnail has the highest
        # (preference, width, height, id, url) key
        thumbnails.sort(key=lambda t: (
            t.get('preference') if t.get('preference') is not None else -1,
            t.get('width') if t.get('width') is not None else -1,
            t.get('height') if t.get('height') is not None else -1,
            t.get('id') if t.get('id') is not None else '', t.get('url')))
        for i, t in enumerate(thumbnails):
            t['url'] = sanitize_url(t['url'])
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            if t.get('id') is None:
                t['id'] = '%d' % i

    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    thumbnail = info_dict.get('thumbnail')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        try:
            upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            pass

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

    for cc_kind in ('subtitles', 'automatic_captions'):
        cc = info_dict.get(cc_kind)
        if cc:
            for _, subtitle in cc.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        return

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    def is_wellformed(f):
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = list(filter(is_wellformed, formats))

    # Every format may have been rejected above; report that the same way
    # as an empty format list instead of failing later on formats[0].
    if not formats:
        raise ExtractorError('No video formats found!')

    formats_dict = {}

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        sanitize_string_field(fmt, 'format_id')
        sanitize_numeric_fields(fmt)
        fmt['url'] = sanitize_url(fmt['url'])
        if not fmt.get('format_id'):
            fmt['format_id'] = compat_str(i)
        else:
            # Sanitize format_id from characters used in format selector expression
            fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id'])
        format_id = fmt['format_id']
        if format_id not in formats_dict:
            formats_dict[format_id] = []
        formats_dict[format_id].append(fmt)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_dict.items():
        if len(ambiguous_formats) > 1:
            for i, fmt in enumerate(ambiguous_formats):
                fmt['format_id'] = '%s-%d' % (format_id, i)

    for i, fmt in enumerate(formats):
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if fmt.get('ext') is None:
            fmt['ext'] = determine_ext(fmt['url']).lower()
        # Automatically determine protocol if missing (useful for format
        # selection purposes)
        if fmt.get('protocol') is None:
            fmt['protocol'] = determine_protocol(fmt)
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(fmt)
        fmt['http_headers'] = self._calc_headers(full_format_info)
    # Remove private housekeeping stuff
    if '__x_forwarded_for_ip' in info_dict:
        del info_dict['__x_forwarded_for_ip']

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        self.list_formats(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = self._default_format_spec(info_dict, download=download)
        if self.params.get('verbose'):
            self._write_string('[debug] Default format spec: %s\n' % req_format)

    format_selector = self.build_format_selector(req_format)

    # While in format selection we may need to have an access to the original
    # format set in order to calculate some metrics or do some processing.
    # For now we need to be able to guess whether original formats provided
    # by extractor are incomplete or not (i.e. whether extractor provides only
    # video-only or audio-only formats) for proper formats selection for
    # extractors with such incomplete formats (see
    # https://github.com/ytdl-org/youtube-dl/pull/5556).
    # Since formats may be filtered during format selection and may not match
    # the original formats the results may be incorrect. Thus original formats
    # or pre-calculated metrics should be passed to format selection routines
    # as well.
    # We will pass a context object containing all necessary additional data
    # instead of just formats.
    # This fixes incorrect format selection issue (see
    # https://github.com/ytdl-org/youtube-dl/issues/10083).
    incomplete_formats = (
        # All formats are video-only or
        all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
        # all formats are audio-only
        or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

    ctx = {
        'formats': formats,
        'incomplete_formats': incomplete_formats,
    }

    formats_to_download = list(format_selector(ctx))
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict

1805

1806

def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format.

    video_id           -- used only in warning messages
    normal_subtitles   -- mapping of language to a list of subtitle format
                          dicts; considered when ``writesubtitles`` is set
    automatic_captions -- same shape; considered when ``writeautomaticsub``
                          is set, and only for languages that have no
                          normal subtitles

    Languages come from ``allsubtitles`` / ``subtitleslangs``, falling back
    to ``en`` and then to the first available language.  For each language
    the ``subtitlesformat`` preference (``/``-separated, ``best`` meaning
    the last listed format) picks one format; if nothing matches, the last
    format is used and a warning is reported.

    Returns a dict mapping language to the chosen format dict, or None when
    no subtitles are available or requested.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            # normal subtitles take precedence over automatic captions
            available_subs.setdefault(lang, cap_info)

    # available_subs is only populated when one of the write* options is
    # set, so an empty mapping also covers "nothing was requested".
    if not available_subs:
        return None

    if self.params.get('allsubtitles', False):
        requested_langs = list(available_subs)
    elif self.params.get('subtitleslangs', False):
        requested_langs = self.params.get('subtitleslangs')
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [next(iter(available_subs))]

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # A language with an empty format list has nothing to pick from, so
        # it is treated like a missing language instead of raising
        # IndexError on formats[-1].
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            matches = [f for f in formats if f['ext'] == ext]
            if matches:
                chosen = matches[-1]
                break
        else:
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs

def __forced_printings(self, info_dict, filename, incomplete):

1856

def print_mandatory(field):

1857

if (self.params.get('force%s' % field, False)

1858

and (not incomplete or info_dict.get(field) is not None)):

1859

self.to_stdout(info_dict[field])

1860

1861

def print_optional(field):

1862

if (self.params.get('force%s' % field, False)

1863

and info_dict.get(field) is not None):

1864

self.to_stdout(info_dict[field])

1865

1866

print_mandatory('title')

1867

print_mandatory('id')

1868

if self.params.get('forceurl', False) and not incomplete:

1869

if info_dict.get('requested_formats') is not None:

1870

for f in info_dict['requested_formats']:

1871

self.to_stdout(f['url'] + f.get('play_path', ''))

1872

else:

1873

# For RTMP URLs, also include the playpath

1874

self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))

1875

print_optional('thumbnail')

1876

print_optional('description')

1877

if self.params.get('forcefilename', False) and filename is not None:

1878

self.to_stdout(filename)

1879

if self.params.get('forceduration', False) and info_dict.get('duration') is not None:

1880

self.to_stdout(formatSeconds(info_dict['duration']))

1881

print_mandatory('format')

1882

if self.params.get('forcejson', False):

1883

self.to_stdout(json.dumps(info_dict))

1884

1885

def process_info(self, info_dict):

1886

"""Process a single resolved IE result."""

1887

1888

assert info_dict.get('_type', 'video') == 'video'

1889

1890

max_downloads = self.params.get('max_downloads')

1891

if max_downloads is not None:

1892

if self._num_downloads >= int(max_downloads):

1893

raise MaxDownloadsReached()

1894

1895

# TODO: backward compatibility, to be removed

1896

info_dict['fulltitle'] = info_dict['title']

1897

1898

if 'format' not in info_dict:

1899

info_dict['format'] = info_dict['ext']

1900

1901

if self._match_entry(info_dict, incomplete=False) is not None:

1902

return

1903

1904

self._num_downloads += 1

1905

1906

info_dict['_filename'] = filename = self.prepare_filename(info_dict)

1907

1908

# Forced printings

1909

self.__forced_printings(info_dict, filename, incomplete=False)

1910

1911

if self.params.get('simulate', False):

1912

if self.params.get('force_write_download_archive', False):

1913

self.record_download_archive(info_dict)

1914

1915

# Do nothing else if in simulate mode

return

if filename is None:

return

def ensure_dir_exists(path):

1922

try:

1923

dn = os.path.dirname(path)

1924

if dn and not os.path.exists(dn):

1925

os.makedirs(dn)

1926

return True

1927

except (OSError, IOError) as err:

1928

self.report_error('unable to create directory ' + error_to_compat_str(err))

1929

return False

1930

1931

if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):

1932

return

1933

1934

if self.params.get('writedescription', False):

1935

descfn = replace_extension(filename, 'description', info_dict.get('ext'))

1936

if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):

1937

self.to_screen('[info] Video description is already present')

1938

elif info_dict.get('description') is None:

1939

self.report_warning('There\'s no description to write.')

1940

else:

1941

try:

1942

self.to_screen('[info] Writing video description to: ' + descfn)

1943

with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:

1944

descfile.write(info_dict['description'])

1945

except (OSError, IOError):

1946

self.report_error('Cannot write description file ' + descfn)

1947

return

1948

1949

if self.params.get('writeannotations', False):

1950

annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))

1951

if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):

1952

self.to_screen('[info] Video annotations are already present')

1953

elif not info_dict.get('annotations'):

1954

self.report_warning('There are no annotations to write.')

1955

else:

1956

try:

1957

self.to_screen('[info] Writing video annotations to: ' + annofn)

1958

with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:

1959

annofile.write(info_dict['annotations'])

1960

except (KeyError, TypeError):

1961

self.report_warning('There are no annotations to write.')

1962

except (OSError, IOError):

1963

self.report_error('Cannot write annotations file: ' + annofn)

1964

return

1965

1966

def dl(name, info, subtitle=False):
    """Run the downloader selected for `info` and return its result.

    The downloader class is picked by get_suitable_downloader() from `info`
    and the current params, every registered progress hook is attached to
    it, and its download() result is returned unchanged.
    """
    downloader_cls = get_suitable_downloader(info, self.params)
    downloader = downloader_cls(self, self.params)
    for hook in self._progress_hooks:
        downloader.add_progress_hook(hook)
    if self.params.get('verbose'):
        self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
    return downloader.download(name, info, subtitle)

1973

1974

subtitles_are_requested = any([self.params.get('writesubtitles', False),

1975

self.params.get('writeautomaticsub')])

1976

1977

if subtitles_are_requested and info_dict.get('requested_subtitles'):

1978

# subtitles download errors are already managed as troubles in relevant IE

1979

# that way it will silently go on when used with unsupporting IE

1980

subtitles = info_dict['requested_subtitles']

1981

# ie = self.get_info_extractor(info_dict['extractor_key'])

1982

for sub_lang, sub_info in subtitles.items():

1983

sub_format = sub_info['ext']

1984

sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))

1985

if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):

1986

self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))

1987

else:

1988

self.to_screen('[info] Writing video subtitles to: ' + sub_filename)

1989

if sub_info.get('data') is not None:

1990

try:

1991

# Use newline='' to prevent conversion of newline characters

1992

# See https://github.com/ytdl-org/youtube-dl/issues/10268

1993

with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:

1994

subfile.write(sub_info['data'])

1995

except (OSError, IOError):

1996

self.report_error('Cannot write subtitles file ' + sub_filename)

return

else:

try:

dl(sub_filename, sub_info, subtitle=True)

2001

'''

2002

if self.params.get('sleep_interval_subtitles', False):

2003

dl(sub_filename, sub_info)

2004

else:

2005

sub_data = ie._request_webpage(

2006

sub_info['url'], info_dict['id'], note=False).read()

2007

with io.open(encodeFilename(sub_filename), 'wb') as subfile:

2008

subfile.write(sub_data)

2009

'''

2010

except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:

2011

self.report_warning('Unable to download subtitle for "%s": %s' %

2012

(sub_lang, error_to_compat_str(err)))

2013

continue

2014

2015

if self.params.get('skip_download', False):

2016

if self.params.get('convertsubtitles', False):

2017

subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))

2018

filename_real_ext = os.path.splitext(filename)[1][1:]

2019

filename_wo_ext = (

2020

os.path.splitext(filename)[0]

2021

if filename_real_ext == info_dict['ext']

2022

else filename)

2023

afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))

2024

if subconv.available:

2025

info_dict.setdefault('__postprocessors', [])

2026

# info_dict['__postprocessors'].append(subconv)

2027

if os.path.exists(encodeFilename(afilename)):

2028

self.to_screen(

2029

'[download] %s has already been downloaded and '

2030

'converted' % afilename)

2031

else:

2032

try:

2033

self.post_process(filename, info_dict)

2034

except (PostProcessingError) as err:

2035

self.report_error('postprocessing: %s' % str(err))

2036

return

2037

2038

if self.params.get('writeinfojson', False):

2039

infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))

2040

if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):

2041

self.to_screen('[info] Video description metadata is already present')

2042

else:

2043

self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)

2044

try:

2045

write_json_file(self.filter_requested_info(info_dict), infofn)

2046

except (OSError, IOError):

2047

self.report_error('Cannot write metadata to JSON file ' + infofn)

2048

return

2049

2050

self._write_thumbnails(info_dict, filename)

2051

2052

# Write internet shortcut files

2053

url_link = webloc_link = desktop_link = False

2054

if self.params.get('writelink', False):

2055

if sys.platform == "darwin": # macOS.

2056

webloc_link = True

2057

elif sys.platform.startswith("linux"):

2058

desktop_link = True

2059

else: # if sys.platform in ['win32', 'cygwin']:

2060

url_link = True

2061

if self.params.get('writeurllink', False):

2062

url_link = True

2063

if self.params.get('writewebloclink', False):

2064

webloc_link = True

2065

if self.params.get('writedesktoplink', False):

2066

desktop_link = True

2067

2068

if url_link or webloc_link or desktop_link:

2069

if 'webpage_url' not in info_dict:

2070

self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')

2071

return

2072

ascii_url = iri_to_uri(info_dict['webpage_url'])

2073

2074

def _write_link_file(extension, template, newline, embed_filename):
    """Write one internet shortcut file next to the media file.

    extension      -- extension of the shortcut file ('url', 'webloc', 'desktop')
    template       -- %-style template; receives 'url' and, optionally, 'filename'
    newline        -- newline sequence passed to io.open
    embed_filename -- when true, also expose the shortcut name (without its
                      extension) to the template as 'filename'

    Returns False if the file could not be written (the error has already
    been reported), True otherwise - including when an existing file is kept.
    """
    linkfn = replace_extension(filename, extension, info_dict.get('ext'))
    # Every other auxiliary file written here is protected by the 'overwrites'
    # param; honour it as well, and keep accepting the older 'nooverwrites'
    # key so existing callers are unaffected.
    keep_existing = (
        self.params.get('nooverwrites', False)
        or not self.params.get('overwrites', True))
    if keep_existing and os.path.exists(encodeFilename(linkfn)):
        self.to_screen('[info] Internet shortcut is already present')
        return True

    try:
        self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
        with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
            template_vars = {'url': ascii_url}
            if embed_filename:
                # Strip '.<extension>' from the end of the shortcut name
                template_vars['filename'] = linkfn[:-(len(extension) + 1)]
            linkfile.write(template % template_vars)
    except (OSError, IOError):
        self.report_error('Cannot write internet shortcut ' + linkfn)
        return False
    return True

if url_link:

if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):

2093

return

2094

if webloc_link:

2095

if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):

2096

return

2097

if desktop_link:

2098

if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):

return

# Download

must_record_download_archive = False

2103

if not self.params.get('skip_download', False):

2104

try:

2105

if info_dict.get('requested_formats') is not None:

2106

downloaded = []

2107

success = True

2108

merger = FFmpegMergerPP(self)

2109

if not merger.available:

2110

postprocessors = []

2111

self.report_warning('You have requested multiple '

2112

'formats but ffmpeg or avconv are not installed.'

2113

' The formats won\'t be merged.')

2114

else:

2115

postprocessors = [merger]

2116

2117

def compatible_formats(formats):
    """Tell whether the given formats can be merged without switching container.

    Returns False as soon as more than two formats carry video or more than
    two carry audio. Otherwise returns True only when all 'ext' values fit
    inside one single family of extensions listed below.
    """
    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
    video_count = sum(1 for fmt in formats if fmt.get('vcodec') != 'none')
    audio_count = sum(1 for fmt in formats if fmt.get('acodec') != 'none')
    if video_count > 2 or audio_count > 2:
        return False

    # Check extension
    ext_families = (
        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
        set(('webm',)),
    )
    used_exts = set(fmt.get('ext') for fmt in formats)
    # TODO: Check acodec/vcodec
    return any(family.issuperset(used_exts) for family in ext_families)

2135

2136

filename_real_ext = os.path.splitext(filename)[1][1:]

2137

filename_wo_ext = (

2138

os.path.splitext(filename)[0]

2139

if filename_real_ext == info_dict['ext']

2140

else filename)

2141

requested_formats = info_dict['requested_formats']

2142

if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):

2143

info_dict['ext'] = 'mkv'

2144

self.report_warning(

2145

'Requested formats are incompatible for merge and will be merged into mkv.')

2146

# Ensure filename always has a correct extension for successful merge

2147

filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])

2148

file_exists = os.path.exists(encodeFilename(filename))

2149

if not self.params.get('overwrites', False) and file_exists:

2150

self.to_screen(

2151

'[download] %s has already been downloaded and '

'merged' % filename)

else:

if file_exists:

self.report_file_delete(filename)

2156

os.remove(encodeFilename(filename))

2157

for f in requested_formats:

2158

new_info = dict(info_dict)

2159

new_info.update(f)

2160

fname = prepend_extension(

2161

self.prepare_filename(new_info),

2162

'f%s' % f['format_id'], new_info['ext'])

2163

if not ensure_dir_exists(fname):

2164

return

2165

downloaded.append(fname)

2166

partial_success, real_download = dl(fname, new_info)

2167

success = success and partial_success

2168

info_dict['__postprocessors'] = postprocessors

2169

info_dict['__files_to_merge'] = downloaded

2170

# Even if there were no downloads, it is being merged only now

2171

info_dict['__real_download'] = True

2172

else:

2173

# Delete existing file with --yes-overwrites

2174

if self.params.get('overwrites', False):

2175

if os.path.exists(encodeFilename(filename)):

2176

self.report_file_delete(filename)

2177

os.remove(encodeFilename(filename))

2178

# Just a single file

2179

success, real_download = dl(filename, info_dict)

2180

info_dict['__real_download'] = real_download

2181

except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:

2182

self.report_error('unable to download video data: %s' % error_to_compat_str(err))

2183

return

2184

except (OSError, IOError) as err:

2185

raise UnavailableVideoError(err)

2186

except (ContentTooShortError, ) as err:

2187

self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))

2188

return

2189

2190

if success and filename != '-':

2191

# Fixup content

2192

fixup_policy = self.params.get('fixup')

2193

if fixup_policy is None:

2194

fixup_policy = 'detect_or_warn'

2195

2196

INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'

2197

2198

stretched_ratio = info_dict.get('stretched_ratio')

2199

if stretched_ratio is not None and stretched_ratio != 1:

2200

if fixup_policy == 'warn':

2201

self.report_warning('%s: Non-uniform pixel ratio (%s)' % (

2202

info_dict['id'], stretched_ratio))

2203

elif fixup_policy == 'detect_or_warn':

2204

stretched_pp = FFmpegFixupStretchedPP(self)

2205

if stretched_pp.available:

2206

info_dict.setdefault('__postprocessors', [])

2207

info_dict['__postprocessors'].append(stretched_pp)

2208

else:

2209

self.report_warning(

2210

'%s: Non-uniform pixel ratio (%s). %s'

2211

% (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))

2212

else:

2213

assert fixup_policy in ('ignore', 'never')

2214

2215

if (info_dict.get('requested_formats') is None

2216

and info_dict.get('container') == 'm4a_dash'):

2217

if fixup_policy == 'warn':

2218

self.report_warning(

2219

'%s: writing DASH m4a. '

2220

'Only some players support this container.'

2221

% info_dict['id'])

2222

elif fixup_policy == 'detect_or_warn':

2223

fixup_pp = FFmpegFixupM4aPP(self)

2224

if fixup_pp.available:

2225

info_dict.setdefault('__postprocessors', [])

2226

info_dict['__postprocessors'].append(fixup_pp)

2227

else:

2228

self.report_warning(

2229

'%s: writing DASH m4a. '

2230

'Only some players support this container. %s'

2231

% (info_dict['id'], INSTALL_FFMPEG_MESSAGE))

2232

else:

2233

assert fixup_policy in ('ignore', 'never')

2234

2235

if (info_dict.get('protocol') == 'm3u8_native'

2236

or info_dict.get('protocol') == 'm3u8'

2237

and self.params.get('hls_prefer_native')):

2238

if fixup_policy == 'warn':

2239

self.report_warning('%s: malformed AAC bitstream detected.' % (

2240

info_dict['id']))

2241

elif fixup_policy == 'detect_or_warn':

2242

fixup_pp = FFmpegFixupM3u8PP(self)

2243

if fixup_pp.available:

2244

info_dict.setdefault('__postprocessors', [])

2245

info_dict['__postprocessors'].append(fixup_pp)

2246

else:

2247

self.report_warning(

2248

'%s: malformed AAC bitstream detected. %s'

2249

% (info_dict['id'], INSTALL_FFMPEG_MESSAGE))

2250

else:

2251

assert fixup_policy in ('ignore', 'never')

2252

2253

try:

2254

self.post_process(filename, info_dict)

2255

except (PostProcessingError) as err:

2256

self.report_error('postprocessing: %s' % str(err))

2257

return

2258

try:

2259

for ph in self._post_hooks:

2260

ph(filename)

2261

except Exception as err:

2262

self.report_error('post hooks: %s' % str(err))

2263

return

2264

must_record_download_archive = True

2265

2266

if must_record_download_archive or self.params.get('force_write_download_archive', False):

2267

self.record_download_archive(info_dict)

2268

max_downloads = self.params.get('max_downloads')

2269

if max_downloads is not None and self._num_downloads >= int(max_downloads):

2270

raise MaxDownloadsReached()

2271

2272

def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one fixed
    output name. Returns self._download_retcode once every URL was handled.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    several_urls = len(url_list) > 1
    if (several_urls
            and outtmpl != '-'
            and '%' not in outtmpl
            and self.params.get('max_downloads') != 1):
        # A template without any '%' field names the same file for every URL
        raise SameFileError(outtmpl)

    force_generic = 'force_generic_extractor'
    for url in url_list:
        try:
            # It also downloads the videos
            res = self.extract_info(
                url, force_generic_extractor=self.params.get(force_generic, False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached) as stop:
            # These exceptions abort the whole run: announce why, then propagate
            if isinstance(stop, MaxDownloadsReached):
                reason = '[info] Maximum number of downloaded files reached'
            elif isinstance(stop, ExistingVideoReached):
                reason = '[info] Encountered a file that is already in the archive, stopping due to --break-on-existing'
            else:
                reason = '[info] Encountered a file that did not match filter, stopping due to --break-on-reject'
            self.to_screen(reason)
            raise
        else:
            if self.params.get('dump_single_json', False):
                self.to_stdout(json.dumps(res))

    return self._download_retcode

2302

2303

def download_with_info_file(self, info_filename):
    """Process a previously saved info JSON file instead of extracting anew.

    The file is read as UTF-8 JSON, passed through filter_requested_info()
    and handed to process_ie_result() with download=True. If that raises
    DownloadError and the info carries a 'webpage_url', a regular download
    of that URL is attempted instead; without one the error propagates.
    Returns self._download_retcode (or the result of the fallback download).
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        raw_info = json.loads('\n'.join(lines))
        info = self.filter_requested_info(raw_info)

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])
    return self._download_retcode

2319

2320

@staticmethod

2321

def filter_requested_info(info_dict):

2322

return dict(

2323

(k, v) for k, v in info_dict.items()

2324

if k not in ['requested_formats', 'requested_subtitles'])

2325

2326

def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Postprocessors listed under ie_info['__postprocessors'] run first,
    followed by the ones registered in self._pps. Each one receives the
    info dict returned by the previous one. Files a postprocessor reports
    for deletion are removed unless the 'keepvideo' param is set.
    """
    current_info = dict(ie_info)
    current_info['filepath'] = filename

    extra_pps = ie_info.get('__postprocessors')
    chain = [] if extra_pps is None else list(extra_pps)
    chain.extend(self._pps)

    for postprocessor in chain:
        obsolete_files = []
        try:
            obsolete_files, current_info = postprocessor.run(current_info)
        except PostProcessingError as e:
            # Reported but not fatal: the remaining postprocessors still run
            self.report_error(e.msg)

        if not obsolete_files or self.params.get('keepvideo', False):
            continue
        for stale_path in set(obsolete_files):
            self.to_screen('Deleting original file %s (pass -k to keep)' % stale_path)
            try:
                os.remove(encodeFilename(stale_path))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded original file')

2347

2348

def _make_archive_id(self, info_dict):

2349

video_id = info_dict.get('id')

2350

if not video_id:

2351

return

2352

# Future-proof against any change in case

2353

# and backwards compatibility with prior versions

2354

extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist

2355

if extractor is None:

2356

url = str_or_none(info_dict.get('url'))

2357

if not url:

2358

return

2359

# Try to find matching extractor for the URL and take its ie_key

2360

for ie in self._ies:

2361

if ie.suitable(url):

2362

extractor = ie.ie_key()

break

else:

return

return extractor.lower() + ' ' + video_id

2367

2368

def in_download_archive(self, info_dict):
    """Return True if the video described by info_dict is in self.archive.

    Always False when no 'download_archive' param is configured or when no
    archive id can be built for info_dict.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    # A missing id means incomplete video information
    return bool(archive_id) and archive_id in self.archive

2378

2379

def record_download_archive(self, info_dict):
    """Append the archive id of info_dict to the archive file and to self.archive.

    Does nothing when no 'download_archive' param is configured. The id
    built by _make_archive_id() is asserted to be non-empty.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return

    entry = self._make_archive_id(info_dict)
    assert entry
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(entry + '\n')
    # Keep the in-memory set in step with the file
    self.archive.add(entry)

2388

2389

@staticmethod

2390

def format_resolution(format, default='unknown'):

2391

if format.get('vcodec') == 'none':

2392

return 'audio only'

2393

if format.get('resolution') is not None:

2394

return format['resolution']

2395

if format.get('height') is not None:

2396

if format.get('width') is not None:

2397

res = '%sx%s' % (format['width'], format['height'])

2398

else:

2399

res = '%sp' % format['height']

2400

elif format.get('width') is not None:

2401

res = '%dx?' % format['width']

else:

res = default

return res

def _format_note(self, fdict):

2407

res = ''

2408

if fdict.get('ext') in ['f4f', 'f4m']:

2409

res += '(unsupported) '

2410

if fdict.get('language'):

2411

if res:

2412

res += ' '

2413

res += '[%s] ' % fdict['language']

2414

if fdict.get('format_note') is not None:

2415

res += fdict['format_note'] + ' '

2416

if fdict.get('tbr') is not None:

2417

res += '%4dk ' % fdict['tbr']

2418

if fdict.get('container') is not None:

2419

if res:

2420

res += ', '

2421

res += '%s container' % fdict['container']

2422

if (fdict.get('vcodec') is not None

2423

and fdict.get('vcodec') != 'none'):

2424

if res:

2425

res += ', '

2426

res += fdict['vcodec']

2427

if fdict.get('vbr') is not None:

2428

res += '@'

2429

elif fdict.get('vbr') is not None and fdict.get('abr') is not None:

2430

res += 'video@'

2431

if fdict.get('vbr') is not None:

2432

res += '%4dk' % fdict['vbr']

2433

if fdict.get('fps') is not None:

2434

if res:

2435

res += ', '

2436

res += '%sfps' % fdict['fps']

2437

if fdict.get('acodec') is not None:

2438

if res:

2439

res += ', '

2440

if fdict['acodec'] == 'none':

2441

res += 'video only'

2442

else:

2443

res += '%-5s' % fdict['acodec']

2444

elif fdict.get('abr') is not None:

if res:

res += ', '

res += 'audio'

if fdict.get('abr') is not None:

2449

res += '@%3dk' % fdict['abr']

2450

if fdict.get('asr') is not None:

2451

res += ' (%5dHz)' % fdict['asr']

2452

if fdict.get('filesize') is not None:

2453

if res:

2454

res += ', '

2455

res += format_bytes(fdict['filesize'])

2456

elif fdict.get('filesize_approx') is not None:

2457

if res:

2458

res += ', '

2459

res += '~' + format_bytes(fdict['filesize_approx'])

2460

return res

2461

2462

def _format_note_table(self, f):
    """Build the NOTE cell of the table-style format listing for format f.

    The non-empty pieces (support marker, language, format note, container,
    sample rate) are joined with ', '.
    """
    pieces = [
        'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
        format_field(f, 'language', '[%s]'),
        format_field(f, 'format_note'),
        # ignore= is given the format's own extension along with None
        format_field(f, 'container', ignore=(None, f.get('ext'))),
        format_field(f, 'asr', '%5dHz'),
    ]
    return ', '.join(piece for piece in pieces if piece != '')

2472

2473

def list_formats(self, info_dict):
    """Print the table of available formats for info_dict via to_screen().

    Uses info_dict['formats'], or info_dict itself as the only format when
    that key is absent. Formats whose 'preference' is below -1000 are not
    listed. The 'listformats_table' param selects the detailed table layout
    instead of the four-column one.
    """
    formats = info_dict.get('formats', [info_dict])
    new_format = self.params.get('listformats_table', False)
    listed_formats = [
        f for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]

    if new_format:
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                format_field(f, 'fps', '%d'),
                '|',
                format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                format_field(f, 'tbr', '%4dk'),
                # A format without 'protocol' yields an empty cell instead of
                # raising AttributeError on None
                (f.get('protocol') or '').replace('http_dash_segments', 'dash').replace("native", "n"),
                '|',
                format_field(f, 'vcodec', default='unknown').replace('none', ''),
                format_field(f, 'vbr', '%4dk'),
                format_field(f, 'acodec', default='unknown').replace('none', ''),
                format_field(f, 'abr', '%3dk'),
                format_field(f, 'asr', '%5dHz'),
                self._format_note_table(f)]
            for f in listed_formats]
        header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
                       '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
    else:
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)]
            for f in listed_formats]
        header_line = ['format code', 'extension', 'resolution', 'note']

    # if len(formats) > 1:
    #     table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
    self.to_screen(
        '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
            header_line,
            table,
            delim=new_format,
            extraGap=(0 if new_format else 1),
            hideEmpty=new_format)))

2518

2519

def list_thumbnails(self, info_dict):
    """Print a table of the thumbnails in info_dict via to_screen().

    Prints a single notice instead when info_dict has no thumbnails.
    """
    video_id = info_dict['id']
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % video_id)
        return

    self.to_screen('[info] Thumbnails for %s:' % video_id)
    rows = []
    for thumb in thumbnails:
        rows.append([
            thumb['id'],
            thumb.get('width', 'unknown'),
            thumb.get('height', 'unknown'),
            thumb['url'],
        ])
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))

2530

2531

def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the available subtitle languages and their formats via to_screen().

    `subtitles` maps a language to a list of dicts each carrying an 'ext';
    the extensions are listed in reverse order. `name` is the label used in
    the printed messages.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return

    self.to_screen('Available %s for %s:' % (name, video_id))
    rows = []
    for lang, sub_formats in subtitles.items():
        exts = [sub['ext'] for sub in reversed(sub_formats)]
        rows.append([lang, ', '.join(exts)])
    self.to_screen(render_table(['Language', 'formats'], rows))

2541

2542

def urlopen(self, req):
    """ Start an HTTP download

    `req` may be a URL string, which is first wrapped by sanitized_Request(),
    or an already built request object. It is opened with self._opener
    using self._socket_timeout as timeout.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)

2547

2548

def print_debug_header(self):
    """Write the '[debug]' diagnostic header; a no-op unless 'verbose' is set.

    Reports, in order: the encodings in use, the program version, whether
    lazy extractor loading is enabled, the git HEAD (best effort), the
    Python version, the versions of external executables, the proxy map
    and - with the 'call_home' param - the public IP address.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    out_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encodings = (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        out_encoding,
        self.get_encoding())
    write_string(
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % encodings,
        encoding=None)

    self._write_string('[debug] yt-dlp version ' + __version__ + '\n')
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled' + '\n')

    # Best effort only: any failure while asking git is ignored
    try:
        git = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        git_out, _git_err = process_communicate_or_kill(git)
        git_out = git_out.decode().strip()
        if re.match('[0-9a-f]+', git_out):
            self._write_string('[debug] Git HEAD: ' + git_out + '\n')
    except Exception:
        try:
            sys.exc_clear()
        except Exception:
            pass

    implementation = platform.python_implementation()
    if implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
        implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]
    self._write_string('[debug] Python version %s (%s) - %s\n' % (
        platform.python_version(), implementation,
        platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    # Executables without a detected version are left out
    found = [
        '%s %s' % (exe, version)
        for exe, version in sorted(exe_versions.items())
        if version]
    exe_str = ', '.join(found) or 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        return
        # NOTE(review): the statements below can never run because of the
        # return above; the version check looks deliberately disabled - confirm
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)

2624

2625

def _setup_opener(self):
    """Create self._opener, self.cookiejar and self._socket_timeout from params.

    Params read: 'socket_timeout' (600 when unset), 'cookiefile', 'proxy'
    and 'debug_printtraffic'. An empty 'proxy' string disables proxies;
    when 'proxy' is unset the proxies reported by getproxies() are used.
    """
    socket_timeout = self.params.get('socket_timeout')
    self._socket_timeout = float(socket_timeout) if socket_timeout is not None else 600

    cookie_path = self.params.get('cookiefile')
    proxy_opt = self.params.get('proxy')

    if cookie_path is not None:
        cookie_path = expand_path(cookie_path)
        self.cookiejar = YoutubeDLCookieJar(cookie_path)
        # Only load the cookie file if it is readable
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load(ignore_discard=True, ignore_expires=True)
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if proxy_opt is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_opt == '':
        proxies = {}
    else:
        proxies = {'http': proxy_opt, 'https': proxy_opt}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
    file_handler.file_open = file_open

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

2677

2678

def encode(self, s):
    """Encode text `s` with the encoding from get_encoding(); bytes pass through.

    A UnicodeEncodeError is re-raised with a hint about the --encoding
    option appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s  # Already encoded

2687

2688

def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured

2693

2694

def _write_thumbnails(self, info_dict, filename):

2695

if self.params.get('writethumbnail', False):

2696

thumbnails = info_dict.get('thumbnails')

2697

if thumbnails:

2698

thumbnails = [thumbnails[-1]]

2699

elif self.params.get('write_all_thumbnails', False):

2700

thumbnails = info_dict.get('thumbnails')

else:

return

if not thumbnails:

# No thumbnails present, so return immediately

return

for t in thumbnails:

thumb_ext = determine_ext(t['url'], 'jpg')

2710

suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''

2711

thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''

2712

t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))

2713

2714

if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):

2715

self.to_screen('[%s] %s: Thumbnail %sis already present' %

2716

(info_dict['extractor'], info_dict['id'], thumb_display_id))

2717

else:

2718

self.to_screen('[%s] %s: Downloading thumbnail %s...' %

2719

(info_dict['extractor'], info_dict['id'], thumb_display_id))

2720

try:

2721

uf = self.urlopen(t['url'])

2722

with open(encodeFilename(thumb_filename), 'wb') as thumbf:

2723

shutil.copyfileobj(uf, thumbf)

2724

self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %

2725

(info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))

2726

except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:

2727

self.report_warning('Unable to download thumbnail "%s": %s' %

2728

(t['url'], error_to_compat_str(err)))