jfr.im git - yt-dlp.git/blame_incremental

Commit	Line	Data
	1	#!/usr/bin/env python
	2	# coding: utf-8
	3
	4	from __future__ import absolute_import, unicode_literals
	5
	6	import collections
	7	import contextlib
	8	import copy
	9	import datetime
	10	import errno
	11	import fileinput
	12	import io
	13	import itertools
	14	import json
	15	import locale
	16	import operator
	17	import os
	18	import platform
	19	import re
	20	import shutil
	21	import subprocess
	22	import socket
	23	import sys
	24	import time
	25	import tokenize
	26	import traceback
	27	import random
	28
	29	from string import ascii_letters
	30
	31	from .compat import (
	32	compat_basestring,
	33	compat_cookiejar,
	34	compat_get_terminal_size,
	35	compat_http_client,
	36	compat_kwargs,
	37	compat_numeric_types,
	38	compat_os_name,
	39	compat_str,
	40	compat_tokenize_tokenize,
	41	compat_urllib_error,
	42	compat_urllib_request,
	43	compat_urllib_request_DataHandler,
	44	)
	45	from .utils import (
	46	age_restricted,
	47	args_to_str,
	48	ContentTooShortError,
	49	date_from_str,
	50	DateRange,
	51	DEFAULT_OUTTMPL,
	52	determine_ext,
	53	determine_protocol,
	54	DownloadError,
	55	encode_compat_str,
	56	encodeFilename,
	57	error_to_compat_str,
	58	expand_path,
	59	ExtractorError,
	60	format_bytes,
	61	formatSeconds,
	62	GeoRestrictedError,
	63	int_or_none,
	64	ISO3166Utils,
	65	locked_file,
	66	make_HTTPS_handler,
	67	MaxDownloadsReached,
	68	orderedSet,
	69	PagedList,
	70	parse_filesize,
	71	PerRequestProxyHandler,
	72	platform_name,
	73	PostProcessingError,
	74	preferredencoding,
	75	prepend_extension,
	76	register_socks_protocols,
	77	render_table,
	78	replace_extension,
	79	SameFileError,
	80	sanitize_filename,
	81	sanitize_path,
	82	sanitize_url,
	83	sanitized_Request,
	84	std_headers,
	85	subtitles_filename,
	86	UnavailableVideoError,
	87	url_basename,
	88	version_tuple,
	89	write_json_file,
	90	write_string,
	91	YoutubeDLCookieProcessor,
	92	YoutubeDLHandler,
	93	)
	94	from .cache import Cache
	95	from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
	96	from .extractor.openload import PhantomJSwrapper
	97	from .downloader import get_suitable_downloader
	98	from .downloader.rtmp import rtmpdump_version
	99	from .postprocessor import (
	100	FFmpegFixupM3u8PP,
	101	FFmpegFixupM4aPP,
	102	FFmpegFixupStretchedPP,
	103	FFmpegMergerPP,
	104	FFmpegPostProcessor,
	105	get_postprocessor,
	106	)
	107	from .version import __version__
	108
	109	if compat_os_name == 'nt':
	110	import ctypes
	111
	112
	113	class YoutubeDL(object):
	114	"""YoutubeDL class.
	115
	116	YoutubeDL objects are the ones responsible for downloading the
	117	actual video file and writing it to disk if the user has requested
	118	it, among some other tasks. In most cases there should be one per
	119	program. As, given a video URL, the downloader doesn't know how to
	120	extract all the needed information, task that InfoExtractors do, it
	121	has to pass the URL to one of them.
	122
	123	For this, YoutubeDL objects have a method that allows
	124	InfoExtractors to be registered in a given order. When it is passed
	125	a URL, the YoutubeDL object hands it to the first InfoExtractor it
	126	finds that reports being able to handle it. The InfoExtractor extracts
	127	all the information about the video or videos the URL refers to, and
	128	YoutubeDL processes the extracted information, possibly using a File
	129	Downloader to download the video.
	130
	131	YoutubeDL objects accept a lot of parameters. In order not to saturate
	132	the object constructor with arguments, it receives a dictionary of
	133	options instead. These options are available through the params
	134	attribute for the InfoExtractors to use. The YoutubeDL also
	135	registers itself as the downloader in charge for the InfoExtractors
	136	that are added to it, so this is a "mutual registration".
	137
	138	Available options:
	139
	140	username: Username for authentication purposes.
	141	password: Password for authentication purposes.
	142	videopassword: Password for accessing a video.
	143	ap_mso: Adobe Pass multiple-system operator identifier.
	144	ap_username: Multiple-system operator account username.
	145	ap_password: Multiple-system operator account password.
	146	usenetrc: Use netrc for authentication instead.
	147	verbose: Print additional info to stdout.
	148	quiet: Do not print messages to stdout.
	149	no_warnings: Do not print out anything for warnings.
	150	forceurl: Force printing final URL.
	151	forcetitle: Force printing title.
	152	forceid: Force printing ID.
	153	forcethumbnail: Force printing thumbnail URL.
	154	forcedescription: Force printing description.
	155	forcefilename: Force printing final filename.
	156	forceduration: Force printing duration.
	157	forcejson: Force printing info_dict as JSON.
	158	dump_single_json: Force printing the info_dict of the whole playlist
	159	(or video) as a single JSON line.
	160	simulate: Do not download the video files.
	161	format: Video format code. See options.py for more information.
	162	outtmpl: Template for output names.
	163	restrictfilenames: Do not allow "&" and spaces in file names
	164	ignoreerrors: Do not stop on download errors.
	165	force_generic_extractor: Force downloader to use the generic extractor
	166	nooverwrites: Prevent overwriting files.
	167	playliststart: Playlist item to start at.
	168	playlistend: Playlist item to end at.
	169	playlist_items: Specific indices of playlist to download.
	170	playlistreverse: Download playlist items in reverse order.
	171	playlistrandom: Download playlist items in random order.
	172	matchtitle: Download only matching titles.
	173	rejecttitle: Reject downloads for matching titles.
	174	logger: Log messages to a logging.Logger instance.
	175	logtostderr: Log messages to stderr instead of stdout.
	176	writedescription: Write the video description to a .description file
	177	writeinfojson: Write the video description to a .info.json file
	178	writeannotations: Write the video annotations to a .annotations.xml file
	179	writethumbnail: Write the thumbnail image to a file
	180	write_all_thumbnails: Write all thumbnail formats to files
	181	writesubtitles: Write the video subtitles to a file
	182	writeautomaticsub: Write the automatically generated subtitles to a file
	183	allsubtitles: Downloads all the subtitles of the video
	184	(requires writesubtitles or writeautomaticsub)
	185	listsubtitles: Lists all available subtitles for the video
	186	subtitlesformat: The format code for subtitles
	187	subtitleslangs: List of languages of the subtitles to download
	188	keepvideo: Keep the video file after post-processing
	189	daterange: A DateRange object, download only if the upload_date is in the range.
	190	skip_download: Skip the actual download of the video file
	191	cachedir: Location of the cache files in the filesystem.
	192	False to disable filesystem cache.
	193	noplaylist: Download single video instead of a playlist if in doubt.
	194	age_limit: An integer representing the user's age in years.
	195	Unsuitable videos for the given age are skipped.
	196	min_views: An integer representing the minimum view count the video
	197	must have in order to not be skipped.
	198	Videos without view count information are always
	199	downloaded. None for no limit.
	200	max_views: An integer representing the maximum view count.
	201	Videos that are more popular than that are not
	202	downloaded.
	203	Videos without view count information are always
	204	downloaded. None for no limit.
	205	download_archive: File name of a file where all downloads are recorded.
	206	Videos already present in the file are not downloaded
	207	again.
	208	cookiefile: File name where cookies should be read from and dumped to.
	209	nocheckcertificate:Do not verify SSL certificates
	210	prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
	211	At the moment, this is only supported by YouTube.
	212	proxy: URL of the proxy server to use
	213	geo_verification_proxy: URL of the proxy to use for IP address verification
	214	on geo-restricted sites. (Experimental)
	215	socket_timeout: Time to wait for unresponsive hosts, in seconds
	216	bidi_workaround: Work around buggy terminals without bidirectional text
	217	support, using fribidi
	218	debug_printtraffic:Print out sent and received HTTP traffic
	219	include_ads: Download ads as well
	220	default_search: Prepend this string if an input url is not valid.
	221	'auto' for elaborate guessing
	222	encoding: Use this encoding instead of the system-specified.
	223	extract_flat: Do not resolve URLs, return the immediate result.
	224	Pass in 'in_playlist' to only show this behavior for
	225	playlist items.
	226	postprocessors: A list of dictionaries, each with an entry
	227	* key: The name of the postprocessor. See
	228	youtube_dl/postprocessor/__init__.py for a list.
	229	as well as any further keyword arguments for the
	230	postprocessor.
	231	progress_hooks: A list of functions that get called on download
	232	progress, with a dictionary with the entries
	233	* status: One of "downloading", "error", or "finished".
	234	Check this first and ignore unknown values.
	235
	236	If status is one of "downloading", or "finished", the
	237	following properties may also be present:
	238	* filename: The final filename (always present)
	239	* tmpfilename: The filename we're currently writing to
	240	* downloaded_bytes: Bytes on disk
	241	* total_bytes: Size of the whole file, None if unknown
	242	* total_bytes_estimate: Guess of the eventual file size,
	243	None if unavailable.
	244	* elapsed: The number of seconds since download started.
	245	* eta: The estimated time in seconds, None if unknown
	246	* speed: The download speed in bytes/second, None if
	247	unknown
	248	* fragment_index: The counter of the currently
	249	downloaded video fragment.
	250	* fragment_count: The number of fragments (= individual
	251	files that will be merged)
	252
	253	Progress hooks are guaranteed to be called at least once
	254	(with status "finished") if the download is successful.
	255	merge_output_format: Extension to use when merging formats.
	256	fixup: Automatically correct known faults of the file.
	257	One of:
	258	- "never": do nothing
	259	- "warn": only emit a warning
	260	- "detect_or_warn": check whether we can do anything
	261	about it, warn otherwise (default)
	262	source_address: (Experimental) Client-side IP address to bind to.
	263	call_home: Boolean, true iff we are allowed to contact the
	264	youtube-dl servers for debugging.
	265	sleep_interval: Number of seconds to sleep before each download when
	266	used alone or a lower bound of a range for randomized
	267	sleep before each download (minimum possible number
	268	of seconds to sleep) when used along with
	269	max_sleep_interval.
	270	max_sleep_interval:Upper bound of a range for randomized sleep before each
	271	download (maximum possible number of seconds to sleep).
	272	Must only be used along with sleep_interval.
	273	Actual sleep time will be a random float from range
	274	[sleep_interval; max_sleep_interval].
	275	listformats: Print an overview of available video formats and exit.
	276	list_thumbnails: Print a table of all thumbnails and exit.
	277	match_filter: A function that gets called with the info_dict of
	278	every video.
	279	If it returns a message, the video is ignored.
	280	If it returns None, the video is downloaded.
	281	match_filter_func in utils.py is one example for this.
	282	no_color: Do not emit color codes in output.
	283	geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
	284	HTTP header (experimental)
	285	geo_bypass_country:
	286	Two-letter ISO 3166-2 country code that will be used for
	287	explicit geographic restriction bypassing via faking
	288	X-Forwarded-For HTTP header (experimental)
	289
	290	The following options determine which downloader is picked:
	291	external_downloader: Executable of the external downloader to call.
	292	None or unset for standard (built-in) downloader.
	293	hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
	294	if True, otherwise use ffmpeg/avconv if False, otherwise
	295	use downloader suggested by extractor if None.
	296
	297	The following parameters are not used by YoutubeDL itself, they are used by
	298	the downloader (see youtube_dl/downloader/common.py):
	299	nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
	300	noresizebuffer, retries, continuedl, noprogress, consoletitle,
	301	xattr_set_filesize, external_downloader_args, hls_use_mpegts,
	302	http_chunk_size.
	303
	304	The following options are used by the post processors:
	305	prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
	306	otherwise prefer avconv.
	307	postprocessor_args: A list of additional command-line arguments for the
	308	postprocessor.
	309
	310	The following options are used by the Youtube extractor:
	311	youtube_include_dash_manifest: If True (default), DASH manifests and related
	312	data will be downloaded and processed by extractor.
	313	You can reduce network I/O by disabling it if you don't
	314	care about DASH.
	315	"""
	316
	317	_NUMERIC_FIELDS = set((
	318	'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
	319	'timestamp', 'upload_year', 'upload_month', 'upload_day',
	320	'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
	321	'average_rating', 'comment_count', 'age_limit',
	322	'start_time', 'end_time',
	323	'chapter_number', 'season_number', 'episode_number',
	324	'track_number', 'disc_number', 'release_year',
	325	'playlist_index',
	326	))
	327
	328	params = None
	329	_ies = []
	330	_pps = []
	331	_download_retcode = None
	332	_num_downloads = None
	333	_screen_file = None
	334
	335	def __init__(self, params=None, auto_init=True):
	336	"""Create a FileDownloader object with the given options."""
	337	if params is None:
	338	params = {}
	339	self._ies = []
	340	self._ies_instances = {}
	341	self._pps = []
	342	self._progress_hooks = []
	343	self._download_retcode = 0
	344	self._num_downloads = 0
	345	self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
	346	self._err_file = sys.stderr
	347	self.params = {
	348	# Default parameters
	349	'nocheckcertificate': False,
	350	}
	351	self.params.update(params)
	352	self.cache = Cache(self)
	353
	354	def check_deprecated(param, option, suggestion):
	355	if self.params.get(param) is not None:
	356	self.report_warning(
	357	'%s is deprecated. Use %s instead.' % (option, suggestion))
	358	return True
	359	return False
	360
	361	if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
	362	if self.params.get('geo_verification_proxy') is None:
	363	self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
	364
	365	check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
	366	check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
	367	check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
	368
	369	if params.get('bidi_workaround', False):
	370	try:
	371	import pty
	372	master, slave = pty.openpty()
	373	width = compat_get_terminal_size().columns
	374	if width is None:
	375	width_args = []
	376	else:
	377	width_args = ['-w', str(width)]
	378	sp_kwargs = dict(
	379	stdin=subprocess.PIPE,
	380	stdout=slave,
	381	stderr=self._err_file)
	382	try:
	383	self._output_process = subprocess.Popen(
	384	['bidiv'] + width_args, **sp_kwargs
	385	)
	386	except OSError:
	387	self._output_process = subprocess.Popen(
	388	['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
	389	self._output_channel = os.fdopen(master, 'rb')
	390	except OSError as ose:
	391	if ose.errno == errno.ENOENT:
	392	self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
	393	else:
	394	raise
	395
	396	if (sys.platform != 'win32' and
	397	sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
	398	not params.get('restrictfilenames', False)):
	399	# Unicode filesystem API will throw errors (#1474, #13027)
	400	self.report_warning(
	401	'Assuming --restrict-filenames since file system encoding '
	402	'cannot encode all characters. '
	403	'Set the LC_ALL environment variable to fix this.')
	404	self.params['restrictfilenames'] = True
	405
	406	if isinstance(params.get('outtmpl'), bytes):
	407	self.report_warning(
	408	'Parameter outtmpl is bytes, but should be a unicode string. '
	409	'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
	410
	411	self._setup_opener()
	412
	413	if auto_init:
	414	self.print_debug_header()
	415	self.add_default_info_extractors()
	416
	417	for pp_def_raw in self.params.get('postprocessors', []):
	418	pp_class = get_postprocessor(pp_def_raw['key'])
	419	pp_def = dict(pp_def_raw)
	420	del pp_def['key']
	421	pp = pp_class(self, **compat_kwargs(pp_def))
	422	self.add_post_processor(pp)
	423
	424	for ph in self.params.get('progress_hooks', []):
	425	self.add_progress_hook(ph)
	426
	427	register_socks_protocols()
	428
	429	def warn_if_short_id(self, argv):
	430	# short YouTube ID starting with dash?
	431	idxs = [
	432	i for i, a in enumerate(argv)
	433	if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
	434	if idxs:
	435	correct_argv = (
	436	['youtube-dl'] +
	437	[a for i, a in enumerate(argv) if i not in idxs] +
	438	['--'] + [argv[i] for i in idxs]
	439	)
	440	self.report_warning(
	441	'Long argument string detected. '
	442	'Use -- to separate parameters and URLs, like this:\n%s\n' %
	443	args_to_str(correct_argv))
	444
	445	def add_info_extractor(self, ie):
	446	"""Add an InfoExtractor object to the end of the list."""
	447	self._ies.append(ie)
	448	if not isinstance(ie, type):
	449	self._ies_instances[ie.ie_key()] = ie
	450	ie.set_downloader(self)
	451
	452	def get_info_extractor(self, ie_key):
	453	"""
	454	Get an instance of an IE with name ie_key, it will try to get one from
	455	the _ies list, if there's no instance it will create a new one and add
	456	it to the extractor list.
	457	"""
	458	ie = self._ies_instances.get(ie_key)
	459	if ie is None:
	460	ie = get_info_extractor(ie_key)()
	461	self.add_info_extractor(ie)
	462	return ie
	463
	464	def add_default_info_extractors(self):
	465	"""
	466	Add the InfoExtractors returned by gen_extractors to the end of the list
	467	"""
	468	for ie in gen_extractor_classes():
	469	self.add_info_extractor(ie)
	470
	471	def add_post_processor(self, pp):
	472	"""Add a PostProcessor object to the end of the chain."""
	473	self._pps.append(pp)
	474	pp.set_downloader(self)
	475
	476	def add_progress_hook(self, ph):
	477	"""Add the progress hook (currently only for the file downloader)"""
	478	self._progress_hooks.append(ph)
	479
	480	def _bidi_workaround(self, message):
	481	if not hasattr(self, '_output_channel'):
	482	return message
	483
	484	assert hasattr(self, '_output_process')
	485	assert isinstance(message, compat_str)
	486	line_count = message.count('\n') + 1
	487	self._output_process.stdin.write((message + '\n').encode('utf-8'))
	488	self._output_process.stdin.flush()
	489	res = ''.join(self._output_channel.readline().decode('utf-8')
	490	for _ in range(line_count))
	491	return res[:-len('\n')]
	492
	493	def to_screen(self, message, skip_eol=False):
	494	"""Print message to stdout if not in quiet mode."""
	495	return self.to_stdout(message, skip_eol, check_quiet=True)
	496
	497	def _write_string(self, s, out=None):
	498	write_string(s, out=out, encoding=self.params.get('encoding'))
	499
	500	def to_stdout(self, message, skip_eol=False, check_quiet=False):

1

#!/usr/bin/env python

2

# coding: utf-8

3

4

from __future__ import absolute_import, unicode_literals

import collections

import contextlib

import copy

import datetime

import errno

import fileinput

import io

import itertools

import json

import locale

import operator

import os

import platform

import re

import shutil

import subprocess

import socket

import sys

import time

import tokenize

import traceback

import random

from string import ascii_letters

30

31

from .compat import (

32

compat_basestring,

33

compat_cookiejar,

34

compat_get_terminal_size,

35

compat_http_client,

36

compat_kwargs,

37

compat_numeric_types,

38

compat_os_name,

39

compat_str,

40

compat_tokenize_tokenize,

41

compat_urllib_error,

42

compat_urllib_request,

43

compat_urllib_request_DataHandler,

)

from .utils import (

age_restricted,

args_to_str,

ContentTooShortError,

date_from_str,

DateRange,

DEFAULT_OUTTMPL,

determine_ext,

determine_protocol,

DownloadError,

encode_compat_str,

encodeFilename,

error_to_compat_str,

expand_path,

ExtractorError,

format_bytes,

formatSeconds,

GeoRestrictedError,

int_or_none,

ISO3166Utils,

locked_file,

make_HTTPS_handler,

MaxDownloadsReached,

orderedSet,

PagedList,

parse_filesize,

PerRequestProxyHandler,

platform_name,

PostProcessingError,

preferredencoding,

prepend_extension,

register_socks_protocols,

render_table,

replace_extension,

SameFileError,

sanitize_filename,

sanitize_path,

sanitize_url,

sanitized_Request,

std_headers,

subtitles_filename,

UnavailableVideoError,

url_basename,

version_tuple,

write_json_file,

write_string,

YoutubeDLCookieProcessor,

92

YoutubeDLHandler,

93

)

94

from .cache import Cache

95

from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER

96

from .extractor.openload import PhantomJSwrapper

97

from .downloader import get_suitable_downloader

98

from .downloader.rtmp import rtmpdump_version

99

from .postprocessor import (

100

FFmpegFixupM3u8PP,

101

FFmpegFixupM4aPP,

102

FFmpegFixupStretchedPP,

FFmpegMergerPP,

FFmpegPostProcessor,

get_postprocessor,

)

from .version import __version__

108

109

if compat_os_name == 'nt':

import ctypes

class YoutubeDL(object):

114

"""YoutubeDL class.

115

116

YoutubeDL objects are the ones responsible for downloading the

117

actual video file and writing it to disk if the user has requested

118

it, among some other tasks. In most cases there should be one per

119

program. As, given a video URL, the downloader doesn't know how to

120

extract all the needed information, task that InfoExtractors do, it

121

has to pass the URL to one of them.

122

123

For this, YoutubeDL objects have a method that allows

124

InfoExtractors to be registered in a given order. When it is passed

125

a URL, the YoutubeDL object hands it to the first InfoExtractor it

126

finds that reports being able to handle it. The InfoExtractor extracts

127

all the information about the video or videos the URL refers to, and

128

YoutubeDL processes the extracted information, possibly using a File

129

Downloader to download the video.

130

131

YoutubeDL objects accept a lot of parameters. In order not to saturate

132

the object constructor with arguments, it receives a dictionary of

133

options instead. These options are available through the params

134

attribute for the InfoExtractors to use. The YoutubeDL also

135

registers itself as the downloader in charge for the InfoExtractors

136

that are added to it, so this is a "mutual registration".

Available options:

username: Username for authentication purposes.

141

password: Password for authentication purposes.

142

videopassword: Password for accessing a video.

143

ap_mso: Adobe Pass multiple-system operator identifier.

144

ap_username: Multiple-system operator account username.

145

ap_password: Multiple-system operator account password.

146

usenetrc: Use netrc for authentication instead.

147

verbose: Print additional info to stdout.

148

quiet: Do not print messages to stdout.

149

no_warnings: Do not print out anything for warnings.

150

forceurl: Force printing final URL.

151

forcetitle: Force printing title.

152

forceid: Force printing ID.

153

forcethumbnail: Force printing thumbnail URL.

154

forcedescription: Force printing description.

155

forcefilename: Force printing final filename.

156

forceduration: Force printing duration.

157

forcejson: Force printing info_dict as JSON.

158

dump_single_json: Force printing the info_dict of the whole playlist

159

(or video) as a single JSON line.

160

simulate: Do not download the video files.

161

format: Video format code. See options.py for more information.

162

outtmpl: Template for output names.

163

restrictfilenames: Do not allow "&" and spaces in file names

164

ignoreerrors: Do not stop on download errors.

165

force_generic_extractor: Force downloader to use the generic extractor

166

nooverwrites: Prevent overwriting files.

167

playliststart: Playlist item to start at.

168

playlistend: Playlist item to end at.

169

playlist_items: Specific indices of playlist to download.

170

playlistreverse: Download playlist items in reverse order.

171

playlistrandom: Download playlist items in random order.

172

matchtitle: Download only matching titles.

173

rejecttitle: Reject downloads for matching titles.

174

logger: Log messages to a logging.Logger instance.

175

logtostderr: Log messages to stderr instead of stdout.

176

writedescription: Write the video description to a .description file

177

writeinfojson: Write the video description to a .info.json file

178

writeannotations: Write the video annotations to a .annotations.xml file

179

writethumbnail: Write the thumbnail image to a file

180

write_all_thumbnails: Write all thumbnail formats to files

181

writesubtitles: Write the video subtitles to a file

182

writeautomaticsub: Write the automatically generated subtitles to a file

183

allsubtitles: Downloads all the subtitles of the video

184

(requires writesubtitles or writeautomaticsub)

185

listsubtitles: Lists all available subtitles for the video

186

subtitlesformat: The format code for subtitles

187

subtitleslangs: List of languages of the subtitles to download

188

keepvideo: Keep the video file after post-processing

189

daterange: A DateRange object, download only if the upload_date is in the range.

190

skip_download: Skip the actual download of the video file

191

cachedir: Location of the cache files in the filesystem.

192

False to disable filesystem cache.

193

noplaylist: Download single video instead of a playlist if in doubt.

194

age_limit: An integer representing the user's age in years.

195

Unsuitable videos for the given age are skipped.

196

min_views: An integer representing the minimum view count the video

197

must have in order to not be skipped.

198

Videos without view count information are always

199

downloaded. None for no limit.

200

max_views: An integer representing the maximum view count.

201

Videos that are more popular than that are not

202

downloaded.

203

Videos without view count information are always

204

downloaded. None for no limit.

205

download_archive: File name of a file where all downloads are recorded.

206

Videos already present in the file are not downloaded

207

again.

208

cookiefile: File name where cookies should be read from and dumped to.

209

nocheckcertificate:Do not verify SSL certificates

210

prefer_insecure: Use HTTP instead of HTTPS to retrieve information.

211

At the moment, this is only supported by YouTube.

212

proxy: URL of the proxy server to use

213

geo_verification_proxy: URL of the proxy to use for IP address verification

214

on geo-restricted sites. (Experimental)

215

socket_timeout: Time to wait for unresponsive hosts, in seconds

216

bidi_workaround: Work around buggy terminals without bidirectional text

217

support, using fribidi

218

debug_printtraffic:Print out sent and received HTTP traffic

219

include_ads: Download ads as well

220

default_search: Prepend this string if an input url is not valid.

221

'auto' for elaborate guessing

222

encoding: Use this encoding instead of the system-specified.

223

extract_flat: Do not resolve URLs, return the immediate result.

224

Pass in 'in_playlist' to only show this behavior for

225

playlist items.

226

postprocessors: A list of dictionaries, each with an entry

227

* key: The name of the postprocessor. See

228

youtube_dl/postprocessor/__init__.py for a list.

229

as well as any further keyword arguments for the

230

postprocessor.

231

progress_hooks: A list of functions that get called on download

232

progress, with a dictionary with the entries

233

* status: One of "downloading", "error", or "finished".

234

Check this first and ignore unknown values.

235

236

If status is one of "downloading", or "finished", the

237

following properties may also be present:

238

* filename: The final filename (always present)

239

* tmpfilename: The filename we're currently writing to

240

* downloaded_bytes: Bytes on disk

241

* total_bytes: Size of the whole file, None if unknown

242

* total_bytes_estimate: Guess of the eventual file size,

243

None if unavailable.

244

* elapsed: The number of seconds since download started.

245

* eta: The estimated time in seconds, None if unknown

246

* speed: The download speed in bytes/second, None if

247

unknown

248

* fragment_index: The counter of the currently

249

downloaded video fragment.

250

* fragment_count: The number of fragments (= individual

251

files that will be merged)

252

253

Progress hooks are guaranteed to be called at least once

254

(with status "finished") if the download is successful.

255

merge_output_format: Extension to use when merging formats.

256

fixup: Automatically correct known faults of the file.

257

One of:

258

- "never": do nothing

259

- "warn": only emit a warning

260

- "detect_or_warn": check whether we can do anything

261

about it, warn otherwise (default)

262

source_address: (Experimental) Client-side IP address to bind to.

263

call_home: Boolean, true iff we are allowed to contact the

264

youtube-dl servers for debugging.

265

sleep_interval: Number of seconds to sleep before each download when

266

used alone or a lower bound of a range for randomized

267

sleep before each download (minimum possible number

268

of seconds to sleep) when used along with

269

max_sleep_interval.

270

max_sleep_interval:Upper bound of a range for randomized sleep before each

271

download (maximum possible number of seconds to sleep).

272

Must only be used along with sleep_interval.

273

Actual sleep time will be a random float from range

274

[sleep_interval; max_sleep_interval].

275

listformats: Print an overview of available video formats and exit.

276

list_thumbnails: Print a table of all thumbnails and exit.

277

match_filter: A function that gets called with the info_dict of

278

every video.

279

If it returns a message, the video is ignored.

280

If it returns None, the video is downloaded.

281

match_filter_func in utils.py is one example for this.

282

no_color: Do not emit color codes in output.

283

geo_bypass: Bypass geographic restriction via faking X-Forwarded-For

284

HTTP header (experimental)

285

geo_bypass_country:

286

Two-letter ISO 3166-2 country code that will be used for

287

explicit geographic restriction bypassing via faking

288

X-Forwarded-For HTTP header (experimental)

289

290

The following options determine which downloader is picked:

291

external_downloader: Executable of the external downloader to call.

292

None or unset for standard (built-in) downloader.

293

hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv

294

if True, otherwise use ffmpeg/avconv if False, otherwise

295

use downloader suggested by extractor if None.

296

297

The following parameters are not used by YoutubeDL itself, they are used by

298

the downloader (see youtube_dl/downloader/common.py):

299

nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,

300

noresizebuffer, retries, continuedl, noprogress, consoletitle,

301

xattr_set_filesize, external_downloader_args, hls_use_mpegts,

302

http_chunk_size.

303

304

The following options are used by the post processors:

305

prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,

306

otherwise prefer avconv.

307

postprocessor_args: A list of additional command-line arguments for the

308

postprocessor.

309

310

The following options are used by the Youtube extractor:

311

youtube_include_dash_manifest: If True (default), DASH manifests and related

312

data will be downloaded and processed by extractor.

313

You can reduce network I/O by disabling it if you don't

care about DASH.

"""

# Names of info-dict fields treated as numeric; prepare_filename iterates
# over this set when patching the output template for missing fields.
_NUMERIC_FIELDS = set((
    # format properties
    'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps',
    'filesize', 'filesize_approx',
    # dates
    'timestamp', 'upload_year', 'upload_month', 'upload_day',
    # statistics
    'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
    'average_rating', 'comment_count', 'age_limit',
    # section boundaries
    'start_time', 'end_time',
    # numbering inside a series / album / playlist
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number', 'release_year',
    'playlist_index',
))

# Class-level placeholders; __init__ rebinds every one of them per instance.
params = None
_ies = []
_pps = []
_download_retcode = None
_num_downloads = None
_screen_file = None

334

335

def __init__(self, params=None, auto_init=True):
    """Create a FileDownloader object with the given options.

    params:    dict of options; it is merged over the built-in defaults.
    auto_init: if true, print the debug header and register the default
               info extractors.
    """
    if params is None:
        params = {}

    # Per-instance state (shadows the class-level placeholders)
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    self._err_file = sys.stderr

    # Default parameters first, user supplied values on top
    self.params = {
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)

    def check_deprecated(param, option, suggestion):
        # Warn about a deprecated parameter; tell the caller whether it was set
        if self.params.get(param) is None:
            return False
        self.report_warning(
            '%s is deprecated. Use %s instead.' % (option, suggestion))
        return True

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the old value over unless the new option was given explicitly
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

    if params.get('bidi_workaround', False):
        try:
            import pty
            master_fd, slave_fd = pty.openpty()
            width = compat_get_terminal_size().columns
            width_args = [] if width is None else ['-w', str(width)]
            sp_kwargs = {
                'stdin': subprocess.PIPE,
                'stdout': slave_fd,
                'stderr': self._err_file,
            }
            # bidiv is tried first, fribidi is the fallback
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs)
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master_fd, 'rb')
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

    ascii_only_fs = sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
    if (sys.platform != 'win32' and ascii_only_fs and
            not params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if isinstance(params.get('outtmpl'), bytes):
        self.report_warning(
            'Parameter outtmpl is bytes, but should be a unicode string. '
            'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    # Instantiate the configured post processors; 'key' selects the class,
    # the remaining items are passed as keyword arguments.
    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        pp_kwargs = dict(pp_def_raw)
        del pp_kwargs['key']
        self.add_post_processor(pp_class(self, **compat_kwargs(pp_kwargs)))

    for hook in self.params.get('progress_hooks', []):
        self.add_progress_hook(hook)

    register_socks_protocols()

428

429

def warn_if_short_id(self, argv):
    """Warn if argv holds what looks like a short YouTube ID starting with a dash.

    The warning shows a corrected command line in which those arguments
    are moved behind a '--' separator.
    """
    short_id_re = r'^-[0-9A-Za-z_-]{10}$'
    idxs = [pos for pos, arg in enumerate(argv) if re.match(short_id_re, arg)]
    if not idxs:
        return

    regular_args = [arg for pos, arg in enumerate(argv) if pos not in idxs]
    dashed_ids = [argv[pos] for pos in idxs]
    correct_argv = ['youtube-dl'] + regular_args + ['--'] + dashed_ids
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))

444

445

def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list.

    Classes are only appended; instances are additionally registered by
    their ie_key() and get this object set as their downloader.
    """
    self._ies.append(ie)
    if isinstance(ie, type):
        return
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)

451

452

def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key. If there is none yet,
    a new one is created, added to the extractor list and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created

463

464

def add_default_info_extractors(self):
    """
    Append every extractor class yielded by gen_extractor_classes() to the
    extractor list.
    """
    for extractor_class in gen_extractor_classes():
        self.add_info_extractor(extractor_class)

470

471

def add_post_processor(self, pp):
    """Append a PostProcessor object to the chain and set this object as its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)

475

476

def add_progress_hook(self, ph):
    """Register a progress hook (currently only for the file downloader)."""
    hooks = self._progress_hooks
    hooks.append(ph)

479

480

def _bidi_workaround(self, message):
    """Pass message through the bidi helper process, if one was set up.

    Without an _output_channel attribute the message is returned
    untouched. Otherwise the message is written to the helper's stdin and
    as many lines as were sent are read back from the output channel.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)

    line_count = message.count('\n') + 1
    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + '\n').encode('utf-8'))
    helper_stdin.flush()

    converted = []
    for _ in range(line_count):
        converted.append(self._output_channel.readline().decode('utf-8'))
    res = ''.join(converted)
    # Drop the trailing newline that was appended before sending
    return res[:-len('\n')]

492

493

def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode."""
    # Thin wrapper: to_stdout with the quiet check switched on
    result = self.to_stdout(message, skip_eol, check_quiet=True)
    return result

496

497

def _write_string(self, s, out=None):
    """Write s to out via write_string, using the 'encoding' parameter."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)

499

500

def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to stdout if not in quiet mode.

    A configured 'logger' receives the message through debug() instead.
    The 'quiet' parameter is only honoured when check_quiet is true.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    message = self._bidi_workaround(message)
    terminator = ['\n', ''][skip_eol]
    self._write_string(message + terminator, self._screen_file)

510

511

def to_stderr(self, message):
    """Print message to stderr, or hand it to the configured logger's error()."""
    assert isinstance(message, compat_str)
    if self.params.get('logger'):
        self.params['logger'].error(message)
        return
    message = self._bidi_workaround(message)
    self._write_string(message + '\n', self._err_file)

520

521

def to_console_title(self, message):
    """Set the console title to message when the 'consoletitle' parameter is on."""
    if not self.params.get('consoletitle', False):
        return
    if compat_os_name == 'nt':
        kernel32 = ctypes.windll.kernel32
        if kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        title_sequence = '\033]0;%s\007' % message
        self._write_string(title_sequence, self._screen_file)

531

532

def save_console_title(self):
    """Push the current console title on the terminal's title stack.

    Does nothing unless 'consoletitle' is set, 'simulate' is off, the OS
    is not 'nt' and TERM is present in the environment.
    """
    wanted = self.params.get('consoletitle', False)
    if not wanted:
        return
    if self.params.get('simulate', False):
        return
    if compat_os_name != 'nt' and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)

540

541

def restore_console_title(self):
    """Pop the console title back from the terminal's title stack.

    Does nothing unless 'consoletitle' is set, 'simulate' is off, the OS
    is not 'nt' and TERM is present in the environment.
    """
    wanted = self.params.get('consoletitle', False)
    if not wanted:
        return
    if self.params.get('simulate', False):
        return
    if compat_os_name != 'nt' and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)

549

550

def __enter__(self):
    """Enter the context: save the console title and return this object."""
    self.save_console_title()
    return self

553

554

def __exit__(self, *args):
    """Leave the context: restore the console title and, if a 'cookiefile'
    parameter is set, save the cookie jar."""
    self.restore_console_title()

    cookiefile = self.params.get('cookiefile')
    if cookiefile is not None:
        self.cookiejar.save()

559

560

def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message (if any) is printed to stderr first; in verbose mode a
    traceback follows. Unless the 'ignoreerrors' parameter is set, a
    DownloadError is raised afterwards; otherwise the download return
    code is set to 1.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)

    active = sys.exc_info()
    # Some exceptions carry the exc_info of the error they wrap
    wraps_exc_info = bool(
        active[0] and hasattr(active[1], 'exc_info') and active[1].exc_info[0])

    if self.params.get('verbose'):
        if tb is None:
            if active[0]:  # if .trouble has been called from an except block
                tb = ''
                if wraps_exc_info:
                    tb += ''.join(traceback.format_exception(*active[1].exc_info))
                tb += encode_compat_str(traceback.format_exc())
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if not self.params.get('ignoreerrors', False):
        exc_info = active[1].exc_info if wraps_exc_info else active
        raise DownloadError(message, exc_info)
    self._download_retcode = 1

589

590

def report_warning(self, message):
    '''
    Print the message to stderr, prefixed with 'WARNING:'.
    The prefix is colored if stderr is a tty, colors are not disabled and
    the OS is not 'nt'. A configured logger gets the bare message through
    warning() instead; 'no_warnings' suppresses the output.
    '''
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
        return
    if self.params.get('no_warnings'):
        return

    use_color = (
        not self.params.get('no_color') and
        self._err_file.isatty() and
        compat_os_name != 'nt')
    _msg_header = '\033[0;33mWARNING:\033[0m' if use_color else 'WARNING:'
    self.to_stderr('%s %s' % (_msg_header, message))

606

607

def report_error(self, message, tb=None):
    '''
    Prefix the message with 'ERROR:' and pass it on to trouble().
    The prefix is colored in red if stderr is a tty, colors are not
    disabled and the OS is not 'nt'.
    '''
    use_color = (
        not self.params.get('no_color') and
        self._err_file.isatty() and
        compat_os_name != 'nt')
    _msg_header = '\033[0;31mERROR:\033[0m' if use_color else 'ERROR:'
    self.trouble('%s %s' % (_msg_header, message), tb)

618

619

def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded.

    Falls back to a message without the file name if printing the name
    raises UnicodeEncodeError.
    """
    try:
        named_notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(named_notice)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')

625

626

def prepare_filename(self, info_dict):
    """Generate the output filename.

    The output template ('outtmpl' parameter, DEFAULT_OUTTMPL if unset)
    is filled from a copy of info_dict extended with 'epoch',
    'autonumber' and, if it can be derived from width/height,
    'resolution'. Fields that are missing from the dict render as 'NA'.

    Returns the sanitized path, or None after reporting an error when the
    template substitution raises ValueError.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                template_dict['resolution'] = '%dx?' % template_dict['width']

        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id' or k.endswith('_id')))
        # Numbers are kept as they are so numeric conversions keep working;
        # None values and containers are dropped, the rest is sanitized.
        template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                             for k, v in template_dict.items()
                             if v is not None and not isinstance(v, (list, tuple, dict)))
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

        # For fields playlist_index and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': len(str(template_dict['n_entries'])),
            'autonumber': autonumber_size,
        }
        # The parentheses of the mapping key are literal characters of the
        # template and therefore have to be escaped in the pattern.
        FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
        mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
        if mobj:
            outtmpl = re.sub(
                FIELD_SIZE_COMPAT_RE,
                r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
                outtmpl)

        # Missing numeric fields used together with integer presentation types
        # in format specification will break the argument substitution since
        # string 'NA' is returned for missing fields. We will patch output
        # template for missing fields to meet string presentation type.
        #
        # As of [1] format syntax is:
        #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
        # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
        FORMAT_RE = r'''(?x)
            (?<!%)
            %
            \({0}\)  # mapping key
            (?:[#0\-+ ]+)?  # conversion flags (optional)
            (?:\d+)?  # minimum field width (optional)
            (?:\.\d+)?  # precision (optional)
            [hlL]?  # length modifier (optional)
            [diouxXeEfFgGcrs%]  # conversion type
        '''
        for numeric_field in self._NUMERIC_FIELDS:
            if numeric_field not in template_dict:
                outtmpl = re.sub(
                    FORMAT_RE.format(numeric_field),
                    r'%({0})s'.format(numeric_field), outtmpl)

        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        filename = expand_path(outtmpl).replace(sep, '') % template_dict

        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            filename = encodeFilename(filename, True).decode(preferredencoding())
        return sanitize_path(filename)
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None

714

715

def _match_entry(self, info_dict, incomplete):
    """ Returns None iff the file should be downloaded

    Otherwise the return value is a message telling why the entry is
    skipped. The 'match_filter' callable is only consulted when
    incomplete is false.
    """
    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    # The title may be absent when we're just evaluating the playlist
    if 'title' in info_dict:
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
            return '"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
            return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

    date = info_dict.get('upload_date')
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)

    view_count = info_dict.get('view_count')
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)

    if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
        return 'Skipping "%s" because it is age restricted' % video_title
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title

    if incomplete:
        return None
    match_filter = self.params.get('match_filter')
    if match_filter is None:
        return None
    # A message from the filter rejects the video, None accepts it
    return match_filter(info_dict)

@staticmethod

def add_extra_info(info_dict, extra_info):

759

'''Set the keys from extra_info in info dict if they are missing'''

760

for key, value in extra_info.items():

761

info_dict.setdefault(key, value)

762

763

def extract_info(self, url, download=True, ie_key=None, extra_info={},
                 process=True, force_generic_extractor=False):
    '''
    Run the first suitable info extractor on url.

    With 'process' the extractor result goes through process_ie_result
    (which also downloads if 'download') and that call's value is
    returned; otherwise the raw extractor result is returned.
    extra_info is a dict containing the extra values to add to each result.
    None is returned when extraction stops on a reported error, when the
    extractor returns None, or when no extractor is suitable.
    '''

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    candidates = [self.get_info_extractor(ie_key)] if ie_key else self._ies

    for ie in candidates:
        if not ie.suitable(url):
            continue

        # The list may hold classes; make sure we work with an instance
        ie = self.get_info_extractor(ie.ie_key())
        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_default_extra_info(ie_result, ie, url)
            if not process:
                return ie_result
            return self.process_ie_result(ie_result, download, extra_info)
        except GeoRestrictedError as e:
            msg = e.msg
            if e.countries:
                msg += '\nThis video is available in %s.' % ', '.join(
                    map(ISO3166Utils.short2full, e.countries))
            msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
            self.report_error(msg)
            break
        except ExtractorError as e:  # An error we somewhat expected
            self.report_error(compat_str(e), e.format_traceback())
            break
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
            break
    else:
        # The loop ran out of extractors without any of them being suitable
        self.report_error('no suitable InfoExtractor for URL %s' % url)

824

825

def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the extractor and webpage fields of ie_result where they are missing."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)

832

833

def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.
    Raises Exception for an unknown '_type'.
    """
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        flat_in_playlist = extract_flat == 'in_playlist' and 'playlist' in extra_info
        if flat_in_playlist or extract_flat is True:
            # Flat extraction: the reference is returned unresolved
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)

    if result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)

    if result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        # Everything the outer result knows (except identity fields)
        # overrides what the inner extraction found
        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for field in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if field in force_properties:
                del force_properties[field]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/rg3/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)

    if result_type in ('playlist', 'multi_video'):
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(spec):
                # 'a-b' segments expand to the inclusive range, others are single items
                for segment in spec.split(','):
                    if '-' in segment:
                        start, end = segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']

        def make_playlistitems_entries(list_ie_entries):
            # Pick the requested (1-based) items, skipping out-of-range ones
            num_entries = len(list_ie_entries)
            return [
                list_ie_entries[i - 1] for i in playlistitems
                if -num_entries <= i - 1 < num_entries]

        def report_download(num_entries):
            self.to_screen(
                '[%s] playlist %s: Downloading %d videos' %
                (ie_result['extractor'], playlist, num_entries))

        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = make_playlistitems_entries(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(item - 1, item))
            else:
                entries = ie_entries.getslice(playliststart, playlistend)
            n_entries = len(entries)
            report_download(n_entries)
        else:  # iterable
            if playlistitems:
                entries = make_playlistitems_entries(list(itertools.islice(
                    ie_entries, 0, max(playlistitems))))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            report_download(n_entries)

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result

    if result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Give each old-style entry the extractor fields of its container
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r

        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result

    raise Exception('Invalid result type: %s' % result_type)

1027

1028

def _build_format_filter(self, filter_spec):
    """ Returns a function to filter the formats according to the filter_spec

    filter_spec is either a numeric comparison (e.g. 'height<=720') or a
    string comparison (e.g. 'ext=mp4'). A '?' right after the operator
    makes the filter accept formats that lack the field.

    The returned callable takes a format dict; for a format lacking the
    field it returns the matched '?' text (or None), otherwise the result
    of the comparison.

    Raises ValueError if filter_spec cannot be parsed.
    """
    OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
        $
        ''' % '|'.join(map(re.escape, OPERATORS.keys())))
    m = operator_rex.search(filter_spec)
    if m:
        try:
            comparison_value = int(m.group('value'))
        except ValueError:
            # Not a plain integer: try a size with unit, then with a 'B' appended
            comparison_value = parse_filesize(m.group('value'))
            if comparison_value is None:
                comparison_value = parse_filesize(m.group('value') + 'B')
            if comparison_value is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        m.group('value'), filter_spec))
        op = OPERATORS[m.group('op')]

    if not m:
        STR_OPERATORS = {
            '=': operator.eq,
            '!=': operator.ne,
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
        }
        # The 'key' group is required: _filter below looks the field up
        # through m.group('key') for both kinds of filters.
        str_operator_rex = re.compile(r'''(?x)
            \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
            \s*(?P<value>[a-zA-Z0-9._-]+)
            \s*$
            ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
        m = str_operator_rex.search(filter_spec)
        if m:
            comparison_value = m.group('value')
            op = STR_OPERATORS[m.group('op')]

    if not m:
        raise ValueError('Invalid filter specification %r' % filter_spec)

    def _filter(f):
        actual_value = f.get(m.group('key'))
        if actual_value is None:
            # Truthy only when the spec carried the '?' marker
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)
    return _filter

1087

1088

def _default_format_spec(self, info_dict, download=True):
    """Return the default format specification string.

    The result is 'best/bestvideo+bestaudio' when a single file is to be
    preferred (output goes to '-', the video is live, or merging is not
    possible) and 'bestvideo+bestaudio/best' otherwise, including when
    simulating or not downloading.
    """

    def can_merge():
        merger = FFmpegMergerPP(self)
        return merger.available and merger.can_merge()

    def prefer_best():
        if self.params.get('simulate', False) or not download:
            return False
        if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
            return True
        if info_dict.get('is_live'):
            return True
        return not can_merge()

    if prefer_best():
        return 'best/bestvideo+bestaudio'
    return 'bestvideo+bestaudio/best'

1111

1112

def build_format_selector(self, format_spec):
    """Compile a format specification string into a selector function.

    format_spec is tokenized with the Python tokenizer, parsed into a tree
    of FormatSelector tuples and turned into a callable.  The returned
    callable takes a context dict with the keys 'formats' (list of format
    dicts) and 'incomplete_formats' (bool) and returns an iterable of the
    selected format dicts.

    Raises SyntaxError when the specification cannot be parsed.
    """
    def syntax_error(note, start):
        # start is a (row, column) token position; the column is used to
        # draw a caret under the offending part of the specification.
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    def _parse_filter(tokens):
        # Collect the raw text of everything up to the closing bracket.
        filter_parts = []
        for tok_type, tok_string, _, _, _ in tokens:
            if tok_type == tokenize.OP and tok_string == ']':
                return ''.join(filter_parts)
            filter_parts.append(tok_string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for tok_type, tok_string, start, end, line in tokens:
            if tok_type == tokenize.OP and tok_string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, tok_string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for tok_type, tok_string, start, end, line in tokens:
                    yield tok_type, tok_string, start, end, line
                    if tok_type == tokenize.OP and tok_string == ']':
                        break
            elif tok_type == tokenize.OP and tok_string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, tok_string, start, end, line
            elif tok_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = tok_string
                    last_start = start
                    last_end = end
                else:
                    last_string += tok_string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        selectors = []
        current_selector = None
        for tok_type, tok_string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if tok_type == getattr(tokenize, 'ENCODING', None):
                continue
            elif tok_type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, tok_string, [])
            elif tok_type == tokenize.OP:
                if tok_string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and tok_string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and tok_string == ',':
                    tokens.restore_last_token()
                    break
                elif tok_string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif tok_string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif tok_string == '[':
                    # A bare filter such as '[height<720]' applies to 'best'
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif tok_string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif tok_string == '+':
                    video_selector = current_selector
                    audio_selector = _parse_format_selection(tokens, inside_merge=True)
                    if not video_selector or not audio_selector:
                        raise syntax_error('"+" must be between two format selectors', start)
                    current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(tok_string), start)
            elif tok_type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _build_selector_function(selector):
        if isinstance(selector, list):
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    for picked in f(ctx):
                        yield picked
            return selector_function
        elif selector.type == GROUP:
            selector_function = _build_selector_function(selector.selector)
        elif selector.type == PICKFIRST:
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                # The first alternative that yields anything wins
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []
        elif selector.type == SINGLE:
            single_spec = selector.selector

            def selector_function(ctx):
                formats = list(ctx['formats'])
                if not formats:
                    return
                if single_spec == 'all':
                    for f in formats:
                        yield f
                elif single_spec in ['best', 'worst', None]:
                    format_idx = 0 if single_spec == 'worst' else -1
                    audiovideo_formats = [
                        f for f in formats
                        if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                    if audiovideo_formats:
                        yield audiovideo_formats[format_idx]
                    # for extractors with incomplete formats (audio only (soundcloud)
                    # or video only (imgur)) we will fallback to best/worst
                    # {video,audio}-only format
                    elif ctx['incomplete_formats']:
                        yield formats[format_idx]
                elif single_spec in ('bestaudio', 'worstaudio'):
                    audio_formats = [
                        f for f in formats
                        if f.get('vcodec') == 'none']
                    if audio_formats:
                        yield audio_formats[0 if single_spec == 'worstaudio' else -1]
                elif single_spec in ('bestvideo', 'worstvideo'):
                    video_formats = [
                        f for f in formats
                        if f.get('acodec') == 'none']
                    if video_formats:
                        yield video_formats[0 if single_spec == 'worstvideo' else -1]
                else:
                    extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                    # .get() so that a format dict lacking the key simply
                    # does not match instead of raising KeyError
                    key = 'ext' if single_spec in extensions else 'format_id'
                    matches = [f for f in formats if f.get(key) == single_spec]
                    if matches:
                        yield matches[-1]
        elif selector.type == MERGE:
            def _merge(formats_info):
                format_1, format_2 = [f['format_id'] for f in formats_info]
                # The first format must contain the video and the
                # second the audio
                if formats_info[0].get('vcodec') == 'none':
                    self.report_error('The first format must '
                                      'contain the video, try using '
                                      '"-f %s+%s"' % (format_2, format_1))
                    return
                # Formats must be opposite (video+audio)
                if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                    self.report_error(
                        'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                        % (format_1, format_2))
                    return
                output_ext = (
                    formats_info[0]['ext']
                    if self.params.get('merge_output_format') is None
                    else self.params['merge_output_format'])
                return {
                    'requested_formats': formats_info,
                    'format': '%s+%s' % (formats_info[0].get('format'),
                                         formats_info[1].get('format')),
                    'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                            formats_info[1].get('format_id')),
                    'width': formats_info[0].get('width'),
                    'height': formats_info[0].get('height'),
                    'resolution': formats_info[0].get('resolution'),
                    'fps': formats_info[0].get('fps'),
                    'vcodec': formats_info[0].get('vcodec'),
                    'vbr': formats_info[0].get('vbr'),
                    'stretched_ratio': formats_info[0].get('stretched_ratio'),
                    'acodec': formats_info[1].get('acodec'),
                    'abr': formats_info[1].get('abr'),
                    'ext': output_ext,
                }
            video_selector, audio_selector = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(
                        video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
                    merged = _merge(pair)
                    # _merge returns None after reporting an invalid pair;
                    # never hand None to the caller as if it were a format
                    if merged is not None:
                        yield merged

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Work on a copy so that filtering does not affect other selectors
            ctx_copy = copy.deepcopy(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        """Iterator over a token list that can step back by one token."""

        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)

1376

1377

def _calc_headers(self, info_dict):
    """Return the HTTP headers dict to use for info_dict.

    Starts from a copy of std_headers, overlays info_dict['http_headers']
    if present, adds a 'Cookie' header when _calc_cookies() returns one,
    and sets 'X-Forwarded-For' from info_dict['__x_forwarded_for_ip']
    unless that header is already present.
    """
    res = std_headers.copy()

    add_headers = info_dict.get('http_headers')
    if add_headers:
        res.update(add_headers)

    cookies = self._calc_cookies(info_dict)
    if cookies:
        res['Cookie'] = cookies

    if 'X-Forwarded-For' not in res:
        x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
        if x_forwarded_for_ip:
            res['X-Forwarded-For'] = x_forwarded_for_ip

    return res

def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header value the cookie jar adds for info_dict['url'].

    A throwaway request for the URL is built, self.cookiejar adds its
    cookie header to it, and that header is read back from the request.
    """
    pr = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(pr)
    return pr.get_header('Cookie')

1399

1400

def process_video_result(self, info_dict, download=True):
    """Normalize a single video result, select its formats and process them.

    info_dict is modified in place: fields are sanitized, thumbnails and
    subtitles are normalized, every format gets 'format_id', 'format',
    'ext', 'protocol' and 'http_headers', and finally info_dict is updated
    with the last selected format.  When download is true, process_info()
    is called for each selected format.

    Returns info_dict, or None when one of the list_thumbnails /
    listsubtitles / listformats params caused an early listing.

    Raises ExtractorError when 'id' or 'title' is missing, when no usable
    format exists, or when the requested format is not available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    def report_force_conversion(field, field_not, conversion):
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        field = info.get(string_field)
        if field is None or isinstance(field, compat_str):
            return
        report_force_conversion(string_field, 'a string', 'string')
        info[string_field] = compat_str(field)

    def sanitize_numeric_fields(info):
        for numeric_field in self._NUMERIC_FIELDS:
            field = info.get(numeric_field)
            if field is None or isinstance(field, compat_numeric_types):
                continue
            report_force_conversion(numeric_field, 'numeric', 'int')
            info[numeric_field] = int_or_none(field)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    if thumbnails:
        # Missing values sort first, so the last thumbnail is the preferred one
        thumbnails.sort(key=lambda t: (
            t.get('preference') if t.get('preference') is not None else -1,
            t.get('width') if t.get('width') is not None else -1,
            t.get('height') if t.get('height') is not None else -1,
            t.get('id') if t.get('id') is not None else '', t.get('url')))
        for i, t in enumerate(thumbnails):
            t['url'] = sanitize_url(t['url'])
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            if t.get('id') is None:
                t['id'] = '%d' % i

    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    thumbnail = info_dict.get('thumbnail')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        try:
            upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            pass

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

    subtitles = info_dict.get('subtitles')
    if subtitles:
        for _, subtitle in subtitles.items():
            for subtitle_format in subtitle:
                if subtitle_format.get('url'):
                    subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                if subtitle_format.get('ext') is None:
                    subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        return
    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles,
        info_dict.get('automatic_captions'))

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    def is_wellformed(f):
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = list(filter(is_wellformed, formats))

    # Every format may have been dropped above; without this check the
    # formats[0] access below would fail with an IndexError
    if not formats:
        raise ExtractorError('No video formats found!')

    formats_dict = {}

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        sanitize_string_field(fmt, 'format_id')
        sanitize_numeric_fields(fmt)
        fmt['url'] = sanitize_url(fmt['url'])
        if not fmt.get('format_id'):
            fmt['format_id'] = compat_str(i)
        else:
            # Sanitize format_id from characters used in format selector expression
            fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id'])
        format_id = fmt['format_id']
        if format_id not in formats_dict:
            formats_dict[format_id] = []
        formats_dict[format_id].append(fmt)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_dict.items():
        if len(ambiguous_formats) > 1:
            for i, fmt in enumerate(ambiguous_formats):
                fmt['format_id'] = '%s-%d' % (format_id, i)

    for i, fmt in enumerate(formats):
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if fmt.get('ext') is None:
            fmt['ext'] = determine_ext(fmt['url']).lower()
        # Automatically determine protocol if missing (useful for format
        # selection purposes)
        if fmt.get('protocol') is None:
            fmt['protocol'] = determine_protocol(fmt)
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(fmt)
        fmt['http_headers'] = self._calc_headers(full_format_info)
    # Remove private housekeeping stuff
    if '__x_forwarded_for_ip' in info_dict:
        del info_dict['__x_forwarded_for_ip']

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        self.list_formats(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = self._default_format_spec(info_dict, download=download)
        if self.params.get('verbose'):
            self.to_stdout('[debug] Default format spec: %s' % req_format)

    format_selector = self.build_format_selector(req_format)

    # While in format selection we may need to have an access to the original
    # format set in order to calculate some metrics or do some processing.
    # For now we need to be able to guess whether original formats provided
    # by extractor are incomplete or not (i.e. whether extractor provides only
    # video-only or audio-only formats) for proper formats selection for
    # extractors with such incomplete formats (see
    # https://github.com/rg3/youtube-dl/pull/5556).
    # Since formats may be filtered during format selection and may not match
    # the original formats the results may be incorrect. Thus original formats
    # or pre-calculated metrics should be passed to format selection routines
    # as well.
    # We will pass a context object containing all necessary additional data
    # instead of just formats.
    # This fixes incorrect format selection issue (see
    # https://github.com/rg3/youtube-dl/issues/10083).
    incomplete_formats = (
        # All formats are video-only or
        all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
        # all formats are audio-only
        all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

    ctx = {
        'formats': formats,
        'incomplete_formats': incomplete_formats,
    }

    formats_to_download = list(format_selector(ctx))
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict

1631

1632

def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    normal_subtitles and automatic_captions map a language code to a list
    of subtitle format dicts (each with an 'ext' key).  Normal subtitles
    are considered only with the 'writesubtitles' param, automatic
    captions only with 'writeautomaticsub' and only for languages that
    have no normal subtitles.

    Returns a dict mapping each selected language to the chosen format
    dict, or None when nothing is requested or available.  video_id is
    used in warning messages only.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            if lang not in available_subs:
                available_subs[lang] = cap_info

    # Also covers the case where neither 'writesubtitles' nor
    # 'writeautomaticsub' is set: nothing was collected above then
    if not available_subs:
        return None

    if self.params.get('allsubtitles', False):
        requested_langs = available_subs.keys()
    elif self.params.get('subtitleslangs', False):
        requested_langs = self.params.get('subtitleslangs')
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [list(available_subs.keys())[0]]

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty format list is treated like a missing language; indexing
        # it below would raise IndexError
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            matches = [sub for sub in formats if sub['ext'] == ext]
            if matches:
                chosen = matches[-1]
                break
        else:
            # No preference matched: fall back to the last listed format
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs

def process_info(self, info_dict):
    """Process a single resolved IE result.

    Applies the 'max_downloads' limit and the entry filter, prints the
    fields requested through the force* params, writes the requested side
    files (description, annotations, subtitles, info JSON, thumbnails),
    downloads the video unless 'skip_download' is set, schedules fixup
    postprocessors, runs post-processing and records the download in the
    archive.

    Raises MaxDownloadsReached when the download limit is already hit and
    UnavailableVideoError when the download fails with an OS/IO error.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict, incomplete=False)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    info_dict['_filename'] = filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        if info_dict.get('requested_formats') is not None:
            for f in info_dict['requested_formats']:
                self.to_stdout(f['url'] + f.get('play_path', ''))
        else:
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        self.to_stdout(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    def ensure_dir_exists(path):
        # Create the parent directory of path; report and return False on failure
        try:
            dn = os.path.dirname(path)
            if dn and not os.path.exists(dn):
                os.makedirs(dn)
            return True
        except (OSError, IOError) as err:
            self.report_error('unable to create directory ' + error_to_compat_str(err))
            return False

    if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
        return

    if self.params.get('writedescription', False):
        descfn = replace_extension(filename, 'description', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = (
        self.params.get('writesubtitles', False) or
        self.params.get('writeautomaticsub'))

    if subtitles_are_requested and info_dict.get('requested_subtitles'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['requested_subtitles']
        ie = self.get_info_extractor(info_dict['extractor_key'])
        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(filename, sub_lang, sub_format)
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
            else:
                self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                if sub_info.get('data') is not None:
                    try:
                        # Use newline='' to prevent conversion of newline characters
                        # See https://github.com/rg3/youtube-dl/issues/10268
                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                            subfile.write(sub_info['data'])
                    except (OSError, IOError):
                        self.report_error('Cannot write subtitles file ' + sub_filename)
                        return
                else:
                    try:
                        sub_data = ie._request_webpage(
                            sub_info['url'], info_dict['id'], note=False).read()
                        with io.open(encodeFilename(sub_filename), 'wb') as subfile:
                            subfile.write(sub_data)
                    except (ExtractorError, IOError, OSError, ValueError) as err:
                        self.report_warning('Unable to download subtitle for "%s": %s' %
                                            (sub_lang, error_to_compat_str(err)))
                        continue

    if self.params.get('writeinfojson', False):
        infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(self.filter_requested_info(info_dict), infofn)
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    self._write_thumbnails(info_dict, filename)

    if not self.params.get('skip_download', False):
        try:
            def dl(name, info):
                fd = get_suitable_downloader(info, self.params)(self, self.params)
                for ph in self._progress_hooks:
                    fd.add_progress_hook(ph)
                if self.params.get('verbose'):
                    self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                return fd.download(name, info)

            if info_dict.get('requested_formats') is not None:
                downloaded = []
                success = True
                merger = FFmpegMergerPP(self)
                if not merger.available:
                    postprocessors = []
                    self.report_warning('You have requested multiple '
                                        'formats but ffmpeg or avconv are not installed.'
                                        ' The formats won\'t be merged.')
                else:
                    postprocessors = [merger]

                def compatible_formats(formats):
                    video, audio = formats
                    # Check extension
                    video_ext, audio_ext = video.get('ext'), audio.get('ext')
                    if video_ext and audio_ext:
                        COMPATIBLE_EXTS = (
                            ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
                            # The trailing comma matters: a bare ('webm') is just
                            # the string 'webm' and `in` would do substring matching
                            ('webm',),
                        )
                        for exts in COMPATIBLE_EXTS:
                            if video_ext in exts and audio_ext in exts:
                                return True
                    # TODO: Check acodec/vcodec
                    return False

                filename_real_ext = os.path.splitext(filename)[1][1:]
                filename_wo_ext = (
                    os.path.splitext(filename)[0]
                    if filename_real_ext == info_dict['ext']
                    else filename)
                requested_formats = info_dict['requested_formats']
                if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                    info_dict['ext'] = 'mkv'
                    self.report_warning(
                        'Requested formats are incompatible for merge and will be merged into mkv.')
                # Ensure filename always has a correct extension for successful merge
                filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                if os.path.exists(encodeFilename(filename)):
                    self.to_screen(
                        '[download] %s has already been downloaded and '
                        'merged' % filename)
                else:
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = prepend_extension(
                            self.prepare_filename(new_info),
                            'f%s' % f['format_id'], new_info['ext'])
                        if not ensure_dir_exists(fname):
                            return
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
            else:
                # Just a single file
                success = dl(filename, info_dict)
        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and filename != '-':
            # Fixup content
            fixup_policy = self.params.get('fixup')
            if fixup_policy is None:
                fixup_policy = 'detect_or_warn'

            INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'

            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(stretched_pp)
                    else:
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). %s'
                            % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if (info_dict.get('requested_formats') is None and
                    info_dict.get('container') == 'm4a_dash'):
                if fixup_policy == 'warn':
                    self.report_warning(
                        '%s: writing DASH m4a. '
                        'Only some players support this container.'
                        % info_dict['id'])
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM4aPP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            if (info_dict.get('protocol') == 'm3u8_native' or
                    info_dict.get('protocol') == 'm3u8' and
                    self.params.get('hls_prefer_native')):
                if fixup_policy == 'warn':
                    self.report_warning('%s: malformed AAC bitstream detected.' % (
                        info_dict['id']))
                elif fixup_policy == 'detect_or_warn':
                    fixup_pp = FFmpegFixupM3u8PP(self)
                    if fixup_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(fixup_pp)
                    else:
                        self.report_warning(
                            '%s: malformed AAC bitstream detected. %s'
                            % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                else:
                    assert fixup_policy in ('ignore', 'never')

            try:
                self.post_process(filename, info_dict)
            except PostProcessingError as err:
                self.report_error('postprocessing: %s' % str(err))
                return
            self.record_download_archive(info_dict)

1979

1980

def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError before downloading anything when more than one
    URL is given, the output template is neither '-' nor contains a '%'
    field, and 'max_downloads' is not 1.  MaxDownloadsReached is announced
    on screen and re-raised.  Returns self._download_retcode.
    """
    template = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    if len(url_list) > 1:
        writes_single_file = template != '-' and '%' not in template
        if writes_single_file and self.params.get('max_downloads') != 1:
            raise SameFileError(template)

    for url in url_list:
        try:
            # extract_info also downloads the videos
            result = self.extract_info(
                url,
                force_generic_extractor=self.params.get('force_generic_extractor', False))
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise

        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(result))

    return self._download_retcode

2004

2005

def download_with_info_file(self, info_filename):
    """Process the info JSON stored in info_filename with download=True.

    The loaded dict is passed through filter_requested_info first.  If
    processing raises DownloadError and the info has a 'webpage_url', a
    warning is reported and the result of downloading that URL is returned;
    without a 'webpage_url' the error is re-raised.  Otherwise returns
    self._download_retcode.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        loaded = json.loads('\n'.join(lines))
        info = self.filter_requested_info(loaded)

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])
    return self._download_retcode

2021

2022

@staticmethod
def filter_requested_info(info_dict):
    """Return a new dict with every entry of info_dict except the
    'requested_formats' and 'requested_subtitles' keys."""
    dropped = ('requested_formats', 'requested_subtitles')
    kept = {}
    for key, value in info_dict.items():
        if key in dropped:
            continue
        kept[key] = value
    return kept

2027

2028

def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Postprocessors listed under ie_info['__postprocessors'] run first,
    followed by those in self._pps.  Each one receives the info dict
    returned by the previous one (starting from a copy of ie_info with
    'filepath' set to filename).  Files a postprocessor reports for
    deletion are removed unless the 'keepvideo' param is set.
    """
    info = dict(ie_info)
    info['filepath'] = filename

    extra_pps = ie_info.get('__postprocessors')
    chain = [] if extra_pps is None else list(extra_pps)
    chain += self._pps

    for pp in chain:
        obsolete_files = []
        try:
            obsolete_files, info = pp.run(info)
        except PostProcessingError as e:
            # Reported, then the chain carries on with the previous info
            self.report_error(e.msg)

        if not obsolete_files or self.params.get('keepvideo', False):
            continue
        for stale_path in obsolete_files:
            self.to_screen('Deleting original file %s (pass -k to keep)' % stale_path)
            try:
                os.remove(encodeFilename(stale_path))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded original file')

2049

2050

def _make_archive_id(self, info_dict):

2051

# Future-proof against any change in case

2052

# and backwards compatibility with prior versions

2053

extractor = info_dict.get('extractor_key')

2054

if extractor is None:

2055

if 'id' in info_dict:

2056

extractor = info_dict.get('ie_key') # key in a playlist

2057

if extractor is None:

2058

return None # Incomplete video information

2059

return extractor.lower() + ' ' + info_dict['id']

2060

2061

def in_download_archive(self, info_dict):
    """Return True if the video's archive id is a line of the archive file.

    Returns False when no 'download_archive' param is set, when no archive
    id can be built for info_dict, or when the archive file does not exist
    (ENOENT).  Any other IOError is re-raised.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted = self._make_archive_id(info_dict)
    if wanted is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            return any(entry.strip() == wanted for entry in archive_file)
    except IOError as ioe:
        if ioe.errno != errno.ENOENT:
            raise
    return False

def record_download_archive(self, info_dict):
    """Append the video's archive id as a new line to the archive file.

    Does nothing when the 'download_archive' param is not set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry = self._make_archive_id(info_dict)
        assert entry
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry + '\n')

2088

2089

@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short resolution description for a format dict.

    'audio only' when vcodec is 'none'; otherwise the explicit
    'resolution' if present; otherwise 'WxH', 'Hp' or 'Wx?' depending on
    which of width/height are known; otherwise default.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'

    resolution = format.get('resolution')
    if resolution is not None:
        return resolution

    width = format.get('width')
    height = format.get('height')
    if height is not None and width is not None:
        return '%sx%s' % (width, height)
    if height is not None:
        return '%sp' % height
    if width is not None:
        return '%dx?' % width
    return default

def _format_note(self, fdict):

2107

res = ''

2108

if fdict.get('ext') in ['f4f', 'f4m']:

2109

res += '(unsupported) '

2110

if fdict.get('language'):

2111

if res:

2112

res += ' '

2113

res += '[%s] ' % fdict['language']

2114

if fdict.get('format_note') is not None:

2115

res += fdict['format_note'] + ' '

2116

if fdict.get('tbr') is not None:

2117

res += '%4dk ' % fdict['tbr']

2118

if fdict.get('container') is not None:

2119

if res:

2120

res += ', '

2121

res += '%s container' % fdict['container']

2122

if (fdict.get('vcodec') is not None and

2123

fdict.get('vcodec') != 'none'):

2124

if res:

2125

res += ', '

2126

res += fdict['vcodec']

2127

if fdict.get('vbr') is not None:

2128

res += '@'

2129

elif fdict.get('vbr') is not None and fdict.get('abr') is not None:

2130

res += 'video@'

2131

if fdict.get('vbr') is not None:

2132

res += '%4dk' % fdict['vbr']

2133

if fdict.get('fps') is not None:

2134

if res:

2135

res += ', '

2136

res += '%sfps' % fdict['fps']

2137

if fdict.get('acodec') is not None:

2138

if res:

2139

res += ', '

2140

if fdict['acodec'] == 'none':

2141

res += 'video only'

2142

else:

2143

res += '%-5s' % fdict['acodec']

2144

elif fdict.get('abr') is not None:

if res:

res += ', '

res += 'audio'

if fdict.get('abr') is not None:

2149

res += '@%3dk' % fdict['abr']

2150

if fdict.get('asr') is not None:

2151

res += ' (%5dHz)' % fdict['asr']

2152

if fdict.get('filesize') is not None:

2153

if res:

2154

res += ', '

2155

res += format_bytes(fdict['filesize'])

2156

elif fdict.get('filesize_approx') is not None:

2157

if res:

2158

res += ', '

2159

res += '~' + format_bytes(fdict['filesize_approx'])

2160

return res

2161

2162

def list_formats(self, info_dict):
    """Print a table of the formats available for a video.

    Formats come from info_dict['formats'], or info_dict itself when that
    key is absent.  Formats with a 'preference' below -1000 are left out.
    When there is more than one format, the last listed row is tagged
    '(best)'.
    """
    formats = info_dict.get('formats', [info_dict])
    table = [
        [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
        for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    # The preference filter above may remove every row; only tag the last
    # row when one exists, otherwise table[-1] raises IndexError.
    if len(formats) > 1 and table:
        table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'

    header_line = ['format code', 'extension', 'resolution', 'note']
    self.to_screen(
        '[info] Available formats for %s:\n%s' %
        (info_dict['id'], render_table(header_line, table)))

2175

2176

def list_thumbnails(self, info_dict):
    """Print a table (ID, width, height, URL) of the video's thumbnails.

    Prints a 'No thumbnails present' notice instead when info_dict has no
    thumbnails.  Missing width/height are shown as 'unknown'.
    """
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
        return

    self.to_screen('[info] Thumbnails for %s:' % info_dict['id'])
    rows = []
    for thumb in thumbnails:
        rows.append([
            thumb['id'],
            thumb.get('width', 'unknown'),
            thumb.get('height', 'unknown'),
            thumb['url'],
        ])
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))

2187

2188

def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print a table of subtitle languages and their available formats.

    subtitles maps a language to a list of dicts carrying an 'ext' key;
    the extensions of each language are listed in reverse order.  Prints
    '<video_id> has no <name>' instead when subtitles is empty.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return

    self.to_screen('Available %s for %s:' % (name, video_id))
    rows = []
    for lang, formats in subtitles.items():
        extensions = [sub['ext'] for sub in reversed(formats)]
        rows.append([lang, ', '.join(extensions)])
    self.to_screen(render_table(['Language', 'formats'], rows))

2198

2199

def urlopen(self, req):
    """ Start an HTTP download

    req may be a URL string, which is wrapped with sanitized_Request, or
    a request object, which is passed on unchanged.  The request is opened
    through self._opener with self._socket_timeout as timeout.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)

2204

2205

def print_debug_header(self):
    """Write the '[debug]' header lines; does nothing unless 'verbose' is set.

    Reports, in order: encodings, youtube-dl version, lazy extractor
    loading, git HEAD (best effort), Python version and implementation,
    versions of external executables and the proxy map.  With 'call_home'
    set, it additionally fetches the public IP address and the latest
    released version from yt-dl.org and warns when this version is older.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    out_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encodings_line = '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        out_encoding,
        self.get_encoding())
    write_string(encodings_line, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled' + '\n')

    # Best effort: any failure to query git is silently ignored
    try:
        git = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        git_out, git_err = git.communicate()
        head = git_out.decode().strip()
        if re.match('[0-9a-f]+', head):
            self._write_string('[debug] Git HEAD: ' + head + '\n')
    except Exception:
        try:
            # Only exists on Python 2
            sys.exc_clear()
        except Exception:
            pass

    implementation = platform.python_implementation()
    if implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
        implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]
    self._write_string('[debug] Python version %s (%s) - %s\n' % (
        platform.python_version(), implementation, platform_name()))

    versions = FFmpegPostProcessor.get_versions(self)
    versions['rtmpdump'] = rtmpdump_version()
    versions['phantomjs'] = PhantomJSwrapper._version()
    # Only executables with a truthy version are listed
    listed = [
        '%s %s' % (exe, ver)
        for exe, ver in sorted(versions.items())
        if ver]
    self._write_string('[debug] exe versions: %s\n' % (', '.join(listed) or 'none'))

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if not self.params.get('call_home', False):
        return
    ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
    self._write_string('[debug] Public IP address: %s\n' % ipaddr)
    latest_version = self.urlopen(
        'https://yt-dl.org/latest/version').read().decode('utf-8')
    if version_tuple(latest_version) > version_tuple(__version__):
        self.report_warning(
            'You are using an outdated version (newest version: %s)! '
            'See https://yt-dl.org/update if you need help updating.' %
            latest_version)

2280

2281

def _setup_opener(self):
    """Build the URL opener and store it as self._opener.

    Also sets self._socket_timeout (600 unless 'socket_timeout' is given)
    and self.cookiejar (a Mozilla cookie jar when 'cookiefile' is given,
    loaded if the file is readable; a plain in-memory jar otherwise).
    Proxies come from the 'proxy' param: '' means no proxies, any other
    value is used for both http and https, and when the param is absent
    the proxies reported by getproxies() are used.
    """
    configured_timeout = self.params.get('socket_timeout')
    if configured_timeout is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(configured_timeout)

    cookie_path = self.params.get('cookiefile')
    proxy_opt = self.params.get('proxy')

    if cookie_path is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        cookie_path = expand_path(cookie_path)
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if proxy_opt is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_opt == '':
        proxies = {}
    else:
        proxies = {'http': proxy_opt, 'https': proxy_opt}
    proxy_handler = PerRequestProxyHandler(proxies)

    traffic_debug = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=traffic_debug)
    ydlh = YoutubeDLHandler(self.params, debuglevel=traffic_debug)
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/rg3/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def _refuse_file_scheme(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
    file_handler.file_open = _refuse_file_scheme

    opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

2333

2334

def encode(self, s):
    """Encode s with the encoding from get_encoding(); bytes pass through.

    On UnicodeEncodeError a hint about the system encoding configuration
    and the --encoding option is appended to the error's reason before it
    is re-raised.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s  # Already encoded

2343

2344

def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured

2349

2350

def _write_thumbnails(self, info_dict, filename):

2351

if self.params.get('writethumbnail', False):

2352

thumbnails = info_dict.get('thumbnails')

2353

if thumbnails:

2354

thumbnails = [thumbnails[-1]]

2355

elif self.params.get('write_all_thumbnails', False):

2356

thumbnails = info_dict.get('thumbnails')

else:

return

if not thumbnails:

# No thumbnails present, so return immediately

return

for t in thumbnails:

thumb_ext = determine_ext(t['url'], 'jpg')

2366

suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''

2367

thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''

2368

t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext

2369

2370

if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):

2371

self.to_screen('[%s] %s: Thumbnail %sis already present' %

2372

(info_dict['extractor'], info_dict['id'], thumb_display_id))

2373

else:

2374

self.to_screen('[%s] %s: Downloading thumbnail %s...' %

2375

(info_dict['extractor'], info_dict['id'], thumb_display_id))

2376

try:

2377

uf = self.urlopen(t['url'])

2378

with open(encodeFilename(thumb_filename), 'wb') as thumbf:

2379

shutil.copyfileobj(uf, thumbf)

2380

self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %

2381

(info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))

2382

except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:

2383

self.report_warning('Unable to download thumbnail "%s": %s' %

2384

(t['url'], error_to_compat_str(err)))