#!/usr/bin/env python
# coding: utf-8

from __future__ import absolute_import, unicode_literals

import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import io
import itertools
import json
import locale
import operator
import os
import platform
import re
import shutil
import subprocess
import socket
import sys
import time
import tokenize
import traceback
import random

from string import ascii_letters

from .compat import (
    compat_basestring,
    compat_cookiejar,
    compat_get_terminal_size,
    compat_http_client,
    compat_kwargs,
    compat_numeric_types,
    compat_os_name,
    compat_str,
    compat_tokenize_tokenize,
    compat_urllib_error,
    compat_urllib_request,
    compat_urllib_request_DataHandler,
)
from .utils import (
    age_restricted,
    args_to_str,
    ContentTooShortError,
    date_from_str,
    DateRange,
    DEFAULT_OUTTMPL,
    determine_ext,
    determine_protocol,
    DownloadError,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    expand_path,
    ExtractorError,
    format_bytes,
    formatSeconds,
    GeoRestrictedError,
    int_or_none,
    ISO3166Utils,
    locked_file,
    make_HTTPS_handler,
    MaxDownloadsReached,
    orderedSet,
    PagedList,
    parse_filesize,
    PerRequestProxyHandler,
    platform_name,
    PostProcessingError,
    preferredencoding,
    prepend_extension,
    register_socks_protocols,
    render_table,
    replace_extension,
    SameFileError,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    str_or_none,
    subtitles_filename,
    UnavailableVideoError,
    url_basename,
    version_tuple,
    write_json_file,
    write_string,
    YoutubeDLCookieJar,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
from .extractor.openload import PhantomJSwrapper
from .downloader import get_suitable_downloader
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    FFmpegSubtitlesConvertorPP,
    get_postprocessor,
)
from .version import __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how
    to extract all the needed information (that is the InfoExtractors'
    task), it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge of the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    simulate:          Do not download the video files.
    format:            Video format code. See "FORMAT SELECTION" for more details.
    format_sort:       How to sort the video formats. See "Sorting Formats"
                       for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    allow_multiple_video_streams:  Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams:  Allow multiple audio streams to be merged
                       into a single file
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names.
    trim_file_name:    Limit length of filename (extension excluded).
    ignoreerrors:      Do not stop on download errors.
    force_generic_extractor: Force downloader to use the generic extractor
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlist_items:    Specific indices of playlist to download.
    playlistreverse:   Download playlist items in reverse order.
    playlistrandom:    Download playlist items in random order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    write_all_thumbnails:  Write all thumbnail formats to files
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object; download only if the upload_date
                       is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    break_on_existing: Stop the download process after attempting to download
                       a file that's in the archive.
    cookiefile:        File name where cookies should be read from and dumped to.
    nocheckcertificate: Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    geo_verification_proxy:  URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input URL is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified one.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         youtube_dlc/postprocessor/__init__.py for a list.
                       as well as any further keyword arguments for the
                       postprocessor.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.

                       If status is one of "downloading" or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    merge_output_format: Extension to use when merging formats.
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                                           about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       youtube-dlc servers for debugging.
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called with the info_dict of
                       every video.
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-1 alpha-2 country code that will be
                       used for explicit geographic restriction bypassing via
                       faking X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country

    The following options determine which downloader is picked:
    external_downloader: Executable of the external downloader to call.
                       None or unset for standard (built-in) downloader.
    hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
                       if True; use ffmpeg/avconv if False; use the downloader
                       suggested by the extractor if None.

    The following parameters are not used by YoutubeDL itself, they are used by
    the downloader (see youtube_dlc/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle,
    xattr_set_filesize, external_downloader_args, hls_use_mpegts,
    http_chunk_size.

    The following options are used by the post processors:
    prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are
                       available; otherwise prefer ffmpeg.
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A list of additional command-line arguments for the
                       postprocessor.

    The following options are used by the Youtube extractor:
    youtube_include_dash_manifest: If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH.
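
    A minimal usage sketch (for illustration only; assumes the download()
    method defined further down in this class):

        with YoutubeDL({'format': 'bestvideo+bestaudio/best'}) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])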
331 """
332
333 _NUMERIC_FIELDS = set((
334 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
335 'timestamp', 'upload_year', 'upload_month', 'upload_day',
336 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
337 'average_rating', 'comment_count', 'age_limit',
338 'start_time', 'end_time',
339 'chapter_number', 'season_number', 'episode_number',
340 'track_number', 'disc_number', 'release_year',
341 'playlist_index',
342 ))
343
344 params = None
345 _ies = []
346 _pps = []
347 _download_retcode = None
348 _num_downloads = None
349 _screen_file = None
350
351 def __init__(self, params=None, auto_init=True):
352 """Create a FileDownloader object with the given options."""
353 if params is None:
354 params = {}
355 self._ies = []
356 self._ies_instances = {}
357 self._pps = []
358 self._progress_hooks = []
359 self._download_retcode = 0
360 self._num_downloads = 0
361 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
362 self._err_file = sys.stderr
363 self.params = {
364 # Default parameters
365 'nocheckcertificate': False,
366 }
367 self.params.update(params)
368 self.cache = Cache(self)
369 self.archive = set()
370
371 """Preload the archive, if any is specified"""
372 def preload_download_archive(self):
373 fn = self.params.get('download_archive')
374 if fn is None:
375 return False
376 try:
377 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
378 for line in archive_file:
379 self.archive.add(line.strip())
380 except IOError as ioe:
381 if ioe.errno != errno.ENOENT:
382 raise
383 return False
384 return True
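
        # Note: each archive line is assumed to follow the usual
        # "<extractor_key> <video_id>" layout (e.g. "youtube BaW_jenozKc");
        # every stripped line becomes one membership entry in self.archive,
        # which in_download_archive() checks against later.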

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(
                    '%s is deprecated. Use %s instead.' % (option, suggestion))
                return True
            return False

        if self.params.get('verbose'):
            self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))

        preload_download_archive(self)

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N is the number of digits')
        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

        if params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround. Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['youtube-dlc']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s\n' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
        if not isinstance(ie, type):
            self._ies_instances[ie.ie_key()] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key. It will try to get one from
        the _ies list; if there is no instance, it will create a new one and
        add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        self._pps.append(pp)
        pp.set_downloader(self)

    def add_progress_hook(self, ph):
        """Add the progress hook (currently only for the file downloader)"""
        self._progress_hooks.append(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        return res[:-len('\n')]

    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode."""
        return self.to_stdout(message, skip_eol, check_quiet=True)

    def _write_string(self, s, out=None):
        write_string(s, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, check_quiet=False):
        """Print message to stdout if not in quiet mode."""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        elif not check_quiet or not self.params.get('quiet', False):
            message = self._bidi_workaround(message)
            terminator = ['\n', ''][skip_eol]
            output = message + terminator

            self._write_string(output, self._screen_file)

    def to_stderr(self, message):
        """Print message to stderr."""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            message = self._bidi_workaround(message)
            output = message + '\n'
            self._write_string(output, self._err_file)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)

    def save_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate', False):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)

    def restore_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate', False):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    def report_warning(self, message):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
                _msg_header = '\033[0;33mWARNING:\033[0m'
            else:
                _msg_header = 'WARNING:'
            warning_message = '%s %s' % (_msg_header, message)
            self.to_stderr(warning_message)

    def report_error(self, message, tb=None):
        '''
        Do the same as trouble, but prefix the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
            _msg_header = '\033[0;31mERROR:\033[0m'
        else:
            _msg_header = 'ERROR:'
        error_message = '%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def prepare_filename(self, info_dict):
        """Generate the output filename."""
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '%dx?' % template_dict['width']

            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None and not isinstance(v, (list, tuple, dict)))
            template_dict = collections.defaultdict(lambda: 'NA', template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

            # For fields playlist_index and autonumber convert all occurrences
            # of %(field)s to %(field)0Nd for backward compatibility
            field_size_compat_map = {
                'playlist_index': len(str(template_dict['n_entries'])),
                'autonumber': autonumber_size,
            }
            FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
            mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
            if mobj:
                outtmpl = re.sub(
                    FIELD_SIZE_COMPAT_RE,
                    r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
                    outtmpl)
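
            # For illustration: with a 100-entry playlist,
            # '%(playlist_index)s' in the template becomes
            # '%(playlist_index)03d', so indices render as 001..100.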

            # Missing numeric fields used together with integer presentation types
            # in format specification will break the argument substitution since
            # string 'NA' is returned for missing fields. We will patch output
            # template for missing fields to meet string presentation type.
            for numeric_field in self._NUMERIC_FIELDS:
                if numeric_field not in template_dict:
                    # As of [1] format syntax is:
                    # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
                    # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
                    FORMAT_RE = r'''(?x)
                        (?<!%)
                        %
                        \({0}\)  # mapping key
                        (?:[#0\-+ ]+)?  # conversion flags (optional)
                        (?:\d+)?  # minimum field width (optional)
                        (?:\.\d+)?  # precision (optional)
                        [hlL]?  # length modifier (optional)
                        [diouxXeEfFgGcrs%]  # conversion type
                    '''
                    outtmpl = re.sub(
                        FORMAT_RE.format(numeric_field),
                        r'%({0})s'.format(numeric_field), outtmpl)

            # expand_path translates '%%' into '%' and '$$' into '$',
            # which is not what we want, since we need to keep '%%' intact for
            # the template dict substitution step. Work around this with a
            # boundary-like separator hack.
            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
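
            # For illustration: in '100%% of %(title)s', '%%' becomes '%<sep>%'
            # so expand_path() below cannot collapse it; replace(sep, '') then
            # restores '%%' in time for the '%' substitution into template_dict.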

            # outtmpl should be expand_path'ed before template dict substitution
            # because meta fields may contain env variables we don't want to
            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
            # title "Hello $PATH", we don't want `$PATH` to be expanded.
            filename = expand_path(outtmpl).replace(sep, '') % template_dict

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to work around encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return sanitize_path(filename)
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def _match_entry(self, info_dict, incomplete):
        """ Returns None if the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title
        if self.in_download_archive(info_dict):
            return '%s has already been recorded in archive' % video_title

        if not incomplete:
            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                ret = match_filter(info_dict)
                if ret is not None:
                    return ret

        return None

    @staticmethod
    def add_extra_info(info_dict, extra_info):
        '''Set the keys from extra_info in info dict if they are missing'''
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)

    def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
                     process=True, force_generic_extractor=False):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        '''

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            ie_key = ie.ie_key()
            ie = self.get_info_extractor(ie_key)
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
            except (AssertionError, IndexError, AttributeError):
                temp_id = None
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                break

            return self.__extract_info(url, ie, download, extra_info, process, info_dict)

        else:
            self.report_error('no suitable InfoExtractor for URL %s' % url)

    def __handle_extraction_exceptions(func):
        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except MaxDownloadsReached:
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapper

    @__handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process, info_dict):
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        if info_dict:
            if info_dict.get('id'):
                ie_result['id'] = info_dict['id']
            if info_dict.get('title'):
                ie_result['title'] = info_dict['title']
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result

    def add_default_extra_info(self, ie_result, ie, url):
        self.add_extra_info(ie_result, {
            'extractor': ie.IE_NAME,
            'webpage_url': url,
            'webpage_url_basename': url_basename(url),
            'extractor_key': ie.ie_key(),
        })

    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie (may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            ie_result['url'] = sanitize_url(ie_result['url'])
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                    or extract_flat is True):
                self.__forced_printings(
                    ie_result, self.prepare_filename(ie_result),
                    incomplete=True)
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download, info_dict=ie_result,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # extract_info may return None when ignoreerrors is enabled and
            # extraction failed with an error; don't crash and return early
            # in this case
            if not info:
                return info

            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            # Extracted info may not be a video result (i.e.
            # info.get('_type', 'video') != video) but rather a URL or
            # url_transparent. In such cases outer metadata (from ie_result)
            # should be propagated to the inner one (info). For this to happen,
            # _type of info should be overridden with url_transparent. This
            # fixes the issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
            if new_result.get('_type') == 'url':
                new_result['_type'] = 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type in ('playlist', 'multi_video'):
            # We process each entry in the playlist
            playlist = ie_result.get('title') or ie_result.get('id')
            self.to_screen('[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend')
            # For backwards compatibility, interpret -1 as whole list
            if playlistend == -1:
                playlistend = None

            playlistitems_str = self.params.get('playlist_items')
            playlistitems = None
            if playlistitems_str is not None:
                def iter_playlistitems(format):
                    for string_segment in format.split(','):
                        if '-' in string_segment:
                            start, end = string_segment.split('-')
                            for item in range(int(start), int(end) + 1):
                                yield int(item)
                        else:
                            yield int(string_segment)
                playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
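
                # For illustration: playlist_items '1-3,7' yields 1, 2, 3, 7;
                # orderedSet() drops later duplicates while preserving order.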

            ie_entries = ie_result['entries']

            def make_playlistitems_entries(list_ie_entries):
                num_entries = len(list_ie_entries)
                return [
                    list_ie_entries[i - 1] for i in playlistitems
                    if -num_entries <= i - 1 < num_entries]

            def report_download(num_entries):
                self.to_screen(
                    '[%s] playlist %s: Downloading %d videos' %
                    (ie_result['extractor'], playlist, num_entries))

            if isinstance(ie_entries, list):
                n_all_entries = len(ie_entries)
                if playlistitems:
                    entries = make_playlistitems_entries(ie_entries)
                else:
                    entries = ie_entries[playliststart:playlistend]
                n_entries = len(entries)
                self.to_screen(
                    '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                    (ie_result['extractor'], playlist, n_all_entries, n_entries))
            elif isinstance(ie_entries, PagedList):
                if playlistitems:
                    entries = []
                    for item in playlistitems:
                        entries.extend(ie_entries.getslice(
                            item - 1, item
                        ))
                else:
                    entries = ie_entries.getslice(
                        playliststart, playlistend)
                n_entries = len(entries)
                report_download(n_entries)
            else:  # iterable
                if playlistitems:
                    entries = make_playlistitems_entries(list(itertools.islice(
                        ie_entries, 0, max(playlistitems))))
                else:
                    entries = list(itertools.islice(
                        ie_entries, playliststart, playlistend))
                n_entries = len(entries)
                report_download(n_entries)

            if self.params.get('playlistreverse', False):
                entries = entries[::-1]

            if self.params.get('playlistrandom', False):
                random.shuffle(entries)

            x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

            for i, entry in enumerate(entries, 1):
                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
                # This __x_forwarded_for_ip thing is a bit ugly but requires
                # minimal changes
                if x_forwarded_for:
                    entry['__x_forwarded_for_ip'] = x_forwarded_for
                extra = {
                    'n_entries': n_entries,
                    'playlist': playlist,
                    'playlist_id': ie_result.get('id'),
                    'playlist_title': ie_result.get('title'),
                    'playlist_uploader': ie_result.get('uploader'),
                    'playlist_uploader_id': ie_result.get('uploader_id'),
                    'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }

                reason = self._match_entry(entry, incomplete=True)
                if reason is not None:
                    if reason.endswith('has already been recorded in archive') and self.params.get('break_on_existing'):
                        self.to_screen(
                            '[download] Tried downloading a file that is already in the archive; '
                            'stopping since --break-on-existing is set.')
                        break
                    else:
                        self.to_screen('[download] ' + reason)
                        continue

                entry_result = self.__process_iterable_entry(entry, download, extra)
                # TODO: skip failed (empty) entries?
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            self.to_screen('[download] Finished downloading playlist: %s' % playlist)
            return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)

    @__handle_extraction_exceptions
    def __process_iterable_entry(self, entry, download, extra_info):
        return self.process_ie_result(
            entry, download=download, extra_info=extra_info)

    def _build_format_filter(self, filter_spec):
        """Returns a function to filter the formats according to the filter_spec"""

        OPERATORS = {
            '<': operator.lt,
            '<=': operator.le,
            '>': operator.gt,
            '>=': operator.ge,
            '=': operator.eq,
            '!=': operator.ne,
        }
        operator_rex = re.compile(r'''(?x)\s*
            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
            $
            ''' % '|'.join(map(re.escape, OPERATORS.keys())))
        m = operator_rex.search(filter_spec)
        if m:
            try:
                comparison_value = int(m.group('value'))
            except ValueError:
                comparison_value = parse_filesize(m.group('value'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('value') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid value %r in format specification %r' % (
                            m.group('value'), filter_spec))
            op = OPERATORS[m.group('op')]

        if not m:
            STR_OPERATORS = {
                '=': operator.eq,
                '^=': lambda attr, value: attr.startswith(value),
                '$=': lambda attr, value: attr.endswith(value),
                '*=': lambda attr, value: value in attr,
            }
            str_operator_rex = re.compile(r'''(?x)
                \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
                \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
                \s*(?P<value>[a-zA-Z0-9._-]+)
                \s*$
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
            m = str_operator_rex.search(filter_spec)
            if m:
                comparison_value = m.group('value')
                str_op = STR_OPERATORS[m.group('op')]
                if m.group('negation'):
                    op = lambda attr, value: not str_op(attr, value)
                else:
                    op = str_op

        if not m:
            raise ValueError('Invalid filter specification %r' % filter_spec)

        def _filter(f):
            actual_value = f.get(m.group('key'))
            if actual_value is None:
                return m.group('none_inclusive')
            return op(actual_value, comparison_value)
        return _filter
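
        # For illustration: _build_format_filter('height<=720') keeps formats
        # whose height is at most 720; a trailing '?', as in 'height<=720?',
        # also keeps formats that do not report a height at all.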

    def _default_format_spec(self, info_dict, download=True):

        def can_merge():
            merger = FFmpegMergerPP(self)
            return merger.available and merger.can_merge()

        def prefer_best():
            if self.params.get('simulate', False):
                return False
            if not download:
                return False
            if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
                return True
            if info_dict.get('is_live'):
                return True
            if not can_merge():
                return True
            return False

        req_format_list = ['bestvideo+bestaudio', 'best']
        if prefer_best():
            req_format_list.reverse()
        return '/'.join(req_format_list)
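
        # So the default is 'bestvideo+bestaudio/best' when merging is
        # possible, and 'best/bestvideo+bestaudio' when it is not (output to
        # stdout with '-', live streams, or no working ffmpeg/avconv merger).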

    def build_format_selector(self, format_spec):
        def syntax_error(note, start):
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', True),
                                  'video': self.params.get('allow_multiple_video_streams', True)}

        def _parse_filter(tokens):
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        if not current_selector:
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if not current_selector:
                            raise syntax_error('Unexpected "+"', start)
                        selector_1 = current_selector
                        selector_2 = _parse_format_selection(tokens, inside_merge=True)
                        if not selector_2:
                            raise syntax_error('Expected a selector', start)
                        current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            if isinstance(selector, list):  # ,
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        for format in f(ctx):
                            yield format
                return selector_function

            elif selector.type == GROUP:  # ()
                selector_function = _build_selector_function(selector.selector)

            elif selector.type == PICKFIRST:  # /
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []

            elif selector.type == SINGLE:  # atom
                format_spec = selector.selector if selector.selector is not None else 'best'

                if format_spec == 'all':
                    def selector_function(ctx):
                        formats = list(ctx['formats'])
                        if formats:
                            for f in formats:
                                yield f

                else:
                    format_fallback = False
                    format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
                    if format_spec_obj is not None:
                        format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
                        format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
                        not_format_type = 'v' if format_type == 'a' else 'a'
                        format_modified = format_spec_obj.group(3) is not None

                        format_fallback = not format_type and not format_modified  # for b, w
                        filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
                                    if format_type and format_modified  # bv*, ba*, wv*, wa*
                                    else (lambda f: f.get(not_format_type + 'codec') == 'none')
                                    if format_type  # bv, ba, wv, wa
                                    else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                                    if not format_modified  # b, w
                                    else None)  # b*, w*
                    else:
                        format_idx = -1
                        filter_f = ((lambda f: f.get('ext') == format_spec)
                                    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                    else (lambda f: f.get('format_id') == format_spec))  # id
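
                    # For illustration: 'bv' keeps video-only formats, 'ba*'
                    # keeps any format with an audio stream, plain 'b'/'best'
                    # requires both streams, and an unrecognized atom such as
                    # '22' or 'm4a' is matched by extension or format_id above.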

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if matches:
                        yield matches[format_idx]
                    elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        yield formats[format_idx]

            elif selector.type == MERGE:  # +
                def _merge(formats_pair):
                    format_1, format_2 = formats_pair

                    formats_info = []
                    formats_info.extend(format_1.get('requested_formats', (format_1,)))
                    formats_info.extend(format_2.get('requested_formats', (format_2,)))

                    if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                        get_no_more = {"video": False, "audio": False}
                        for (i, fmt_info) in enumerate(formats_info):
                            for aud_vid in ["audio", "video"]:
                                if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                                    if get_no_more[aud_vid]:
                                        formats_info.pop(i)
                                    get_no_more[aud_vid] = True

                    if len(formats_info) == 1:
                        return formats_info[0]

                    video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
                    audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

                    the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
                    the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

                    output_ext = self.params.get('merge_output_format')
                    if not output_ext:
                        if the_only_video:
                            output_ext = the_only_video['ext']
                        elif the_only_audio and not video_fmts:
                            output_ext = the_only_audio['ext']
                        else:
                            output_ext = 'mkv'

                    new_dict = {
                        'requested_formats': formats_info,
                        'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                        'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                        'ext': output_ext,
                    }

                    if the_only_video:
                        new_dict.update({
                            'width': the_only_video.get('width'),
                            'height': the_only_video.get('height'),
                            'resolution': the_only_video.get('resolution'),
                            'fps': the_only_video.get('fps'),
                            'vcodec': the_only_video.get('vcodec'),
                            'vbr': the_only_video.get('vbr'),
                            'stretched_ratio': the_only_video.get('stretched_ratio'),
                        })

                    if the_only_audio:
                        new_dict.update({
                            'acodec': the_only_audio.get('acodec'),
                            'abr': the_only_audio.get('abr'),
                        })

                    return new_dict

                selector_1, selector_2 = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    for pair in itertools.product(
                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                        yield _merge(pair)
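
                # For illustration: with 'bestvideo+bestaudio' each picked
                # video/audio pair is merged into one info dict whose format_id
                # joins the parts (e.g. '137+140') and whose ext honours
                # merge_output_format, falling back to the video ext or 'mkv'.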

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
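
        # A rough usage sketch (for illustration): the returned callable takes
        # a ctx dict like {'formats': formats, 'incomplete_formats': False}
        # and yields the selected format dicts, e.g.
        #   selector = self.build_format_selector('bestvideo[height<=720]+bestaudio/best')
        #   chosen = list(selector(ctx))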
1480
1481 def _calc_headers(self, info_dict):
1482 res = std_headers.copy()
1483
1484 add_headers = info_dict.get('http_headers')
1485 if add_headers:
1486 res.update(add_headers)
1487
1488 cookies = self._calc_cookies(info_dict)
1489 if cookies:
1490 res['Cookie'] = cookies
1491
1492 if 'X-Forwarded-For' not in res:
1493 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1494 if x_forwarded_for_ip:
1495 res['X-Forwarded-For'] = x_forwarded_for_ip
1496
1497 return res
1498
1499 def _calc_cookies(self, info_dict):
1500 pr = sanitized_Request(info_dict['url'])
1501 self.cookiejar.add_cookie_header(pr)
1502 return pr.get_header('Cookie')
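# Editor's sketch (hypothetical values): _calc_headers layers the format's
# own http_headers, any matching cookies from the jar and the geo-bypass IP
# on top of std_headers, so external tools see the headers the downloader uses:
#
#   info = {'url': 'https://example.com/v.mp4',
#           'http_headers': {'Referer': 'https://example.com/'}}
#   headers = self._calc_headers(info)
#   # -> std_headers plus Referer (plus Cookie/X-Forwarded-For when applicable)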
1503
1504 def process_video_result(self, info_dict, download=True):
1505 assert info_dict.get('_type', 'video') == 'video'
1506
1507 if 'id' not in info_dict:
1508 raise ExtractorError('Missing "id" field in extractor result')
1509 if 'title' not in info_dict:
1510 raise ExtractorError('Missing "title" field in extractor result')
1511
1512 def report_force_conversion(field, field_not, conversion):
1513 self.report_warning(
1514 '"%s" field is not %s - forcing %s conversion; this indicates an error in the extractor'
1515 % (field, field_not, conversion))
1516
1517 def sanitize_string_field(info, string_field):
1518 field = info.get(string_field)
1519 if field is None or isinstance(field, compat_str):
1520 return
1521 report_force_conversion(string_field, 'a string', 'string')
1522 info[string_field] = compat_str(field)
1523
1524 def sanitize_numeric_fields(info):
1525 for numeric_field in self._NUMERIC_FIELDS:
1526 field = info.get(numeric_field)
1527 if field is None or isinstance(field, compat_numeric_types):
1528 continue
1529 report_force_conversion(numeric_field, 'numeric', 'int')
1530 info[numeric_field] = int_or_none(field)
1531
1532 sanitize_string_field(info_dict, 'id')
1533 sanitize_numeric_fields(info_dict)
1534
1535 if 'playlist' not in info_dict:
1536 # It isn't part of a playlist
1537 info_dict['playlist'] = None
1538 info_dict['playlist_index'] = None
1539
1540 thumbnails = info_dict.get('thumbnails')
1541 if thumbnails is None:
1542 thumbnail = info_dict.get('thumbnail')
1543 if thumbnail:
1544 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1545 if thumbnails:
1546 thumbnails.sort(key=lambda t: (
1547 t.get('preference') if t.get('preference') is not None else -1,
1548 t.get('width') if t.get('width') is not None else -1,
1549 t.get('height') if t.get('height') is not None else -1,
1550 t.get('id') if t.get('id') is not None else '', t.get('url')))
1551 for i, t in enumerate(thumbnails):
1552 t['url'] = sanitize_url(t['url'])
1553 if t.get('width') and t.get('height'):
1554 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1555 if t.get('id') is None:
1556 t['id'] = '%d' % i
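# Editor's note (hypothetical thumbnail): after the normalization above,
#   {'url': 'https://example.com/hq.jpg', 'width': 1280, 'height': 720}
# becomes
#   {'url': ..., 'width': 1280, 'height': 720, 'resolution': '1280x720', 'id': '0'}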
1557
1558 if self.params.get('list_thumbnails'):
1559 self.list_thumbnails(info_dict)
1560 return
1561
1562 thumbnail = info_dict.get('thumbnail')
1563 if thumbnail:
1564 info_dict['thumbnail'] = sanitize_url(thumbnail)
1565 elif thumbnails:
1566 info_dict['thumbnail'] = thumbnails[-1]['url']
1567
1568 if 'display_id' not in info_dict and 'id' in info_dict:
1569 info_dict['display_id'] = info_dict['id']
1570
1571 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1572 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1573 # see http://bugs.python.org/issue1646728)
1574 try:
1575 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1576 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1577 except (ValueError, OverflowError, OSError):
1578 pass
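# Editor's note: e.g. a (hypothetical) timestamp of 1577836800 corresponds
# to 2020-01-01 00:00:00 UTC, so upload_date becomes '20200101'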
1579
1580 # Auto-generate title fields corresponding to the *_number fields when missing
1581 # in order to always have clean titles. This is very common for TV series.
1582 for field in ('chapter', 'season', 'episode'):
1583 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1584 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
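# Editor's note: e.g. an entry with episode_number == 3 and no 'episode'
# field (hypothetical values) gets episode = 'Episode 3'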
1585
1586 for cc_kind in ('subtitles', 'automatic_captions'):
1587 cc = info_dict.get(cc_kind)
1588 if cc:
1589 for _, subtitle in cc.items():
1590 for subtitle_format in subtitle:
1591 if subtitle_format.get('url'):
1592 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1593 if subtitle_format.get('ext') is None:
1594 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1595
1596 automatic_captions = info_dict.get('automatic_captions')
1597 subtitles = info_dict.get('subtitles')
1598
1599 if self.params.get('listsubtitles', False):
1600 if 'automatic_captions' in info_dict:
1601 self.list_subtitles(
1602 info_dict['id'], automatic_captions, 'automatic captions')
1603 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
1604 return
1605
1606 info_dict['requested_subtitles'] = self.process_subtitles(
1607 info_dict['id'], subtitles, automatic_captions)
1608
1609 # We now pick which formats have to be downloaded
1610 if info_dict.get('formats') is None:
1611 # There's only one format available
1612 formats = [info_dict]
1613 else:
1614 formats = info_dict['formats']
1615
1616 if not formats:
1617 raise ExtractorError('No video formats found!')
1618
1619 def is_wellformed(f):
1620 url = f.get('url')
1621 if not url:
1622 self.report_warning(
1623 '"url" field is missing or empty - skipping format; '
1624 'this indicates an error in the extractor')
1625 return False
1626 if isinstance(url, bytes):
1627 sanitize_string_field(f, 'url')
1628 return True
1629
1630 # Filter out malformed formats for better extraction robustness
1631 formats = list(filter(is_wellformed, formats))
1632
1633 formats_dict = {}
1634
1635 # Make sure every format has the format and format_id fields set
1636 for i, format in enumerate(formats):
1637 sanitize_string_field(format, 'format_id')
1638 sanitize_numeric_fields(format)
1639 format['url'] = sanitize_url(format['url'])
1640 if not format.get('format_id'):
1641 format['format_id'] = compat_str(i)
1642 else:
1643 # Sanitize format_id from characters used in format selector expression
1644 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
1645 format_id = format['format_id']
1646 if format_id not in formats_dict:
1647 formats_dict[format_id] = []
1648 formats_dict[format_id].append(format)
1649
1650 # Make sure all formats have unique format_id
1651 for format_id, ambiguous_formats in formats_dict.items():
1652 if len(ambiguous_formats) > 1:
1653 for i, format in enumerate(ambiguous_formats):
1654 format['format_id'] = '%s-%d' % (format_id, i)
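# Editor's sketch (hypothetical ids): a format_id of 'hls 720p' is first
# rewritten to 'hls_720p' by the sanitization above; if two formats then
# share the id 'hls_720p', they are renamed 'hls_720p-0' and 'hls_720p-1'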
1655
1656 for i, format in enumerate(formats):
1657 if format.get('format') is None:
1658 format['format'] = '{id} - {res}{note}'.format(
1659 id=format['format_id'],
1660 res=self.format_resolution(format),
1661 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
1662 )
1663 # Automatically determine file extension if missing
1664 if format.get('ext') is None:
1665 format['ext'] = determine_ext(format['url']).lower()
1666 # Automatically determine protocol if missing (useful for format
1667 # selection purposes)
1668 if format.get('protocol') is None:
1669 format['protocol'] = determine_protocol(format)
1670 # Add HTTP headers, so that external programs can use them from the
1671 # json output
1672 full_format_info = info_dict.copy()
1673 full_format_info.update(format)
1674 format['http_headers'] = self._calc_headers(full_format_info)
1675 # Remove private housekeeping stuff
1676 if '__x_forwarded_for_ip' in info_dict:
1677 del info_dict['__x_forwarded_for_ip']
1678
1679 # TODO Central sorting goes here
1680
1681 if formats[0] is not info_dict:
1682 # Only set the 'formats' field if the original info_dict lists them;
1683 # otherwise we end up with a circular reference: the first (and only)
1684 # element of the 'formats' field in info_dict would be info_dict itself,
1685 # which can't be exported to JSON
1686 info_dict['formats'] = formats
1687 if self.params.get('listformats'):
1688 self.list_formats(info_dict)
1689 return
1690
1691 req_format = self.params.get('format')
1692 if req_format is None:
1693 req_format = self._default_format_spec(info_dict, download=download)
1694 if self.params.get('verbose'):
1695 self.to_stdout('[debug] Default format spec: %s' % req_format)
1696
1697 format_selector = self.build_format_selector(req_format)
1698
1699 # During format selection we may need access to the original format set
1700 # in order to calculate some metrics or do some processing.
1701 # For now we need to be able to guess whether the original formats provided
1702 # by the extractor are incomplete or not (i.e. whether the extractor provides
1703 # only video-only or audio-only formats) for proper format selection for
1704 # extractors with such incomplete formats (see
1705 # https://github.com/ytdl-org/youtube-dl/pull/5556).
1706 # Since formats may be filtered during format selection and may no longer
1707 # match the original formats, the results may be incorrect. Thus the original
1708 # formats, or pre-calculated metrics, should be passed to the format
1709 # selection routines as well.
1710 # We will pass a context object containing all necessary additional data
1711 # instead of just formats.
1712 # This fixes the incorrect format selection issue (see
1713 # https://github.com/ytdl-org/youtube-dl/issues/10083).
1714 incomplete_formats = (
1715 # All formats are video-only or
1716 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
1717 # all formats are audio-only
1718 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
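# Editor's note (hypothetical case): if an extractor returned only DASH
# video-only formats, every f would have vcodec != 'none' and acodec ==
# 'none', so incomplete_formats would be True; the flag is handed to the
# selector via the ctx dict below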
1719
1720 ctx = {
1721 'formats': formats,
1722 'incomplete_formats': incomplete_formats,
1723 }
1724
1725 formats_to_download = list(format_selector(ctx))
1726 if not formats_to_download:
1727 raise ExtractorError('requested format not available',
1728 expected=True)
1729
1730 if download:
1731 self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
1732 if len(formats_to_download) > 1:
1733 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
1734 for format in formats_to_download:
1735 new_info = dict(info_dict)
1736 new_info.update(format)
1737 self.process_info(new_info)
1738 # We update the info dict with the best quality format (backwards compatibility)
1739 info_dict.update(formats_to_download[-1])
1740 return info_dict
1741
1742 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1743 """Select the requested subtitles and their format"""
1744 available_subs = {}
1745 if normal_subtitles and self.params.get('writesubtitles'):
1746 available_subs.update(normal_subtitles)
1747 if automatic_captions and self.params.get('writeautomaticsub'):
1748 for lang, cap_info in automatic_captions.items():
1749 if lang not in available_subs:
1750 available_subs[lang] = cap_info
1751
1752 if (not self.params.get('writesubtitles')
1753 and not self.params.get('writeautomaticsub')
1754 or not available_subs):
1755 return None
1756
1757 if self.params.get('allsubtitles', False):
1758 requested_langs = available_subs.keys()
1759 else:
1760 if self.params.get('subtitleslangs', False):
1761 requested_langs = self.params.get('subtitleslangs')
1762 elif 'en' in available_subs:
1763 requested_langs = ['en']
1764 else:
1765 requested_langs = [list(available_subs.keys())[0]]
1766
1767 formats_query = self.params.get('subtitlesformat', 'best')
1768 formats_preference = formats_query.split('/') if formats_query else []
1769 subs = {}
1770 for lang in requested_langs:
1771 formats = available_subs.get(lang)
1772 if formats is None:
1773 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1774 continue
1775 for ext in formats_preference:
1776 if ext == 'best':
1777 f = formats[-1]
1778 break
1779 matches = list(filter(lambda f: f['ext'] == ext, formats))
1780 if matches:
1781 f = matches[-1]
1782 break
1783 else:
1784 f = formats[-1]
1785 self.report_warning(
1786 'No subtitle format found matching "%s" for language %s, '
1787 'using %s' % (formats_query, lang, f['ext']))
1788 subs[lang] = f
1789 return subs
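# Editor's sketch: with (hypothetical) params subtitleslangs=['en', 'de'] and
# subtitlesformat='srt/best', each requested language gets the last listed
# 'srt' track if one exists, otherwise the overall last format:
#
#   subs = self.process_subtitles('abc123', normal_subs, auto_captions)
#   # -> e.g. {'en': {'ext': 'srt', ...}, 'de': {'ext': 'vtt', ...}}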
1790
1791 def __forced_printings(self, info_dict, filename, incomplete):
1792 def print_mandatory(field):
1793 if (self.params.get('force%s' % field, False)
1794 and (not incomplete or info_dict.get(field) is not None)):
1795 self.to_stdout(info_dict[field])
1796
1797 def print_optional(field):
1798 if (self.params.get('force%s' % field, False)
1799 and info_dict.get(field) is not None):
1800 self.to_stdout(info_dict[field])
1801
1802 print_mandatory('title')
1803 print_mandatory('id')
1804 if self.params.get('forceurl', False) and not incomplete:
1805 if info_dict.get('requested_formats') is not None:
1806 for f in info_dict['requested_formats']:
1807 self.to_stdout(f['url'] + f.get('play_path', ''))
1808 else:
1809 # For RTMP URLs, also include the playpath
1810 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1811 print_optional('thumbnail')
1812 print_optional('description')
1813 if self.params.get('forcefilename', False) and filename is not None:
1814 self.to_stdout(filename)
1815 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1816 self.to_stdout(formatSeconds(info_dict['duration']))
1817 print_mandatory('format')
1818 if self.params.get('forcejson', False):
1819 self.to_stdout(json.dumps(info_dict))
1820
1821 def process_info(self, info_dict):
1822 """Process a single resolved IE result."""
1823
1824 assert info_dict.get('_type', 'video') == 'video'
1825
1826 max_downloads = self.params.get('max_downloads')
1827 if max_downloads is not None:
1828 if self._num_downloads >= int(max_downloads):
1829 raise MaxDownloadsReached()
1830
1831 # TODO: backward compatibility, to be removed
1832 info_dict['fulltitle'] = info_dict['title']
1833
1834 if 'format' not in info_dict:
1835 info_dict['format'] = info_dict['ext']
1836
1837 reason = self._match_entry(info_dict, incomplete=False)
1838 if reason is not None:
1839 self.to_screen('[download] ' + reason)
1840 return
1841
1842 self._num_downloads += 1
1843
1844 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
1845
1846 # Forced printings
1847 self.__forced_printings(info_dict, filename, incomplete=False)
1848
1849 # Do nothing else if in simulate mode
1850 if self.params.get('simulate', False):
1851 return
1852
1853 if filename is None:
1854 return
1855
1856 def ensure_dir_exists(path):
1857 try:
1858 dn = os.path.dirname(path)
1859 if dn and not os.path.exists(dn):
1860 os.makedirs(dn)
1861 return True
1862 except (OSError, IOError) as err:
1863 self.report_error('unable to create directory ' + error_to_compat_str(err))
1864 return False
1865
1866 if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
1867 return
1868
1869 if self.params.get('writedescription', False):
1870 descfn = replace_extension(filename, 'description', info_dict.get('ext'))
1871 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
1872 self.to_screen('[info] Video description is already present')
1873 elif info_dict.get('description') is None:
1874 self.report_warning('There\'s no description to write.')
1875 else:
1876 try:
1877 self.to_screen('[info] Writing video description to: ' + descfn)
1878 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1879 descfile.write(info_dict['description'])
1880 except (OSError, IOError):
1881 self.report_error('Cannot write description file ' + descfn)
1882 return
1883
1884 if self.params.get('writeannotations', False):
1885 annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
1886 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
1887 self.to_screen('[info] Video annotations are already present')
1888 elif not info_dict.get('annotations'):
1889 self.report_warning('There are no annotations to write.')
1890 else:
1891 try:
1892 self.to_screen('[info] Writing video annotations to: ' + annofn)
1893 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1894 annofile.write(info_dict['annotations'])
1895 except (KeyError, TypeError):
1896 self.report_warning('There are no annotations to write.')
1897 except (OSError, IOError):
1898 self.report_error('Cannot write annotations file: ' + annofn)
1899 return
1900
1901 def dl(name, info, subtitle=False):
1902 fd = get_suitable_downloader(info, self.params)(self, self.params)
1903 for ph in self._progress_hooks:
1904 fd.add_progress_hook(ph)
1905 if self.params.get('verbose'):
1906 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1907 return fd.download(name, info, subtitle)
1908
1909 subtitles_are_requested = any([self.params.get('writesubtitles', False),
1910 self.params.get('writeautomaticsub')])
1911
1912 if subtitles_are_requested and info_dict.get('requested_subtitles'):
1913 # Subtitle download errors are already reported as problems by the relevant IE,
1914 # so processing silently continues for IEs that don't support subtitles
1915 subtitles = info_dict['requested_subtitles']
1916 # ie = self.get_info_extractor(info_dict['extractor_key'])
1917 for sub_lang, sub_info in subtitles.items():
1918 sub_format = sub_info['ext']
1919 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
1920 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
1921 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
1922 else:
1923 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
1924 if sub_info.get('data') is not None:
1925 try:
1926 # Use newline='' to prevent conversion of newline characters
1927 # See https://github.com/ytdl-org/youtube-dl/issues/10268
1928 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
1929 subfile.write(sub_info['data'])
1930 except (OSError, IOError):
1931 self.report_error('Cannot write subtitles file ' + sub_filename)
1932 return
1933 else:
1934 try:
1935 dl(sub_filename, sub_info, subtitle=True)
1936 '''
1937 if self.params.get('sleep_interval_subtitles', False):
1938 dl(sub_filename, sub_info)
1939 else:
1940 sub_data = ie._request_webpage(
1941 sub_info['url'], info_dict['id'], note=False).read()
1942 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
1943 subfile.write(sub_data)
1944 '''
1945 except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1946 self.report_warning('Unable to download subtitle for "%s": %s' %
1947 (sub_lang, error_to_compat_str(err)))
1948 continue
1949
1950 if self.params.get('skip_download', False):
1951 if self.params.get('convertsubtitles', False):
1952 subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
1953 filename_real_ext = os.path.splitext(filename)[1][1:]
1954 filename_wo_ext = (
1955 os.path.splitext(filename)[0]
1956 if filename_real_ext == info_dict['ext']
1957 else filename)
1958 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
1959 if subconv.available:
1960 info_dict.setdefault('__postprocessors', [])
1961 # info_dict['__postprocessors'].append(subconv)
1962 if os.path.exists(encodeFilename(afilename)):
1963 self.to_screen(
1964 '[download] %s has already been downloaded and '
1965 'converted' % afilename)
1966 else:
1967 try:
1968 self.post_process(filename, info_dict)
1969 except (PostProcessingError) as err:
1970 self.report_error('postprocessing: %s' % str(err))
1971 return
1972
1973 if self.params.get('writeinfojson', False):
1974 infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
1975 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
1976 self.to_screen('[info] Video description metadata is already present')
1977 else:
1978 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
1979 try:
1980 write_json_file(self.filter_requested_info(info_dict), infofn)
1981 except (OSError, IOError):
1982 self.report_error('Cannot write metadata to JSON file ' + infofn)
1983 return
1984
1985 self._write_thumbnails(info_dict, filename)
1986
1987 if not self.params.get('skip_download', False):
1988 try:
1989 if info_dict.get('requested_formats') is not None:
1990 downloaded = []
1991 success = True
1992 merger = FFmpegMergerPP(self)
1993 if not merger.available:
1994 postprocessors = []
1995 self.report_warning('You have requested multiple '
1996 'formats but ffmpeg or avconv are not installed.'
1997 ' The formats won\'t be merged.')
1998 else:
1999 postprocessors = [merger]
2000
2001 def compatible_formats(formats):
2002 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2003 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2004 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2005 if len(video_formats) > 2 or len(audio_formats) > 2:
2006 return False
2007
2008 # Check extension
2009 exts = set(format.get('ext') for format in formats)
2010 COMPATIBLE_EXTS = (
2011 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2012 set(('webm',)),
2013 )
2014 for ext_sets in COMPATIBLE_EXTS:
2015 if ext_sets.issuperset(exts):
2016 return True
2017 # TODO: Check acodec/vcodec
2018 return False
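# Editor's note (hypothetical extensions): a pair like {'mp4', 'm4a'}
# falls inside one COMPATIBLE_EXTS set and merges as-is, while
# {'mp4', 'webm'} does not, so the merge target below becomes 'mkv'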
2019
2020 filename_real_ext = os.path.splitext(filename)[1][1:]
2021 filename_wo_ext = (
2022 os.path.splitext(filename)[0]
2023 if filename_real_ext == info_dict['ext']
2024 else filename)
2025 requested_formats = info_dict['requested_formats']
2026 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2027 info_dict['ext'] = 'mkv'
2028 self.report_warning(
2029 'Requested formats are incompatible for merge and will be merged into mkv.')
2030 # Ensure filename always has a correct extension for successful merge
2031 filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
2032 if os.path.exists(encodeFilename(filename)):
2033 self.to_screen(
2034 '[download] %s has already been downloaded and '
2035 'merged' % filename)
2036 else:
2037 for f in requested_formats:
2038 new_info = dict(info_dict)
2039 new_info.update(f)
2040 fname = prepend_extension(
2041 self.prepare_filename(new_info),
2042 'f%s' % f['format_id'], new_info['ext'])
2043 if not ensure_dir_exists(fname):
2044 return
2045 downloaded.append(fname)
2046 partial_success = dl(fname, new_info)
2047 success = success and partial_success
2048 info_dict['__postprocessors'] = postprocessors
2049 info_dict['__files_to_merge'] = downloaded
2050 else:
2051 # Just a single file
2052 success = dl(filename, info_dict)
2053 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2054 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
2055 return
2056 except (OSError, IOError) as err:
2057 raise UnavailableVideoError(err)
2058 except (ContentTooShortError, ) as err:
2059 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2060 return
2061
2062 if success and filename != '-':
2063 # Fixup content
2064 fixup_policy = self.params.get('fixup')
2065 if fixup_policy is None:
2066 fixup_policy = 'detect_or_warn'
2067
2068 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
2069
2070 stretched_ratio = info_dict.get('stretched_ratio')
2071 if stretched_ratio is not None and stretched_ratio != 1:
2072 if fixup_policy == 'warn':
2073 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2074 info_dict['id'], stretched_ratio))
2075 elif fixup_policy == 'detect_or_warn':
2076 stretched_pp = FFmpegFixupStretchedPP(self)
2077 if stretched_pp.available:
2078 info_dict.setdefault('__postprocessors', [])
2079 info_dict['__postprocessors'].append(stretched_pp)
2080 else:
2081 self.report_warning(
2082 '%s: Non-uniform pixel ratio (%s). %s'
2083 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
2084 else:
2085 assert fixup_policy in ('ignore', 'never')
2086
2087 if (info_dict.get('requested_formats') is None
2088 and info_dict.get('container') == 'm4a_dash'):
2089 if fixup_policy == 'warn':
2090 self.report_warning(
2091 '%s: writing DASH m4a. '
2092 'Only some players support this container.'
2093 % info_dict['id'])
2094 elif fixup_policy == 'detect_or_warn':
2095 fixup_pp = FFmpegFixupM4aPP(self)
2096 if fixup_pp.available:
2097 info_dict.setdefault('__postprocessors', [])
2098 info_dict['__postprocessors'].append(fixup_pp)
2099 else:
2100 self.report_warning(
2101 '%s: writing DASH m4a. '
2102 'Only some players support this container. %s'
2103 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2104 else:
2105 assert fixup_policy in ('ignore', 'never')
2106
2107 if (info_dict.get('protocol') == 'm3u8_native'
2108 or (info_dict.get('protocol') == 'm3u8'
2109 and self.params.get('hls_prefer_native'))):
2110 if fixup_policy == 'warn':
2111 self.report_warning('%s: malformed AAC bitstream detected.' % (
2112 info_dict['id']))
2113 elif fixup_policy == 'detect_or_warn':
2114 fixup_pp = FFmpegFixupM3u8PP(self)
2115 if fixup_pp.available:
2116 info_dict.setdefault('__postprocessors', [])
2117 info_dict['__postprocessors'].append(fixup_pp)
2118 else:
2119 self.report_warning(
2120 '%s: malformed AAC bitstream detected. %s'
2121 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
2122 else:
2123 assert fixup_policy in ('ignore', 'never')
2124
2125 try:
2126 self.post_process(filename, info_dict)
2127 except (PostProcessingError) as err:
2128 self.report_error('postprocessing: %s' % str(err))
2129 return
2130 self.record_download_archive(info_dict)
2131
2132 def download(self, url_list):
2133 """Download a given list of URLs."""
2134 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
2135 if (len(url_list) > 1
2136 and outtmpl != '-'
2137 and '%' not in outtmpl
2138 and self.params.get('max_downloads') != 1):
2139 raise SameFileError(outtmpl)
2140
2141 for url in url_list:
2142 try:
2143 # Besides extracting metadata, this also downloads the videos
2144 res = self.extract_info(
2145 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2146 except UnavailableVideoError:
2147 self.report_error('unable to download video')
2148 except MaxDownloadsReached:
2149 self.to_screen('[info] Maximum number of downloaded files reached.')
2150 raise
2151 else:
2152 if self.params.get('dump_single_json', False):
2153 self.to_stdout(json.dumps(res))
2154
2155 return self._download_retcode
2156
2157 def download_with_info_file(self, info_filename):
2158 with contextlib.closing(fileinput.FileInput(
2159 [info_filename], mode='r',
2160 openhook=fileinput.hook_encoded('utf-8'))) as f:
2161 # FileInput doesn't have a read method, so we can't call json.load
2162 info = self.filter_requested_info(json.loads('\n'.join(f)))
2163 try:
2164 self.process_ie_result(info, download=True)
2165 except DownloadError:
2166 webpage_url = info.get('webpage_url')
2167 if webpage_url is not None:
2168 self.report_warning('The info failed to download; trying again with "%s"' % webpage_url)
2169 return self.download([webpage_url])
2170 else:
2171 raise
2172 return self._download_retcode
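# Editor's sketch: a typical round trip (paths and URL hypothetical). First
# dump the metadata with writeinfojson, then re-run from that file:
#
#   ydl = YoutubeDL({'writeinfojson': True})
#   ydl.download(['https://example.com/watch?v=abc123'])
#   ydl.download_with_info_file('Some video-abc123.info.json')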
2173
2174 @staticmethod
2175 def filter_requested_info(info_dict):
2176 return dict(
2177 (k, v) for k, v in info_dict.items()
2178 if k not in ['requested_formats', 'requested_subtitles'])
2179
2180 def post_process(self, filename, ie_info):
2181 """Run all the postprocessors on the given file."""
2182 info = dict(ie_info)
2183 info['filepath'] = filename
2184 pps_chain = []
2185 if ie_info.get('__postprocessors') is not None:
2186 pps_chain.extend(ie_info['__postprocessors'])
2187 pps_chain.extend(self._pps)
2188 for pp in pps_chain:
2189 files_to_delete = []
2190 try:
2191 files_to_delete, info = pp.run(info)
2192 except PostProcessingError as e:
2193 self.report_error(e.msg)
2194 if files_to_delete and not self.params.get('keepvideo', False):
2195 for old_filename in set(files_to_delete):
2196 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2197 try:
2198 os.remove(encodeFilename(old_filename))
2199 except (IOError, OSError):
2200 self.report_warning('Unable to remove downloaded original file')
2201
2202 def _make_archive_id(self, info_dict):
2203 video_id = info_dict.get('id')
2204 if not video_id:
2205 return
2206 # Lower-case the extractor key to future-proof against any change in case
2207 # and for backwards compatibility with prior versions
2208 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
2209 if extractor is None:
2210 url = str_or_none(info_dict.get('url'))
2211 if not url:
2212 return
2213 # Try to find matching extractor for the URL and take its ie_key
2214 for ie in self._ies:
2215 if ie.suitable(url):
2216 extractor = ie.ie_key()
2217 break
2218 else:
2219 return
2220 return extractor.lower() + ' ' + video_id
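# Editor's note: e.g. a (hypothetical) YouTube video with id 'abc123'
# yields the archive line 'youtube abc123'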
2221
2222 def in_download_archive(self, info_dict):
2223 fn = self.params.get('download_archive')
2224 if fn is None:
2225 return False
2226
2227 vid_id = self._make_archive_id(info_dict)
2228 if not vid_id:
2229 return False # Incomplete video information
2230
2231 return vid_id in self.archive
2232
2233 def record_download_archive(self, info_dict):
2234 fn = self.params.get('download_archive')
2235 if fn is None:
2236 return
2237 vid_id = self._make_archive_id(info_dict)
2238 assert vid_id
2239 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2240 archive_file.write(vid_id + '\n')
2241 self.archive.add(vid_id)
2242
2243 @staticmethod
2244 def format_resolution(format, default='unknown'):
2245 if format.get('vcodec') == 'none':
2246 return 'audio only'
2247 if format.get('resolution') is not None:
2248 return format['resolution']
2249 if format.get('height') is not None:
2250 if format.get('width') is not None:
2251 res = '%sx%s' % (format['width'], format['height'])
2252 else:
2253 res = '%sp' % format['height']
2254 elif format.get('width') is not None:
2255 res = '%dx?' % format['width']
2256 else:
2257 res = default
2258 return res
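# Editor's note (hypothetical formats): {'vcodec': 'none'} -> 'audio only';
# {'width': 1920, 'height': 1080} -> '1920x1080'; {'height': 720} -> '720p';
# {'width': 640} -> '640x?'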
2259
2260 def _format_note(self, fdict):
2261 res = ''
2262 if fdict.get('ext') in ['f4f', 'f4m']:
2263 res += '(unsupported) '
2264 if fdict.get('language'):
2265 if res:
2266 res += ' '
2267 res += '[%s] ' % fdict['language']
2268 if fdict.get('format_note') is not None:
2269 res += fdict['format_note'] + ' '
2270 if fdict.get('tbr') is not None:
2271 res += '%4dk ' % fdict['tbr']
2272 if fdict.get('container') is not None:
2273 if res:
2274 res += ', '
2275 res += '%s container' % fdict['container']
2276 if (fdict.get('vcodec') is not None
2277 and fdict.get('vcodec') != 'none'):
2278 if res:
2279 res += ', '
2280 res += fdict['vcodec']
2281 if fdict.get('vbr') is not None:
2282 res += '@'
2283 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2284 res += 'video@'
2285 if fdict.get('vbr') is not None:
2286 res += '%4dk' % fdict['vbr']
2287 if fdict.get('fps') is not None:
2288 if res:
2289 res += ', '
2290 res += '%sfps' % fdict['fps']
2291 if fdict.get('acodec') is not None:
2292 if res:
2293 res += ', '
2294 if fdict['acodec'] == 'none':
2295 res += 'video only'
2296 else:
2297 res += '%-5s' % fdict['acodec']
2298 elif fdict.get('abr') is not None:
2299 if res:
2300 res += ', '
2301 res += 'audio'
2302 if fdict.get('abr') is not None:
2303 res += '@%3dk' % fdict['abr']
2304 if fdict.get('asr') is not None:
2305 res += ' (%5dHz)' % fdict['asr']
2306 if fdict.get('filesize') is not None:
2307 if res:
2308 res += ', '
2309 res += format_bytes(fdict['filesize'])
2310 elif fdict.get('filesize_approx') is not None:
2311 if res:
2312 res += ', '
2313 res += '~' + format_bytes(fdict['filesize_approx'])
2314 return res
2315
2316 def list_formats(self, info_dict):
2317 formats = info_dict.get('formats', [info_dict])
2318 table = [
2319 [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
2320 for f in formats
2321 if f.get('preference') is None or f['preference'] >= -1000]
2322 # if len(formats) > 1:
2323 # table[-1][-1] += (' ' if table[-1][-1] else '') + '(best*)'
2324
2325 header_line = ['format code', 'extension', 'resolution', 'note']
2326 self.to_screen(
2327 '[info] Available formats for %s:\n%s' %
2328 (info_dict['id'], render_table(header_line, table)))
2329
2330 def list_thumbnails(self, info_dict):
2331 thumbnails = info_dict.get('thumbnails')
2332 if not thumbnails:
2333 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2334 return
2335
2336 self.to_screen(
2337 '[info] Thumbnails for %s:' % info_dict['id'])
2338 self.to_screen(render_table(
2339 ['ID', 'width', 'height', 'URL'],
2340 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2341
2342 def list_subtitles(self, video_id, subtitles, name='subtitles'):
2343 if not subtitles:
2344 self.to_screen('%s has no %s' % (video_id, name))
2345 return
2346 self.to_screen(
2347 'Available %s for %s:' % (name, video_id))
2348 self.to_screen(render_table(
2349 ['Language', 'formats'],
2350 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2351 for lang, formats in subtitles.items()]))
2352
2353 def urlopen(self, req):
2354 """ Start an HTTP download """
2355 if isinstance(req, compat_basestring):
2356 req = sanitized_Request(req)
2357 return self._opener.open(req, timeout=self._socket_timeout)
2358
2359 def print_debug_header(self):
2360 if not self.params.get('verbose'):
2361 return
2362
2363 if type('') is not compat_str:
2364 # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
2365 self.report_warning(
2366 'Your Python is broken! Update to a newer and supported version')
2367
2368 stdout_encoding = getattr(
2369 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
2370 encoding_str = (
2371 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2372 locale.getpreferredencoding(),
2373 sys.getfilesystemencoding(),
2374 stdout_encoding,
2375 self.get_encoding()))
2376 write_string(encoding_str, encoding=None)
2377
2378 self._write_string('[debug] youtube-dlc version ' + __version__ + '\n')
2379 if _LAZY_LOADER:
2380 self._write_string('[debug] Lazy loading extractors enabled' + '\n')
2381 try:
2382 sp = subprocess.Popen(
2383 ['git', 'rev-parse', '--short', 'HEAD'],
2384 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2385 cwd=os.path.dirname(os.path.abspath(__file__)))
2386 out, err = sp.communicate()
2387 out = out.decode().strip()
2388 if re.match('[0-9a-f]+', out):
2389 self._write_string('[debug] Git HEAD: ' + out + '\n')
2390 except Exception:
2391 try:
2392 sys.exc_clear()
2393 except Exception:
2394 pass
2395
2396 def python_implementation():
2397 impl_name = platform.python_implementation()
2398 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2399 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
2400 return impl_name
2401
2402 self._write_string('[debug] Python version %s (%s) - %s\n' % (
2403 platform.python_version(), python_implementation(),
2404 platform_name()))
2405
2406 exe_versions = FFmpegPostProcessor.get_versions(self)
2407 exe_versions['rtmpdump'] = rtmpdump_version()
2408 exe_versions['phantomjs'] = PhantomJSwrapper._version()
2409 exe_str = ', '.join(
2410 '%s %s' % (exe, v)
2411 for exe, v in sorted(exe_versions.items())
2412 if v
2413 )
2414 if not exe_str:
2415 exe_str = 'none'
2416 self._write_string('[debug] exe versions: %s\n' % exe_str)
2417
2418 proxy_map = {}
2419 for handler in self._opener.handlers:
2420 if hasattr(handler, 'proxies'):
2421 proxy_map.update(handler.proxies)
2422 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
2423
2424 if self.params.get('call_home', False):
2425 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2426 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2427 latest_version = self.urlopen(
2428 'https://yt-dl.org/latest/version').read().decode('utf-8')
2429 if version_tuple(latest_version) > version_tuple(__version__):
2430 self.report_warning(
2431 'You are using an outdated version (newest version: %s)! '
2432 'See https://yt-dl.org/update if you need help updating.' %
2433 latest_version)
2434
2435 def _setup_opener(self):
2436 timeout_val = self.params.get('socket_timeout')
2437 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
2438
2439 opts_cookiefile = self.params.get('cookiefile')
2440 opts_proxy = self.params.get('proxy')
2441
2442 if opts_cookiefile is None:
2443 self.cookiejar = compat_cookiejar.CookieJar()
2444 else:
2445 opts_cookiefile = expand_path(opts_cookiefile)
2446 self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
2447 if os.access(opts_cookiefile, os.R_OK):
2448 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
2449
2450 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
2451 if opts_proxy is not None:
2452 if opts_proxy == '':
2453 proxies = {}
2454 else:
2455 proxies = {'http': opts_proxy, 'https': opts_proxy}
2456 else:
2457 proxies = compat_urllib_request.getproxies()
2458 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
2459 if 'http' in proxies and 'https' not in proxies:
2460 proxies['https'] = proxies['http']
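# Editor's note: e.g. if only the (hypothetical) environment variable
# http_proxy=http://127.0.0.1:3128 is set, the same proxy is copied to
# 'https', yielding {'http': ..., 'https': 'http://127.0.0.1:3128'}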
2461 proxy_handler = PerRequestProxyHandler(proxies)
2462
2463 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
2464 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2465 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
2466 redirect_handler = YoutubeDLRedirectHandler()
2467 data_handler = compat_urllib_request_DataHandler()
2468
2469 # When passing our own FileHandler instance, build_opener won't add the
2470 # default FileHandler, which allows us to disable the file protocol; it
2471 # can be used for malicious purposes (see
2472 # https://github.com/ytdl-org/youtube-dl/issues/8227)
2473 file_handler = compat_urllib_request.FileHandler()
2474
2475 def file_open(*args, **kwargs):
2476 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
2477 file_handler.file_open = file_open
2478
2479 opener = compat_urllib_request.build_opener(
2480 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2481
2482 # Delete the default user-agent header, which would otherwise apply in
2483 # cases where our custom HTTP handler doesn't come into play
2484 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
2485 opener.addheaders = []
2486 self._opener = opener
2487
2488 def encode(self, s):
2489 if isinstance(s, bytes):
2490 return s # Already encoded
2491
2492 try:
2493 return s.encode(self.get_encoding())
2494 except UnicodeEncodeError as err:
2495 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2496 raise
2497
2498 def get_encoding(self):
2499 encoding = self.params.get('encoding')
2500 if encoding is None:
2501 encoding = preferredencoding()
2502 return encoding
2503
2504 def _write_thumbnails(self, info_dict, filename):
2505 if self.params.get('writethumbnail', False):
2506 thumbnails = info_dict.get('thumbnails')
2507 if thumbnails:
2508 thumbnails = [thumbnails[-1]]
2509 elif self.params.get('write_all_thumbnails', False):
2510 thumbnails = info_dict.get('thumbnails')
2511 else:
2512 return
2513
2514 if not thumbnails:
2515 # No thumbnails present, so return immediately
2516 return
2517
2518 for t in thumbnails:
2519 thumb_ext = determine_ext(t['url'], 'jpg')
2520 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2521 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
2522 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
2523
2524 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2525 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2526 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2527 else:
2528 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2529 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2530 try:
2531 uf = self.urlopen(t['url'])
2532 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
2533 shutil.copyfileobj(uf, thumbf)
2534 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2535 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2536 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2537 self.report_warning('Unable to download thumbnail "%s": %s' %
2538 (t['url'], error_to_compat_str(err)))