jfr.im git - yt-dlp.git/blame_incremental

Commit	Line	Data
	1	#!/usr/bin/env python
	2	# coding: utf-8
	3
	4	from __future__ import absolute_import, unicode_literals
	5
	6	import collections
	7	import contextlib
	8	import copy
	9	import datetime
	10	import errno
	11	import fileinput
	12	import io
	13	import itertools
	14	import json
	15	import locale
	16	import operator
	17	import os
	18	import platform
	19	import re
	20	import shutil
	21	import subprocess
	22	import socket
	23	import sys
	24	import time
	25	import tokenize
	26	import traceback
	27	import random
	28
	29	from string import ascii_letters
	30
	31	from .compat import (
	32	compat_basestring,
	33	compat_cookiejar,
	34	compat_get_terminal_size,
	35	compat_http_client,
	36	compat_kwargs,
	37	compat_numeric_types,
	38	compat_os_name,
	39	compat_str,
	40	compat_tokenize_tokenize,
	41	compat_urllib_error,
	42	compat_urllib_request,
	43	compat_urllib_request_DataHandler,
	44	)
	45	from .utils import (
	46	age_restricted,
	47	args_to_str,
	48	ContentTooShortError,
	49	date_from_str,
	50	DateRange,
	51	DEFAULT_OUTTMPL,
	52	determine_ext,
	53	determine_protocol,
	54	DOT_DESKTOP_LINK_TEMPLATE,
	55	DOT_URL_LINK_TEMPLATE,
	56	DOT_WEBLOC_LINK_TEMPLATE,
	57	DownloadError,
	58	encode_compat_str,
	59	encodeFilename,
	60	error_to_compat_str,
	61	expand_path,
	62	ExtractorError,
	63	format_bytes,
	64	format_field,
	65	formatSeconds,
	66	GeoRestrictedError,
	67	int_or_none,
	68	iri_to_uri,
	69	ISO3166Utils,
	70	locked_file,
	71	make_HTTPS_handler,
	72	MaxDownloadsReached,
	73	orderedSet,
	74	PagedList,
	75	parse_filesize,
	76	PerRequestProxyHandler,
	77	platform_name,
	78	PostProcessingError,
	79	preferredencoding,
	80	prepend_extension,
	81	register_socks_protocols,
	82	render_table,
	83	replace_extension,
	84	SameFileError,
	85	sanitize_filename,
	86	sanitize_path,
	87	sanitize_url,
	88	sanitized_Request,
	89	std_headers,
	90	str_or_none,
	91	subtitles_filename,
	92	to_high_limit_path,
	93	UnavailableVideoError,
	94	url_basename,
	95	version_tuple,
	96	write_json_file,
	97	write_string,
	98	YoutubeDLCookieJar,
	99	YoutubeDLCookieProcessor,
	100	YoutubeDLHandler,
	101	YoutubeDLRedirectHandler,
	102	)
	103	from .cache import Cache
	104	from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
	105	from .extractor.openload import PhantomJSwrapper
	106	from .downloader import get_suitable_downloader
	107	from .downloader.rtmp import rtmpdump_version
	108	from .postprocessor import (
	109	FFmpegFixupM3u8PP,
	110	FFmpegFixupM4aPP,
	111	FFmpegFixupStretchedPP,
	112	FFmpegMergerPP,
	113	FFmpegPostProcessor,
	114	FFmpegSubtitlesConvertorPP,
	115	get_postprocessor,
	116	)
	117	from .version import __version__
	118
	119	if compat_os_name == 'nt':
	120	import ctypes
	121
	122
	123	class YoutubeDL(object):
	124	"""YoutubeDL class.
	125
	126	YoutubeDL objects are the ones responsible of downloading the
	127	actual video file and writing it to disk if the user has requested
	128	it, among some other tasks. In most cases there should be one per
	129	program. As, given a video URL, the downloader doesn't know how to
	130	extract all the needed information, task that InfoExtractors do, it
	131	has to pass the URL to one of them.
	132
	133	For this, YoutubeDL objects have a method that allows
	134	InfoExtractors to be registered in a given order. When it is passed
	135	a URL, the YoutubeDL object handles it to the first InfoExtractor it
	136	finds that reports being able to handle it. The InfoExtractor extracts
	137	all the information about the video or videos the URL refers to, and
	138	YoutubeDL process the extracted information, possibly using a File
	139	Downloader to download the video.
	140
	141	YoutubeDL objects accept a lot of parameters. In order not to saturate
	142	the object constructor with arguments, it receives a dictionary of
	143	options instead. These options are available through the params
	144	attribute for the InfoExtractors to use. The YoutubeDL also
	145	registers itself as the downloader in charge for the InfoExtractors
	146	that are added to it, so this is a "mutual registration".
	147
	148	Available options:
	149
	150	username: Username for authentication purposes.
	151	password: Password for authentication purposes.
	152	videopassword: Password for accessing a video.
	153	ap_mso: Adobe Pass multiple-system operator identifier.
	154	ap_username: Multiple-system operator account username.
	155	ap_password: Multiple-system operator account password.
	156	usenetrc: Use netrc for authentication instead.
	157	verbose: Print additional info to stdout.
	158	quiet: Do not print messages to stdout.
	159	no_warnings: Do not print out anything for warnings.
	160	forceurl: Force printing final URL.
	161	forcetitle: Force printing title.
	162	forceid: Force printing ID.
	163	forcethumbnail: Force printing thumbnail URL.
	164	forcedescription: Force printing description.
	165	forcefilename: Force printing final filename.
	166	forceduration: Force printing duration.
	167	forcejson: Force printing info_dict as JSON.
	168	dump_single_json: Force printing the info_dict of the whole playlist
	169	(or video) as a single JSON line.
	170	force_write_download_archive: Force writing download archive regardless of
	171	'skip_download' or 'simulate'.
	172	simulate: Do not download the video files.
	173	format: Video format code. see "FORMAT SELECTION" for more details.
	174	format_sort: How to sort the video formats. see "Sorting Formats" for more details.
	175	format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.
	176	allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
	177	allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
	178	outtmpl: Template for output names.
	179	restrictfilenames: Do not allow "&" and spaces in file names.
	180	trim_file_name: Limit length of filename (extension excluded).
	181	ignoreerrors: Do not stop on download errors. (Default False when running youtube-dlc, but True when directly accessing YoutubeDL class)
	182	force_generic_extractor: Force downloader to use the generic extractor
	183	nooverwrites: Prevent overwriting files.
	184	playliststart: Playlist item to start at.
	185	playlistend: Playlist item to end at.
	186	playlist_items: Specific indices of playlist to download.
	187	playlistreverse: Download playlist items in reverse order.
	188	playlistrandom: Download playlist items in random order.
	189	matchtitle: Download only matching titles.
	190	rejecttitle: Reject downloads for matching titles.
	191	logger: Log messages to a logging.Logger instance.
	192	logtostderr: Log messages to stderr instead of stdout.
	193	writedescription: Write the video description to a .description file
	194	writeinfojson: Write the video description to a .info.json file
	195	writeannotations: Write the video annotations to a .annotations.xml file
	196	writethumbnail: Write the thumbnail image to a file
	197	write_all_thumbnails: Write all thumbnail formats to files
	198	writelink: Write an internet shortcut file, depending on the
	199	current platform (.url/.webloc/.desktop)
	200	writeurllink: Write a Windows internet shortcut file (.url)
	201	writewebloclink: Write a macOS internet shortcut file (.webloc)
	202	writedesktoplink: Write a Linux internet shortcut file (.desktop)
	203	writesubtitles: Write the video subtitles to a file
	204	writeautomaticsub: Write the automatically generated subtitles to a file
	205	allsubtitles: Downloads all the subtitles of the video
	206	(requires writesubtitles or writeautomaticsub)
	207	listsubtitles: Lists all available subtitles for the video
	208	subtitlesformat: The format code for subtitles
	209	subtitleslangs: List of languages of the subtitles to download
	210	keepvideo: Keep the video file after post-processing
	211	daterange: A DateRange object, download only if the upload_date is in the range.
	212	skip_download: Skip the actual download of the video file
	213	cachedir: Location of the cache files in the filesystem.
	214	False to disable filesystem cache.
	215	noplaylist: Download single video instead of a playlist if in doubt.
	216	age_limit: An integer representing the user's age in years.
	217	Unsuitable videos for the given age are skipped.
	218	min_views: An integer representing the minimum view count the video
	219	must have in order to not be skipped.
	220	Videos without view count information are always
	221	downloaded. None for no limit.
	222	max_views: An integer representing the maximum view count.
	223	Videos that are more popular than that are not
	224	downloaded.
	225	Videos without view count information are always
	226	downloaded. None for no limit.
	227	download_archive: File name of a file where all downloads are recorded.
	228	Videos already present in the file are not downloaded
	229	again.
	230	break_on_existing: Stop the download process after attempting to download a file that's
	231	in the archive.
	232	cookiefile: File name where cookies should be read from and dumped to.
	233	nocheckcertificate:Do not verify SSL certificates
	234	prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
	235	At the moment, this is only supported by YouTube.
	236	proxy: URL of the proxy server to use
	237	geo_verification_proxy: URL of the proxy to use for IP address verification
	238	on geo-restricted sites.
	239	socket_timeout: Time to wait for unresponsive hosts, in seconds
	240	bidi_workaround: Work around buggy terminals without bidirectional text
	241	support, using fridibi
	242	debug_printtraffic:Print out sent and received HTTP traffic
	243	include_ads: Download ads as well
	244	default_search: Prepend this string if an input url is not valid.
	245	'auto' for elaborate guessing
	246	encoding: Use this encoding instead of the system-specified.
	247	extract_flat: Do not resolve URLs, return the immediate result.
	248	Pass in 'in_playlist' to only show this behavior for
	249	playlist items.
	250	postprocessors: A list of dictionaries, each with an entry
	251	* key: The name of the postprocessor. See
	252	youtube_dlc/postprocessor/__init__.py for a list.
	253	as well as any further keyword arguments for the
	254	postprocessor.
	255	progress_hooks: A list of functions that get called on download
	256	progress, with a dictionary with the entries
	257	* status: One of "downloading", "error", or "finished".
	258	Check this first and ignore unknown values.
	259
	260	If status is one of "downloading", or "finished", the
	261	following properties may also be present:
	262	* filename: The final filename (always present)
	263	* tmpfilename: The filename we're currently writing to
	264	* downloaded_bytes: Bytes on disk
	265	* total_bytes: Size of the whole file, None if unknown
	266	* total_bytes_estimate: Guess of the eventual file size,
	267	None if unavailable.
	268	* elapsed: The number of seconds since download started.
	269	* eta: The estimated time in seconds, None if unknown
	270	* speed: The download speed in bytes/second, None if
	271	unknown
	272	* fragment_index: The counter of the currently
	273	downloaded video fragment.
	274	* fragment_count: The number of fragments (= individual
	275	files that will be merged)
	276
	277	Progress hooks are guaranteed to be called at least once
	278	(with status "finished") if the download is successful.
	279	merge_output_format: Extension to use when merging formats.
	280	fixup: Automatically correct known faults of the file.
	281	One of:
	282	- "never": do nothing
	283	- "warn": only emit a warning
	284	- "detect_or_warn": check whether we can do anything
	285	about it, warn otherwise (default)
	286	source_address: Client-side IP address to bind to.
	287	call_home: Boolean, true iff we are allowed to contact the
	288	youtube-dlc servers for debugging.
	289	sleep_interval: Number of seconds to sleep before each download when
	290	used alone or a lower bound of a range for randomized
	291	sleep before each download (minimum possible number
	292	of seconds to sleep) when used along with
	293	max_sleep_interval.
	294	max_sleep_interval:Upper bound of a range for randomized sleep before each
	295	download (maximum possible number of seconds to sleep).
	296	Must only be used along with sleep_interval.
	297	Actual sleep time will be a random float from range
	298	[sleep_interval; max_sleep_interval].
	299	listformats: Print an overview of available video formats and exit.
	300	list_thumbnails: Print a table of all thumbnails and exit.
	301	match_filter: A function that gets called with the info_dict of
	302	every video.
	303	If it returns a message, the video is ignored.
	304	If it returns None, the video is downloaded.
	305	match_filter_func in utils.py is one example for this.
	306	no_color: Do not emit color codes in output.
	307	geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
	308	HTTP header
	309	geo_bypass_country:
	310	Two-letter ISO 3166-2 country code that will be used for
	311	explicit geographic restriction bypassing via faking
	312	X-Forwarded-For HTTP header
	313	geo_bypass_ip_block:
	314	IP range in CIDR notation that will be used similarly to
	315	geo_bypass_country
	316
	317	The following options determine which downloader is picked:
	318	external_downloader: Executable of the external downloader to call.
	319	None or unset for standard (built-in) downloader.
	320	hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
	321	if True, otherwise use ffmpeg/avconv if False, otherwise
	322	use downloader suggested by extractor if None.
	323
	324	The following parameters are not used by YoutubeDL itself, they are used by
	325	the downloader (see youtube_dlc/downloader/common.py):
	326	nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
	327	noresizebuffer, retries, continuedl, noprogress, consoletitle,
	328	xattr_set_filesize, external_downloader_args, hls_use_mpegts,
	329	http_chunk_size.
	330
	331	The following options are used by the post processors:
	332	prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
	333	otherwise prefer ffmpeg.
	334	ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
	335	to the binary or its containing directory.
	336	postprocessor_args: A list of additional command-line arguments for the
	337	postprocessor.
	338
	339	The following options are used by the Youtube extractor:
	340	youtube_include_dash_manifest: If True (default), DASH manifests and related
	341	data will be downloaded and processed by extractor.
	342	You can reduce network I/O by disabling it if you don't
	343	care about DASH.
	344	"""
	345
	346	_NUMERIC_FIELDS = set((
	347	'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
	348	'timestamp', 'upload_year', 'upload_month', 'upload_day',
	349	'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
	350	'average_rating', 'comment_count', 'age_limit',
	351	'start_time', 'end_time',
	352	'chapter_number', 'season_number', 'episode_number',
	353	'track_number', 'disc_number', 'release_year',
	354	'playlist_index',
	355	))
	356
	357	params = None
	358	_ies = []
	359	_pps = []
	360	_download_retcode = None
	361	_num_downloads = None
	362	_screen_file = None
	363
	364	def __init__(self, params=None, auto_init=True):
	365	"""Create a FileDownloader object with the given options."""
	366	if params is None:
	367	params = {}
	368	self._ies = []
	369	self._ies_instances = {}
	370	self._pps = []
	371	self._progress_hooks = []
	372	self._download_retcode = 0
	373	self._num_downloads = 0
	374	self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
	375	self._err_file = sys.stderr
	376	self.params = {
	377	# Default parameters
	378	'nocheckcertificate': False,
	379	}
	380	self.params.update(params)
	381	self.cache = Cache(self)
	382	self.archive = set()
	383
	384	"""Preload the archive, if any is specified"""
	385	def preload_download_archive(self):
	386	fn = self.params.get('download_archive')
	387	if fn is None:
	388	return False
	389	try:
	390	with locked_file(fn, 'r', encoding='utf-8') as archive_file:
	391	for line in archive_file:
	392	self.archive.add(line.strip())
	393	except IOError as ioe:
	394	if ioe.errno != errno.ENOENT:
	395	raise
	396	return False
	397	return True
	398
	399	def check_deprecated(param, option, suggestion):
	400	if self.params.get(param) is not None:
	401	self.report_warning(
	402	'%s is deprecated. Use %s instead.' % (option, suggestion))
	403	return True
	404	return False
	405
	406	if self.params.get('verbose'):
	407	self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))
	408
	409	preload_download_archive(self)
	410
	411	if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
	412	if self.params.get('geo_verification_proxy') is None:
	413	self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
	414
	415	check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
	416	check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
	417	check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
	418
	419	if params.get('bidi_workaround', False):
	420	try:
	421	import pty
	422	master, slave = pty.openpty()
	423	width = compat_get_terminal_size().columns
	424	if width is None:
	425	width_args = []
	426	else:
	427	width_args = ['-w', str(width)]
	428	sp_kwargs = dict(
	429	stdin=subprocess.PIPE,
	430	stdout=slave,
	431	stderr=self._err_file)
	432	try:
	433	self._output_process = subprocess.Popen(
	434	['bidiv'] + width_args, **sp_kwargs
	435	)
	436	except OSError:
	437	self._output_process = subprocess.Popen(
	438	['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
	439	self._output_channel = os.fdopen(master, 'rb')
	440	except OSError as ose:
	441	if ose.errno == errno.ENOENT:
	442	self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
	443	else:
	444	raise
	445
	446	if (sys.platform != 'win32'
	447	and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
	448	and not params.get('restrictfilenames', False)):
	449	# Unicode filesystem API will throw errors (#1474, #13027)
	450	self.report_warning(
	451	'Assuming --restrict-filenames since file system encoding '
	452	'cannot encode all characters. '
	453	'Set the LC_ALL environment variable to fix this.')
	454	self.params['restrictfilenames'] = True
	455
	456	if isinstance(params.get('outtmpl'), bytes):
	457	self.report_warning(
	458	'Parameter outtmpl is bytes, but should be a unicode string. '
	459	'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
	460
	461	self._setup_opener()
	462
	463	if auto_init:
	464	self.print_debug_header()
	465	self.add_default_info_extractors()
	466
	467	for pp_def_raw in self.params.get('postprocessors', []):
	468	pp_class = get_postprocessor(pp_def_raw['key'])
	469	pp_def = dict(pp_def_raw)
	470	del pp_def['key']
	471	pp = pp_class(self, **compat_kwargs(pp_def))
	472	self.add_post_processor(pp)
	473
	474	for ph in self.params.get('progress_hooks', []):
	475	self.add_progress_hook(ph)
	476
	477	register_socks_protocols()
	478
	479	def warn_if_short_id(self, argv):
	480	# short YouTube ID starting with dash?
	481	idxs = [
	482	i for i, a in enumerate(argv)
	483	if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
	484	if idxs:
	485	correct_argv = (
	486	['youtube-dlc']
	487	+ [a for i, a in enumerate(argv) if i not in idxs]
	488	+ ['--'] + [argv[i] for i in idxs]
	489	)
	490	self.report_warning(
	491	'Long argument string detected. '
	492	'Use -- to separate parameters and URLs, like this:\n%s\n' %
	493	args_to_str(correct_argv))
	494
	495	def add_info_extractor(self, ie):
	496	"""Add an InfoExtractor object to the end of the list."""
	497	self._ies.append(ie)
	498	if not isinstance(ie, type):
	499	self._ies_instances[ie.ie_key()] = ie
	500	ie.set_downloader(self)

1

#!/usr/bin/env python

2

# coding: utf-8

3

4

from __future__ import absolute_import, unicode_literals

import collections

import contextlib

import copy

import datetime

import errno

import fileinput

import io

import itertools

import json

import locale

import operator

import os

import platform

import re

import shutil

import subprocess

import socket

import sys

import time

import tokenize

import traceback

import random

from string import ascii_letters

30

31

from .compat import (

32

compat_basestring,

33

compat_cookiejar,

34

compat_get_terminal_size,

35

compat_http_client,

36

compat_kwargs,

37

compat_numeric_types,

38

compat_os_name,

39

compat_str,

40

compat_tokenize_tokenize,

41

compat_urllib_error,

42

compat_urllib_request,

43

compat_urllib_request_DataHandler,

)

from .utils import (

age_restricted,

args_to_str,

ContentTooShortError,

date_from_str,

DateRange,

DEFAULT_OUTTMPL,

determine_ext,

determine_protocol,

DOT_DESKTOP_LINK_TEMPLATE,

55

DOT_URL_LINK_TEMPLATE,

56

DOT_WEBLOC_LINK_TEMPLATE,

DownloadError,

encode_compat_str,

encodeFilename,

error_to_compat_str,

expand_path,

ExtractorError,

format_bytes,

format_field,

formatSeconds,

GeoRestrictedError,

int_or_none,

iri_to_uri,

ISO3166Utils,

locked_file,

make_HTTPS_handler,

MaxDownloadsReached,

orderedSet,

PagedList,

parse_filesize,

PerRequestProxyHandler,

platform_name,

PostProcessingError,

preferredencoding,

prepend_extension,

register_socks_protocols,

render_table,

replace_extension,

SameFileError,

sanitize_filename,

sanitize_path,

sanitize_url,

sanitized_Request,

std_headers,

str_or_none,

subtitles_filename,

to_high_limit_path,

UnavailableVideoError,

url_basename,

version_tuple,

write_json_file,

write_string,

YoutubeDLCookieJar,

YoutubeDLCookieProcessor,

100

YoutubeDLHandler,

101

YoutubeDLRedirectHandler,

102

)

103

from .cache import Cache

104

from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER

105

from .extractor.openload import PhantomJSwrapper

106

from .downloader import get_suitable_downloader

107

from .downloader.rtmp import rtmpdump_version

108

from .postprocessor import (

109

FFmpegFixupM3u8PP,

110

FFmpegFixupM4aPP,

111

FFmpegFixupStretchedPP,

112

FFmpegMergerPP,

113

FFmpegPostProcessor,

114

FFmpegSubtitlesConvertorPP,

115

get_postprocessor,

116

)

117

from .version import __version__

118

119

if compat_os_name == 'nt':

import ctypes

class YoutubeDL(object):

124

"""YoutubeDL class.

125

126

YoutubeDL objects are the ones responsible of downloading the

127

actual video file and writing it to disk if the user has requested

128

it, among some other tasks. In most cases there should be one per

129

program. As, given a video URL, the downloader doesn't know how to

130

extract all the needed information, task that InfoExtractors do, it

131

has to pass the URL to one of them.

132

133

For this, YoutubeDL objects have a method that allows

134

InfoExtractors to be registered in a given order. When it is passed

135

a URL, the YoutubeDL object handles it to the first InfoExtractor it

136

finds that reports being able to handle it. The InfoExtractor extracts

137

all the information about the video or videos the URL refers to, and

138

YoutubeDL process the extracted information, possibly using a File

139

Downloader to download the video.

140

141

YoutubeDL objects accept a lot of parameters. In order not to saturate

142

the object constructor with arguments, it receives a dictionary of

143

options instead. These options are available through the params

144

attribute for the InfoExtractors to use. The YoutubeDL also

145

registers itself as the downloader in charge for the InfoExtractors

146

that are added to it, so this is a "mutual registration".

Available options:

username: Username for authentication purposes.

151

password: Password for authentication purposes.

152

videopassword: Password for accessing a video.

153

ap_mso: Adobe Pass multiple-system operator identifier.

154

ap_username: Multiple-system operator account username.

155

ap_password: Multiple-system operator account password.

156

usenetrc: Use netrc for authentication instead.

157

verbose: Print additional info to stdout.

158

quiet: Do not print messages to stdout.

159

no_warnings: Do not print out anything for warnings.

160

forceurl: Force printing final URL.

161

forcetitle: Force printing title.

162

forceid: Force printing ID.

163

forcethumbnail: Force printing thumbnail URL.

164

forcedescription: Force printing description.

165

forcefilename: Force printing final filename.

166

forceduration: Force printing duration.

167

forcejson: Force printing info_dict as JSON.

168

dump_single_json: Force printing the info_dict of the whole playlist

169

(or video) as a single JSON line.

170

force_write_download_archive: Force writing download archive regardless of

171

'skip_download' or 'simulate'.

172

simulate: Do not download the video files.

173

format: Video format code. see "FORMAT SELECTION" for more details.

174

format_sort: How to sort the video formats. see "Sorting Formats" for more details.

175

format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.

176

allow_multiple_video_streams: Allow multiple video streams to be merged into a single file

177

allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file

178

outtmpl: Template for output names.

179

restrictfilenames: Do not allow "&" and spaces in file names.

180

trim_file_name: Limit length of filename (extension excluded).

181

ignoreerrors: Do not stop on download errors. (Default False when running youtube-dlc, but True when directly accessing YoutubeDL class)

182

force_generic_extractor: Force downloader to use the generic extractor

183

nooverwrites: Prevent overwriting files.

184

playliststart: Playlist item to start at.

185

playlistend: Playlist item to end at.

186

playlist_items: Specific indices of playlist to download.

187

playlistreverse: Download playlist items in reverse order.

188

playlistrandom: Download playlist items in random order.

189

matchtitle: Download only matching titles.

190

rejecttitle: Reject downloads for matching titles.

191

logger: Log messages to a logging.Logger instance.

192

logtostderr: Log messages to stderr instead of stdout.

193

writedescription: Write the video description to a .description file

194

writeinfojson: Write the video description to a .info.json file

195

writeannotations: Write the video annotations to a .annotations.xml file

196

writethumbnail: Write the thumbnail image to a file

197

write_all_thumbnails: Write all thumbnail formats to files

198

writelink: Write an internet shortcut file, depending on the

199

current platform (.url/.webloc/.desktop)

200

writeurllink: Write a Windows internet shortcut file (.url)

201

writewebloclink: Write a macOS internet shortcut file (.webloc)

202

writedesktoplink: Write a Linux internet shortcut file (.desktop)

203

writesubtitles: Write the video subtitles to a file

204

writeautomaticsub: Write the automatically generated subtitles to a file

205

allsubtitles: Downloads all the subtitles of the video

206

(requires writesubtitles or writeautomaticsub)

207

listsubtitles: Lists all available subtitles for the video

208

subtitlesformat: The format code for subtitles

209

subtitleslangs: List of languages of the subtitles to download

210

keepvideo: Keep the video file after post-processing

211

daterange: A DateRange object, download only if the upload_date is in the range.

212

skip_download: Skip the actual download of the video file

213

cachedir: Location of the cache files in the filesystem.

214

False to disable filesystem cache.

215

noplaylist: Download single video instead of a playlist if in doubt.

216

age_limit: An integer representing the user's age in years.

217

Unsuitable videos for the given age are skipped.

218

min_views: An integer representing the minimum view count the video

219

must have in order to not be skipped.

220

Videos without view count information are always

221

downloaded. None for no limit.

222

max_views: An integer representing the maximum view count.

223

Videos that are more popular than that are not

224

downloaded.

225

Videos without view count information are always

226

downloaded. None for no limit.

227

download_archive: File name of a file where all downloads are recorded.

228

Videos already present in the file are not downloaded

229

again.

230

break_on_existing: Stop the download process after attempting to download a file that's

231

in the archive.

232

cookiefile: File name where cookies should be read from and dumped to.

233

nocheckcertificate:Do not verify SSL certificates

234

prefer_insecure: Use HTTP instead of HTTPS to retrieve information.

235

At the moment, this is only supported by YouTube.

236

proxy: URL of the proxy server to use

237

geo_verification_proxy: URL of the proxy to use for IP address verification

238

on geo-restricted sites.

239

socket_timeout: Time to wait for unresponsive hosts, in seconds

240

bidi_workaround: Work around buggy terminals without bidirectional text

241

support, using fridibi

242

debug_printtraffic:Print out sent and received HTTP traffic

243

include_ads: Download ads as well

244

default_search: Prepend this string if an input url is not valid.

245

'auto' for elaborate guessing

246

encoding: Use this encoding instead of the system-specified.

247

extract_flat: Do not resolve URLs, return the immediate result.

248

Pass in 'in_playlist' to only show this behavior for

249

playlist items.

250

postprocessors: A list of dictionaries, each with an entry

251

* key: The name of the postprocessor. See

252

youtube_dlc/postprocessor/__init__.py for a list.

253

as well as any further keyword arguments for the

254

postprocessor.

255

progress_hooks: A list of functions that get called on download

256

progress, with a dictionary with the entries

257

* status: One of "downloading", "error", or "finished".

258

Check this first and ignore unknown values.

259

260

If status is one of "downloading", or "finished", the

261

following properties may also be present:

262

* filename: The final filename (always present)

263

* tmpfilename: The filename we're currently writing to

264

* downloaded_bytes: Bytes on disk

265

* total_bytes: Size of the whole file, None if unknown

266

* total_bytes_estimate: Guess of the eventual file size,

267

None if unavailable.

268

* elapsed: The number of seconds since download started.

269

* eta: The estimated time in seconds, None if unknown

270

* speed: The download speed in bytes/second, None if

271

unknown

272

* fragment_index: The counter of the currently

273

downloaded video fragment.

274

* fragment_count: The number of fragments (= individual

275

files that will be merged)

276

277

Progress hooks are guaranteed to be called at least once

278

(with status "finished") if the download is successful.

279

merge_output_format: Extension to use when merging formats.

280

fixup: Automatically correct known faults of the file.

281

One of:

282

- "never": do nothing

283

- "warn": only emit a warning

284

- "detect_or_warn": check whether we can do anything

285

about it, warn otherwise (default)

286

source_address: Client-side IP address to bind to.

287

call_home: Boolean, true iff we are allowed to contact the

288

youtube-dlc servers for debugging.

289

sleep_interval: Number of seconds to sleep before each download when

290

used alone or a lower bound of a range for randomized

291

sleep before each download (minimum possible number

292

of seconds to sleep) when used along with

293

max_sleep_interval.

294

max_sleep_interval:Upper bound of a range for randomized sleep before each

295

download (maximum possible number of seconds to sleep).

296

Must only be used along with sleep_interval.

297

Actual sleep time will be a random float from range

298

[sleep_interval; max_sleep_interval].

299

listformats: Print an overview of available video formats and exit.

300

list_thumbnails: Print a table of all thumbnails and exit.

301

match_filter: A function that gets called with the info_dict of

302

every video.

303

If it returns a message, the video is ignored.

304

If it returns None, the video is downloaded.

305

match_filter_func in utils.py is one example for this.

306

no_color: Do not emit color codes in output.

307

geo_bypass: Bypass geographic restriction via faking X-Forwarded-For

308

HTTP header

309

geo_bypass_country:

310

Two-letter ISO 3166-2 country code that will be used for

311

explicit geographic restriction bypassing via faking

312

X-Forwarded-For HTTP header

313

geo_bypass_ip_block:

314

IP range in CIDR notation that will be used similarly to

315

geo_bypass_country

316

317

The following options determine which downloader is picked:

318

external_downloader: Executable of the external downloader to call.

319

None or unset for standard (built-in) downloader.

320

hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv

321

if True, otherwise use ffmpeg/avconv if False, otherwise

322

use downloader suggested by extractor if None.

323

324

The following parameters are not used by YoutubeDL itself, they are used by

325

the downloader (see youtube_dlc/downloader/common.py):

326

nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,

327

noresizebuffer, retries, continuedl, noprogress, consoletitle,

328

xattr_set_filesize, external_downloader_args, hls_use_mpegts,

329

http_chunk_size.

330

331

The following options are used by the post processors:

332

prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,

333

otherwise prefer ffmpeg.

334

ffmpeg_location: Location of the ffmpeg/avconv binary; either the path

335

to the binary or its containing directory.

336

postprocessor_args: A list of additional command-line arguments for the

337

postprocessor.

338

339

The following options are used by the Youtube extractor:

340

youtube_include_dash_manifest: If True (default), DASH manifests and related

341

data will be downloaded and processed by extractor.

342

You can reduce network I/O by disabling it if you don't

care about DASH.

"""

_NUMERIC_FIELDS = set((

347

'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',

348

'timestamp', 'upload_year', 'upload_month', 'upload_day',

349

'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',

350

'average_rating', 'comment_count', 'age_limit',

351

'start_time', 'end_time',

352

'chapter_number', 'season_number', 'episode_number',

353

'track_number', 'disc_number', 'release_year',

'playlist_index',

))

params = None

_ies = []

_pps = []

_download_retcode = None

361

_num_downloads = None

362

_screen_file = None

363

364

def __init__(self, params=None, auto_init=True):

365

"""Create a FileDownloader object with the given options."""

if params is None:

params = {}

self._ies = []

self._ies_instances = {}

370

self._pps = []

371

self._progress_hooks = []

372

self._download_retcode = 0

373

self._num_downloads = 0

374

self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]

375

self._err_file = sys.stderr

376

self.params = {

377

# Default parameters

378

'nocheckcertificate': False,

379

}

380

self.params.update(params)

381

self.cache = Cache(self)

382

self.archive = set()

383

384

"""Preload the archive, if any is specified"""

385

def preload_download_archive(self):

386

fn = self.params.get('download_archive')

if fn is None:

return False

try:

with locked_file(fn, 'r', encoding='utf-8') as archive_file:

391

for line in archive_file:

392

self.archive.add(line.strip())

393

except IOError as ioe:

394

if ioe.errno != errno.ENOENT:

raise

return False

return True

def check_deprecated(param, option, suggestion):

400

if self.params.get(param) is not None:

401

self.report_warning(

402

'%s is deprecated. Use %s instead.' % (option, suggestion))

return True

return False

if self.params.get('verbose'):

407

self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))

408

409

preload_download_archive(self)

410

411

if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):

412

if self.params.get('geo_verification_proxy') is None:

413

self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

414

415

check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')

416

check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')

417

check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

418

419

if params.get('bidi_workaround', False):

420

try:

421

import pty

422

master, slave = pty.openpty()

423

width = compat_get_terminal_size().columns

if width is None:

width_args = []

else:

width_args = ['-w', str(width)]

428

sp_kwargs = dict(

429

stdin=subprocess.PIPE,

430

stdout=slave,

431

stderr=self._err_file)

432

try:

433

self._output_process = subprocess.Popen(

434

['bidiv'] + width_args, **sp_kwargs

435

)

436

except OSError:

437

self._output_process = subprocess.Popen(

438

['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)

439

self._output_channel = os.fdopen(master, 'rb')

440

except OSError as ose:

441

if ose.errno == errno.ENOENT:

442

self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

else:

raise

if (sys.platform != 'win32'

447

and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']

448

and not params.get('restrictfilenames', False)):

449

# Unicode filesystem API will throw errors (#1474, #13027)

450

self.report_warning(

451

'Assuming --restrict-filenames since file system encoding '

452

'cannot encode all characters. '

453

'Set the LC_ALL environment variable to fix this.')

454

self.params['restrictfilenames'] = True

455

456

if isinstance(params.get('outtmpl'), bytes):

457

self.report_warning(

458

'Parameter outtmpl is bytes, but should be a unicode string. '

459

'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')

self._setup_opener()

if auto_init:

self.print_debug_header()

465

self.add_default_info_extractors()

466

467

for pp_def_raw in self.params.get('postprocessors', []):

468

pp_class = get_postprocessor(pp_def_raw['key'])

469

pp_def = dict(pp_def_raw)

470

del pp_def['key']

471

pp = pp_class(self, **compat_kwargs(pp_def))

472

self.add_post_processor(pp)

473

474

for ph in self.params.get('progress_hooks', []):

475

self.add_progress_hook(ph)

476

477

register_socks_protocols()

478

479

def warn_if_short_id(self, argv):

480

# short YouTube ID starting with dash?

481

idxs = [

482

i for i, a in enumerate(argv)

483

if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]

if idxs:

correct_argv = (

['youtube-dlc']

+ [a for i, a in enumerate(argv) if i not in idxs]

488

+ ['--'] + [argv[i] for i in idxs]

489

)

490

self.report_warning(

491

'Long argument string detected. '

492

'Use -- to separate parameters and URLs, like this:\n%s\n' %

493

args_to_str(correct_argv))

494

495

def add_info_extractor(self, ie):

496

"""Add an InfoExtractor object to the end of the list."""

497

self._ies.append(ie)

498

if not isinstance(ie, type):

499

self._ies_instances[ie.ie_key()] = ie

500

ie.set_downloader(self)

501

502

def get_info_extractor(self, ie_key):

503

"""

504

Get an instance of an IE with name ie_key, it will try to get one from

505

the _ies list, if there's no instance it will create a new one and add

506

it to the extractor list.

507

"""

508

ie = self._ies_instances.get(ie_key)

509

if ie is None:

510

ie = get_info_extractor(ie_key)()

511

self.add_info_extractor(ie)

512

return ie

513

514

def add_default_info_extractors(self):

515

"""

516

Add the InfoExtractors returned by gen_extractors to the end of the list

517

"""

518

for ie in gen_extractor_classes():

519

self.add_info_extractor(ie)

520

521

def add_post_processor(self, pp):

522

"""Add a PostProcessor object to the end of the chain."""

523

self._pps.append(pp)

524

pp.set_downloader(self)

525

526

def add_progress_hook(self, ph):

527

"""Add the progress hook (currently only for the file downloader)"""

528

self._progress_hooks.append(ph)

529

530

def _bidi_workaround(self, message):

531

if not hasattr(self, '_output_channel'):

532

return message

533

534

assert hasattr(self, '_output_process')

535

assert isinstance(message, compat_str)

536

line_count = message.count('\n') + 1

537

self._output_process.stdin.write((message + '\n').encode('utf-8'))

538

self._output_process.stdin.flush()

539

res = ''.join(self._output_channel.readline().decode('utf-8')

540

for _ in range(line_count))

541

return res[:-len('\n')]

542

543

def to_screen(self, message, skip_eol=False):

544

"""Print message to stdout if not in quiet mode."""

545

return self.to_stdout(message, skip_eol, check_quiet=True)

546

547

def _write_string(self, s, out=None):

548

write_string(s, out=out, encoding=self.params.get('encoding'))

549

550

def to_stdout(self, message, skip_eol=False, check_quiet=False):

551

"""Print message to stdout if not in quiet mode."""

552

if self.params.get('logger'):

553

self.params['logger'].debug(message)

554

elif not check_quiet or not self.params.get('quiet', False):

555

message = self._bidi_workaround(message)

556

terminator = ['\n', ''][skip_eol]

557

output = message + terminator

558

559

self._write_string(output, self._screen_file)

560

561

def to_stderr(self, message):

562

"""Print message to stderr."""

563

assert isinstance(message, compat_str)

564

if self.params.get('logger'):

565

self.params['logger'].error(message)

566

else:

567

message = self._bidi_workaround(message)

568

output = message + '\n'

569

self._write_string(output, self._err_file)

570

571

def to_console_title(self, message):

572

if not self.params.get('consoletitle', False):

573

return

574

if compat_os_name == 'nt':

575

if ctypes.windll.kernel32.GetConsoleWindow():

576

# c_wchar_p() might not be necessary if `message` is

577

# already of type unicode()

578

ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))

579

elif 'TERM' in os.environ:

580

self._write_string('\033]0;%s\007' % message, self._screen_file)

581

582

def save_console_title(self):

583

if not self.params.get('consoletitle', False):

584

return

585

if self.params.get('simulate', False):

586

return

587

if compat_os_name != 'nt' and 'TERM' in os.environ:

588

# Save the title on stack

589

self._write_string('\033[22;0t', self._screen_file)

590

591

def restore_console_title(self):

592

if not self.params.get('consoletitle', False):

593

return

594

if self.params.get('simulate', False):

595

return

596

if compat_os_name != 'nt' and 'TERM' in os.environ:

597

# Restore the title from stack

598

self._write_string('\033[23;0t', self._screen_file)

599

600

def __enter__(self):

601

self.save_console_title()

602

return self

603

604

def __exit__(self, *args):

605

self.restore_console_title()

606

607

if self.params.get('cookiefile') is not None:

608

self.cookiejar.save(ignore_discard=True, ignore_expires=True)

609

610

def trouble(self, message=None, tb=None):

611

"""Determine action to take when a download problem appears.

612

613

Depending on if the downloader has been configured to ignore

614

download errors or not, this method may throw an exception or

615

not when errors are found, after printing the message.

616

617

tb, if given, is additional traceback information.

618

"""

619

if message is not None:

620

self.to_stderr(message)

621

if self.params.get('verbose'):

622

if tb is None:

623

if sys.exc_info()[0]: # if .trouble has been called from an except block

624

tb = ''

625

if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:

626

tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))

627

tb += encode_compat_str(traceback.format_exc())

628

else:

629

tb_data = traceback.format_list(traceback.extract_stack())

630

tb = ''.join(tb_data)

631

self.to_stderr(tb)

632

if not self.params.get('ignoreerrors', False):

633

if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:

634

exc_info = sys.exc_info()[1].exc_info

635

else:

636

exc_info = sys.exc_info()

637

raise DownloadError(message, exc_info)

638

self._download_retcode = 1

639

640

def report_warning(self, message):

641

'''

642

Print the message to stderr, it will be prefixed with 'WARNING:'

643

If stderr is a tty file the 'WARNING:' will be colored

644

'''

645

if self.params.get('logger') is not None:

646

self.params['logger'].warning(message)

647

else:

648

if self.params.get('no_warnings'):

649

return

650

if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':

651

_msg_header = '\033[0;33mWARNING:\033[0m'

652

else:

653

_msg_header = 'WARNING:'

654

warning_message = '%s %s' % (_msg_header, message)

655

self.to_stderr(warning_message)

656

657

def report_error(self, message, tb=None):

658

'''

659

Do the same as trouble, but prefixes the message with 'ERROR:', colored

660

in red if stderr is a tty file.

661

'''

662

if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':

663

_msg_header = '\033[0;31mERROR:\033[0m'

664

else:

665

_msg_header = 'ERROR:'

666

error_message = '%s %s' % (_msg_header, message)

667

self.trouble(error_message, tb)

668

669

def report_file_already_downloaded(self, file_name):

670

"""Report file has already been fully downloaded."""

671

try:

672

self.to_screen('[download] %s has already been downloaded' % file_name)

673

except UnicodeEncodeError:

674

self.to_screen('[download] The file has already been downloaded')

675

676

def prepare_filename(self, info_dict):

677

"""Generate the output filename."""

678

try:

679

template_dict = dict(info_dict)

680

681

template_dict['epoch'] = int(time.time())

682

autonumber_size = self.params.get('autonumber_size')

683

if autonumber_size is None:

684

autonumber_size = 5

685

template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads

686

if template_dict.get('resolution') is None:

687

if template_dict.get('width') and template_dict.get('height'):

688

template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])

689

elif template_dict.get('height'):

690

template_dict['resolution'] = '%sp' % template_dict['height']

691

elif template_dict.get('width'):

692

template_dict['resolution'] = '%dx?' % template_dict['width']

693

694

sanitize = lambda k, v: sanitize_filename(

695

compat_str(v),

696

restricted=self.params.get('restrictfilenames'),

697

is_id=(k == 'id' or k.endswith('_id')))

698

template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))

699

for k, v in template_dict.items()

700

if v is not None and not isinstance(v, (list, tuple, dict)))

701

template_dict = collections.defaultdict(lambda: 'NA', template_dict)

702

703

outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

704

705

# For fields playlist_index and autonumber convert all occurrences

706

# of %(field)s to %(field)0Nd for backward compatibility

707

field_size_compat_map = {

708

'playlist_index': len(str(template_dict['n_entries'])),

709

'autonumber': autonumber_size,

710

}

711

FIELD_SIZE_COMPAT_RE = r'(?<!%)%$(?P<field>autonumber|playlist_index)$s'

712

mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)

713

if mobj:

714

outtmpl = re.sub(

715

FIELD_SIZE_COMPAT_RE,

716

r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],

717

outtmpl)

718

719

# Missing numeric fields used together with integer presentation types

720

# in format specification will break the argument substitution since

721

# string 'NA' is returned for missing fields. We will patch output

722

# template for missing fields to meet string presentation type.

723

for numeric_field in self._NUMERIC_FIELDS:

724

if numeric_field not in template_dict:

725

# As of [1] format syntax is:

726

# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type

727

# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting

FORMAT_RE = r'''(?x)

(?<!%)

%

${0}$ # mapping key

(?:[#0\-+ ]+)? # conversion flags (optional)

733

(?:\d+)? # minimum field width (optional)

734

(?:\.\d+)? # precision (optional)

735

[hlL]? # length modifier (optional)

736

[diouxXeEfFgGcrs%] # conversion type

737

'''

738

outtmpl = re.sub(

739

FORMAT_RE.format(numeric_field),

740

r'%({0})s'.format(numeric_field), outtmpl)

741

742

# expand_path translates '%%' into '%' and '$$' into '$'

743

# correspondingly that is not what we want since we need to keep

744

# '%%' intact for template dict substitution step. Working around

745

# with boundary-alike separator hack.

746

sep = ''.join([random.choice(ascii_letters) for _ in range(32)])

747

outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

748

749

# outtmpl should be expand_path'ed before template dict substitution

750

# because meta fields may contain env variables we don't want to

751

# be expanded. For example, for outtmpl "%(title)s.%(ext)s" and

752

# title "Hello $PATH", we don't want `$PATH` to be expanded.

753

filename = expand_path(outtmpl).replace(sep, '') % template_dict

754

755

# https://github.com/blackjack4494/youtube-dlc/issues/85

756

trim_file_name = self.params.get('trim_file_name', False)

757

if trim_file_name:

758

fn_groups = filename.rsplit('.')

759

ext = fn_groups[-1]

760

sub_ext = ''

761

if len(fn_groups) > 2:

762

sub_ext = fn_groups[-2]

763

filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

764

765

# Temporary fix for #4787

766

# 'Treat' all problem characters by passing filename through preferredencoding

767

# to workaround encoding issues with subprocess on python2 @ Windows

768

if sys.version_info < (3, 0) and sys.platform == 'win32':

769

filename = encodeFilename(filename, True).decode(preferredencoding())

770

return sanitize_path(filename)

771

except ValueError as err:

772

self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')

773

return None

774

775

def _match_entry(self, info_dict, incomplete):

776

""" Returns None if the file should be downloaded """

777

778

video_title = info_dict.get('title', info_dict.get('id', 'video'))

779

if 'title' in info_dict:

780

# This can happen when we're just evaluating the playlist

781

title = info_dict['title']

782

matchtitle = self.params.get('matchtitle', False)

783

if matchtitle:

784

if not re.search(matchtitle, title, re.IGNORECASE):

785

return '"' + title + '" title did not match pattern "' + matchtitle + '"'

786

rejecttitle = self.params.get('rejecttitle', False)

787

if rejecttitle:

788

if re.search(rejecttitle, title, re.IGNORECASE):

789

return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

790

date = info_dict.get('upload_date')

791

if date is not None:

792

dateRange = self.params.get('daterange', DateRange())

793

if date not in dateRange:

794

return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)

795

view_count = info_dict.get('view_count')

796

if view_count is not None:

797

min_views = self.params.get('min_views')

798

if min_views is not None and view_count < min_views:

799

return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)

800

max_views = self.params.get('max_views')

801

if max_views is not None and view_count > max_views:

802

return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)

803

if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):

804

return 'Skipping "%s" because it is age restricted' % video_title

805

if self.in_download_archive(info_dict):

806

return '%s has already been recorded in archive' % video_title

807

808

if not incomplete:

809

match_filter = self.params.get('match_filter')

810

if match_filter is not None:

811

ret = match_filter(info_dict)

if ret is not None:

return ret

return None

@staticmethod

def add_extra_info(info_dict, extra_info):

819

'''Set the keys from extra_info in info dict if they are missing'''

820

for key, value in extra_info.items():

821

info_dict.setdefault(key, value)

822

823

def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},

824

process=True, force_generic_extractor=False):

825

'''

826

Returns a list with a dictionary for each video we find.

827

If 'download', also downloads the videos.

828

extra_info is a dict containing the extra values to add to each result

829

'''

830

831

if not ie_key and force_generic_extractor:

ie_key = 'Generic'

if ie_key:

ies = [self.get_info_extractor(ie_key)]

else:

ies = self._ies

for ie in ies:

if not ie.suitable(url):

continue

ie_key = ie.ie_key()

ie = self.get_info_extractor(ie_key)

845

if not ie.working():

846

self.report_warning('The program functionality for this site has been marked as broken, '

847

'and will probably not work.')

848

849

try:

850

temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)

851

except (AssertionError, IndexError, AttributeError):

852

temp_id = None

853

if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):

854

self.to_screen("[%s] %s: has already been recorded in archive" % (

ie_key, temp_id))

break

return self.__extract_info(url, ie, download, extra_info, process, info_dict)

859

860

else:

861

self.report_error('no suitable InfoExtractor for URL %s' % url)

862

863

def __handle_extraction_exceptions(func):

864

def wrapper(self, *args, **kwargs):

865

try:

866

return func(self, *args, **kwargs)

867

except GeoRestrictedError as e:

868

msg = e.msg

869

if e.countries:

870

msg += '\nThis video is available in %s.' % ', '.join(

871

map(ISO3166Utils.short2full, e.countries))

872

msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'

873

self.report_error(msg)

874

except ExtractorError as e: # An error we somewhat expected

875

self.report_error(compat_str(e), e.format_traceback())

876

except MaxDownloadsReached:

877

raise

878

except Exception as e:

879

if self.params.get('ignoreerrors', False):

880

self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))

else:

raise

return wrapper

@__handle_extraction_exceptions

886

def __extract_info(self, url, ie, download, extra_info, process, info_dict):

887

ie_result = ie.extract(url)

888

if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)

889

return

890

if isinstance(ie_result, list):

891

# Backwards compatibility: old IE result format

892

ie_result = {

893

'_type': 'compat_list',

894

'entries': ie_result,

895

}

896

if info_dict:

897

if info_dict.get('id'):

898

ie_result['id'] = info_dict['id']

899

if info_dict.get('title'):

900

ie_result['title'] = info_dict['title']

901

self.add_default_extra_info(ie_result, ie, url)

902

if process:

903

return self.process_ie_result(ie_result, download, extra_info)

else:

return ie_result

def add_default_extra_info(self, ie_result, ie, url):

908

self.add_extra_info(ie_result, {

909

'extractor': ie.IE_NAME,

910

'webpage_url': url,

911

'duration_string': (

912

formatSeconds(ie_result['duration'], '-')

913

if ie_result.get('duration', None) is not None

914

else None),

915

'webpage_url_basename': url_basename(url),

916

'extractor_key': ie.ie_key(),

917

})

918

919

def process_ie_result(self, ie_result, download=True, extra_info={}):

920

"""

921

Take the result of the ie(may be modified) and resolve all unresolved

922

references (URLs, playlist items).

923

924

It will also download the videos if 'download'.

925

Returns the resolved ie_result.

926

"""

927

result_type = ie_result.get('_type', 'video')

928

929

if result_type in ('url', 'url_transparent'):

930

ie_result['url'] = sanitize_url(ie_result['url'])

931

extract_flat = self.params.get('extract_flat', False)

932

if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)

933

or extract_flat is True):

934

self.__forced_printings(

935

ie_result, self.prepare_filename(ie_result),

incomplete=True)

return ie_result

if result_type == 'video':

940

self.add_extra_info(ie_result, extra_info)

941

return self.process_video_result(ie_result, download=download)

942

elif result_type == 'url':

943

# We have to add extra_info to the results because it may be

944

# contained in a playlist

945

return self.extract_info(ie_result['url'],

946

download, info_dict=ie_result,

947

ie_key=ie_result.get('ie_key'),

948

extra_info=extra_info)

949

elif result_type == 'url_transparent':

950

# Use the information from the embedding page

951

info = self.extract_info(

952

ie_result['url'], ie_key=ie_result.get('ie_key'),

953

extra_info=extra_info, download=False, process=False)

954

955

# extract_info may return None when ignoreerrors is enabled and

956

# extraction failed with an error, don't crash and return early

# in this case

if not info:

return info

force_properties = dict(

962

(k, v) for k, v in ie_result.items() if v is not None)

963

for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):

964

if f in force_properties:

965

del force_properties[f]

966

new_result = info.copy()

967

new_result.update(force_properties)

968

969

# Extracted info may not be a video result (i.e.

970

# info.get('_type', 'video') != video) but rather an url or

971

# url_transparent. In such cases outer metadata (from ie_result)

972

# should be propagated to inner one (info). For this to happen

973

# _type of info should be overridden with url_transparent. This

974

# fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.

975

if new_result.get('_type') == 'url':

976

new_result['_type'] = 'url_transparent'

977

978

return self.process_ie_result(

979

new_result, download=download, extra_info=extra_info)

980

elif result_type in ('playlist', 'multi_video'):

981

# We process each entry in the playlist

982

playlist = ie_result.get('title') or ie_result.get('id')

983

self.to_screen('[download] Downloading playlist: %s' % playlist)

984

985

playlist_results = []

986

987

playliststart = self.params.get('playliststart', 1) - 1

988

playlistend = self.params.get('playlistend')

989

# For backwards compatibility, interpret -1 as whole list

990

if playlistend == -1:

991

playlistend = None

992

993

playlistitems_str = self.params.get('playlist_items')

994

playlistitems = None

995

if playlistitems_str is not None:

996

def iter_playlistitems(format):

997

for string_segment in format.split(','):

998

if '-' in string_segment:

999

start, end = string_segment.split('-')

1000

for item in range(int(start), int(end) + 1):

1001

yield int(item)

1002

else:

1003

yield int(string_segment)

1004

playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

1005

1006

ie_entries = ie_result['entries']

1007

1008

def make_playlistitems_entries(list_ie_entries):

1009

num_entries = len(list_ie_entries)

1010

return [

1011

list_ie_entries[i - 1] for i in playlistitems

1012

if -num_entries <= i - 1 < num_entries]

1013

1014

def report_download(num_entries):

1015

self.to_screen(

1016

'[%s] playlist %s: Downloading %d videos' %

1017

(ie_result['extractor'], playlist, num_entries))

1018

1019

if isinstance(ie_entries, list):

1020

n_all_entries = len(ie_entries)

1021

if playlistitems:

1022

entries = make_playlistitems_entries(ie_entries)

1023

else:

1024

entries = ie_entries[playliststart:playlistend]

1025

n_entries = len(entries)

1026

self.to_screen(

1027

'[%s] playlist %s: Collected %d video ids (downloading %d of them)' %

1028

(ie_result['extractor'], playlist, n_all_entries, n_entries))

1029

elif isinstance(ie_entries, PagedList):

1030

if playlistitems:

1031

entries = []

1032

for item in playlistitems:

1033

entries.extend(ie_entries.getslice(

item - 1, item

))

else:

entries = ie_entries.getslice(

1038

playliststart, playlistend)

1039

n_entries = len(entries)

1040

report_download(n_entries)

1041

else: # iterable

1042

if playlistitems:

1043

entries = make_playlistitems_entries(list(itertools.islice(

1044

ie_entries, 0, max(playlistitems))))

1045

else:

1046

entries = list(itertools.islice(

1047

ie_entries, playliststart, playlistend))

1048

n_entries = len(entries)

1049

report_download(n_entries)

1050

1051

if self.params.get('playlistreverse', False):

1052

entries = entries[::-1]

1053

1054

if self.params.get('playlistrandom', False):

1055

random.shuffle(entries)

1056

1057

x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

1058

1059

for i, entry in enumerate(entries, 1):

1060

self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))

1061

# This __x_forwarded_for_ip thing is a bit ugly but requires

1062

# minimal changes

1063

if x_forwarded_for:

1064

entry['__x_forwarded_for_ip'] = x_forwarded_for

1065

extra = {

1066

'n_entries': n_entries,

1067

'playlist': playlist,

1068

'playlist_id': ie_result.get('id'),

1069

'playlist_title': ie_result.get('title'),

1070

'playlist_uploader': ie_result.get('uploader'),

1071

'playlist_uploader_id': ie_result.get('uploader_id'),

1072

'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,

1073

'extractor': ie_result['extractor'],

1074

'webpage_url': ie_result['webpage_url'],

1075

'webpage_url_basename': url_basename(ie_result['webpage_url']),

1076

'extractor_key': ie_result['extractor_key'],

1077

}

1078

1079

reason = self._match_entry(entry, incomplete=True)

1080

if reason is not None:

1081

if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing'):

1082

print('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')

1083

break

1084

else:

1085

self.to_screen('[download] ' + reason)

1086

continue

1087

1088

entry_result = self.__process_iterable_entry(entry, download, extra)

1089

# TODO: skip failed (empty) entries?

1090

playlist_results.append(entry_result)

1091

ie_result['entries'] = playlist_results

1092

self.to_screen('[download] Finished downloading playlist: %s' % playlist)

1093

return ie_result

1094

elif result_type == 'compat_list':

1095

self.report_warning(

1096

'Extractor %s returned a compat_list result. '

1097

'It needs to be updated.' % ie_result.get('extractor'))

def _fixup(r):

self.add_extra_info(

r,

{

'extractor': ie_result['extractor'],

1104

'webpage_url': ie_result['webpage_url'],

1105

'webpage_url_basename': url_basename(ie_result['webpage_url']),

1106

'extractor_key': ie_result['extractor_key'],

}

)

return r

ie_result['entries'] = [

1111

self.process_ie_result(_fixup(r), download, extra_info)

1112

for r in ie_result['entries']

]

return ie_result

else:

raise Exception('Invalid result type: %s' % result_type)

1117

1118

@__handle_extraction_exceptions

1119

def __process_iterable_entry(self, entry, download, extra_info):

1120

return self.process_ie_result(

1121

entry, download=download, extra_info=extra_info)

1122

1123

def _build_format_filter(self, filter_spec):

1124

" Returns a function to filter the formats according to the filter_spec "

OPERATORS = {

'<': operator.lt,

'<=': operator.le,

'>': operator.gt,

'>=': operator.ge,

'=': operator.eq,

'!=': operator.ne,

}

operator_rex = re.compile(r'''(?x)\s*

1135

(?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)

1136

\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*

1137

(?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)

1138

$

1139

''' % '|'.join(map(re.escape, OPERATORS.keys())))

1140

m = operator_rex.search(filter_spec)

1141

if m:

1142

try:

1143

comparison_value = int(m.group('value'))

1144

except ValueError:

1145

comparison_value = parse_filesize(m.group('value'))

1146

if comparison_value is None:

1147

comparison_value = parse_filesize(m.group('value') + 'B')

1148

if comparison_value is None:

1149

raise ValueError(

1150

'Invalid value %r in format specification %r' % (

1151

m.group('value'), filter_spec))

1152

op = OPERATORS[m.group('op')]

if not m:

STR_OPERATORS = {

'=': operator.eq,

'^=': lambda attr, value: attr.startswith(value),

1158

'$=': lambda attr, value: attr.endswith(value),

1159

'*=': lambda attr, value: value in attr,

1160

}

1161

str_operator_rex = re.compile(r'''(?x)

1162

\s*(?P<key>[a-zA-Z0-9._-]+)

1163

\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?

1164

\s*(?P<value>[a-zA-Z0-9._-]+)

1165

\s*$

1166

''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))

1167

m = str_operator_rex.search(filter_spec)

1168

if m:

1169

comparison_value = m.group('value')

1170

str_op = STR_OPERATORS[m.group('op')]

1171

if m.group('negation'):

1172

op = lambda attr, value: not str_op(attr, value)

else:

op = str_op

if not m:

raise ValueError('Invalid filter specification %r' % filter_spec)

1178

1179

def _filter(f):

1180

actual_value = f.get(m.group('key'))

1181

if actual_value is None:

1182

return m.group('none_inclusive')

1183

return op(actual_value, comparison_value)

1184

return _filter

1185

1186

def _default_format_spec(self, info_dict, download=True):

1187

1188

def can_merge():

1189

merger = FFmpegMergerPP(self)

1190

return merger.available and merger.can_merge()

1191

1192

prefer_best = (

1193

not self.params.get('simulate', False)

and download

and (

not can_merge()

or info_dict.get('is_live')

1198

or self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-'))

1199

1200

return (

1201

'best/bestvideo+bestaudio'

1202

if prefer_best

1203

else 'bestvideo*+bestaudio/best'

1204

if self.params.get('allow_multiple_audio_streams', False)

1205

else 'bestvideo+bestaudio/best')

1206

1207

def build_format_selector(self, format_spec):

1208

def syntax_error(note, start):

1209

message = (

1210

'Invalid format specification: '

1211

'{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))

1212

return SyntaxError(message)

1213

1214

PICKFIRST = 'PICKFIRST'

MERGE = 'MERGE'

SINGLE = 'SINGLE'

GROUP = 'GROUP'

FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

1219

1220

allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),

1221

'video': self.params.get('allow_multiple_video_streams', False)}

1222

1223

def _parse_filter(tokens):

1224

filter_parts = []

1225

for type, string, start, _, _ in tokens:

1226

if type == tokenize.OP and string == ']':

1227

return ''.join(filter_parts)

1228

else:

1229

filter_parts.append(string)

1230

1231

def _remove_unused_ops(tokens):

1232

# Remove operators that we don't use and join them with the surrounding strings

1233

# for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'

1234

ALLOWED_OPS = ('/', '+', ',', '(', ')')

1235

last_string, last_start, last_end, last_line = None, None, None, None

1236

for type, string, start, end, line in tokens:

1237

if type == tokenize.OP and string == '[':

1238

if last_string:

1239

yield tokenize.NAME, last_string, last_start, last_end, last_line

1240

last_string = None

1241

yield type, string, start, end, line

1242

# everything inside brackets will be handled by _parse_filter

1243

for type, string, start, end, line in tokens:

1244

yield type, string, start, end, line

1245

if type == tokenize.OP and string == ']':

1246

break

1247

elif type == tokenize.OP and string in ALLOWED_OPS:

1248

if last_string:

1249

yield tokenize.NAME, last_string, last_start, last_end, last_line

1250

last_string = None

1251

yield type, string, start, end, line

1252

elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:

if not last_string:

last_string = string

last_start = start

last_end = end

else:

last_string += string

1259

if last_string:

1260

yield tokenize.NAME, last_string, last_start, last_end, last_line

1261

1262

def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):

1263

selectors = []

1264

current_selector = None

1265

for type, string, start, _, _ in tokens:

1266

# ENCODING is only defined in python 3.x

1267

if type == getattr(tokenize, 'ENCODING', None):

1268

continue

1269

elif type in [tokenize.NAME, tokenize.NUMBER]:

1270

current_selector = FormatSelector(SINGLE, string, [])

1271

elif type == tokenize.OP:

1272

if string == ')':

1273

if not inside_group:

1274

# ')' will be handled by the parentheses group

1275

tokens.restore_last_token()

1276

break

1277

elif inside_merge and string in ['/', ',']:

1278

tokens.restore_last_token()

1279

break

1280

elif inside_choice and string == ',':

1281

tokens.restore_last_token()

1282

break

1283

elif string == ',':

1284

if not current_selector:

1285

raise syntax_error('"," must follow a format selector', start)

1286

selectors.append(current_selector)

1287

current_selector = None

1288

elif string == '/':

1289

if not current_selector:

1290

raise syntax_error('"/" must follow a format selector', start)

1291

first_choice = current_selector

1292

second_choice = _parse_format_selection(tokens, inside_choice=True)

1293

current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])

1294

elif string == '[':

1295

if not current_selector:

1296

current_selector = FormatSelector(SINGLE, 'best', [])

1297

format_filter = _parse_filter(tokens)

1298

current_selector.filters.append(format_filter)

1299

elif string == '(':

1300

if current_selector:

1301

raise syntax_error('Unexpected "("', start)

1302

group = _parse_format_selection(tokens, inside_group=True)

1303

current_selector = FormatSelector(GROUP, group, [])

1304

elif string == '+':

1305

if not current_selector:

1306

raise syntax_error('Unexpected "+"', start)

1307

selector_1 = current_selector

1308

selector_2 = _parse_format_selection(tokens, inside_merge=True)

1309

if not selector_2:

1310

raise syntax_error('Expected a selector', start)

1311

current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])

1312

else:

1313

raise syntax_error('Operator not recognized: "{0}"'.format(string), start)

1314

elif type == tokenize.ENDMARKER:

1315

break

1316

if current_selector:

1317

selectors.append(current_selector)

1318

return selectors

1319

1320

def _build_selector_function(selector):

1321

if isinstance(selector, list): # ,

1322

fs = [_build_selector_function(s) for s in selector]

1323

1324

def selector_function(ctx):

1325

for f in fs:

1326

for format in f(ctx):

1327

yield format

1328

return selector_function

1329

1330

elif selector.type == GROUP: # ()

1331

selector_function = _build_selector_function(selector.selector)

1332

1333

elif selector.type == PICKFIRST: # /

1334

fs = [_build_selector_function(s) for s in selector.selector]

1335

1336

def selector_function(ctx):

1337

for f in fs:

1338

picked_formats = list(f(ctx))

1339

if picked_formats:

1340

return picked_formats

1341

return []

1342

1343

elif selector.type == SINGLE: # atom

1344

format_spec = selector.selector if selector.selector is not None else 'best'

1345

1346

if format_spec == 'all':

1347

def selector_function(ctx):

1348

formats = list(ctx['formats'])

if formats:

for f in formats:

yield f

else:

format_fallback = False

1355

1356

if format_spec_obj is not None:

1357

format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1

1358

format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False

1359

not_format_type = 'v' if format_type == 'a' else 'a'

1360

format_modified = format_spec_obj.group(3) is not None

1361

1362

format_fallback = not format_type and not format_modified # for b, w

1363

filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')

1364

if format_type and format_modified # bv*, ba*, wv*, wa*

1365

else (lambda f: f.get(not_format_type + 'codec') == 'none')

1366

if format_type # bv, ba, wv, wa

1367

else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')

1368

if not format_modified # b, w

else None) # b*, w*

else:

format_idx = -1

filter_f = ((lambda f: f.get('ext') == format_spec)

1373

if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension

1374

else (lambda f: f.get('format_id') == format_spec)) # id

1375

1376

def selector_function(ctx):

1377

formats = list(ctx['formats'])

1378

if not formats:

1379

return

1380

matches = list(filter(filter_f, formats)) if filter_f is not None else formats

1381

if matches:

1382

yield matches[format_idx]

1383

elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):

1384

# for extractors with incomplete formats (audio only (soundcloud)

1385

# or video only (imgur)) best/worst will fallback to

1386

# best/worst {video,audio}-only format

1387

yield formats[format_idx]

1388

1389

elif selector.type == MERGE: # +

1390

def _merge(formats_pair):

1391

format_1, format_2 = formats_pair

1392

1393

formats_info = []

1394

formats_info.extend(format_1.get('requested_formats', (format_1,)))

1395

formats_info.extend(format_2.get('requested_formats', (format_2,)))

1396

1397

if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:

1398

get_no_more = {"video": False, "audio": False}

1399

for (i, fmt_info) in enumerate(formats_info):

1400

for aud_vid in ["audio", "video"]:

1401

if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':

1402

if get_no_more[aud_vid]:

1403

formats_info.pop(i)

1404

get_no_more[aud_vid] = True

1405

1406

if len(formats_info) == 1:

1407

return formats_info[0]

1408

1409

video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']

1410

audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

1411

1412

the_only_video = video_fmts[0] if len(video_fmts) == 1 else None

1413

the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

1414

1415

output_ext = self.params.get('merge_output_format')

1416

if not output_ext:

1417

if the_only_video:

1418

output_ext = the_only_video['ext']

1419

elif the_only_audio and not video_fmts:

1420

output_ext = the_only_audio['ext']

else:

output_ext = 'mkv'

new_dict = {

'requested_formats': formats_info,

1426

'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),

1427

'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),

'ext': output_ext,

}

if the_only_video:

new_dict.update({

'width': the_only_video.get('width'),

1434

'height': the_only_video.get('height'),

1435

'resolution': the_only_video.get('resolution'),

1436

'fps': the_only_video.get('fps'),

1437

'vcodec': the_only_video.get('vcodec'),

1438

'vbr': the_only_video.get('vbr'),

1439

'stretched_ratio': the_only_video.get('stretched_ratio'),

})

if the_only_audio:

new_dict.update({

'acodec': the_only_audio.get('acodec'),

1445

'abr': the_only_audio.get('abr'),

})

return new_dict

selector_1, selector_2 = map(_build_selector_function, selector.selector)

1451

1452

def selector_function(ctx):

1453

for pair in itertools.product(

1454

selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):

1455

yield _merge(pair)

1456

1457

filters = [self._build_format_filter(f) for f in selector.filters]

1458

1459

def final_selector(ctx):

1460

ctx_copy = copy.deepcopy(ctx)

1461

for _filter in filters:

1462

ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))

1463

return selector_function(ctx_copy)

1464

return final_selector

1465

1466

stream = io.BytesIO(format_spec.encode('utf-8'))

1467

try:

1468

tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))

1469

except tokenize.TokenError:

1470

raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

1471

1472

class TokenIterator(object):

1473

def __init__(self, tokens):

self.tokens = tokens

self.counter = 0

def __iter__(self):

return self

def __next__(self):

if self.counter >= len(self.tokens):

1482

raise StopIteration()

1483

value = self.tokens[self.counter]

self.counter += 1

return value

next = __next__

def restore_last_token(self):

1490

self.counter -= 1

1491

1492

parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))

1493

return _build_selector_function(parsed_selector)

1494

1495

def _calc_headers(self, info_dict):

1496

res = std_headers.copy()

1497

1498

add_headers = info_dict.get('http_headers')

1499

if add_headers:

1500

res.update(add_headers)

1501

1502

cookies = self._calc_cookies(info_dict)

1503

if cookies:

1504

res['Cookie'] = cookies

1505

1506

if 'X-Forwarded-For' not in res:

1507

x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')

1508

if x_forwarded_for_ip:

1509

res['X-Forwarded-For'] = x_forwarded_for_ip

return res

def _calc_cookies(self, info_dict):

1514

pr = sanitized_Request(info_dict['url'])

1515

self.cookiejar.add_cookie_header(pr)

1516

return pr.get_header('Cookie')

1517

1518

def process_video_result(self, info_dict, download=True):

1519

assert info_dict.get('_type', 'video') == 'video'

1520

1521

if 'id' not in info_dict:

1522

raise ExtractorError('Missing "id" field in extractor result')

1523

if 'title' not in info_dict:

1524

raise ExtractorError('Missing "title" field in extractor result')

1525

1526

def report_force_conversion(field, field_not, conversion):

1527

self.report_warning(

1528

'"%s" field is not %s - forcing %s conversion, there is an error in extractor'

1529

% (field, field_not, conversion))

1530

1531

def sanitize_string_field(info, string_field):

1532

field = info.get(string_field)

1533

if field is None or isinstance(field, compat_str):

1534

return

1535

report_force_conversion(string_field, 'a string', 'string')

1536

info[string_field] = compat_str(field)

1537

1538

def sanitize_numeric_fields(info):

1539

for numeric_field in self._NUMERIC_FIELDS:

1540

field = info.get(numeric_field)

1541

if field is None or isinstance(field, compat_numeric_types):

1542

continue

1543

report_force_conversion(numeric_field, 'numeric', 'int')

1544

info[numeric_field] = int_or_none(field)

1545

1546

sanitize_string_field(info_dict, 'id')

1547

sanitize_numeric_fields(info_dict)

1548

1549

if 'playlist' not in info_dict:

1550

# It isn't part of a playlist

1551

info_dict['playlist'] = None

1552

info_dict['playlist_index'] = None

1553

1554

thumbnails = info_dict.get('thumbnails')

1555

if thumbnails is None:

1556

thumbnail = info_dict.get('thumbnail')

1557

if thumbnail:

1558

info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]

1559

if thumbnails:

1560

thumbnails.sort(key=lambda t: (

1561

t.get('preference') if t.get('preference') is not None else -1,

1562

t.get('width') if t.get('width') is not None else -1,

1563

t.get('height') if t.get('height') is not None else -1,

1564

t.get('id') if t.get('id') is not None else '', t.get('url')))

1565

for i, t in enumerate(thumbnails):

1566

t['url'] = sanitize_url(t['url'])

1567

if t.get('width') and t.get('height'):

1568

t['resolution'] = '%dx%d' % (t['width'], t['height'])

1569

if t.get('id') is None:

1570

t['id'] = '%d' % i

1571

1572

if self.params.get('list_thumbnails'):

1573

self.list_thumbnails(info_dict)

1574

return

1575

1576

thumbnail = info_dict.get('thumbnail')

1577

if thumbnail:

1578

info_dict['thumbnail'] = sanitize_url(thumbnail)

1579

elif thumbnails:

1580

info_dict['thumbnail'] = thumbnails[-1]['url']

1581

1582

if 'display_id' not in info_dict and 'id' in info_dict:

1583

info_dict['display_id'] = info_dict['id']

1584

1585

if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:

1586

# Working around out-of-range timestamp values (e.g. negative ones on Windows,

1587

# see http://bugs.python.org/issue1646728)

1588

try:

1589

upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])

1590

info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

1591

except (ValueError, OverflowError, OSError):

1592

pass

1593

1594

# Auto generate title fields corresponding to the *_number fields when missing

1595

# in order to always have clean titles. This is very common for TV series.

1596

for field in ('chapter', 'season', 'episode'):

1597

if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):

1598

info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

1599

1600

for cc_kind in ('subtitles', 'automatic_captions'):

1601

cc = info_dict.get(cc_kind)

1602

if cc:

1603

for _, subtitle in cc.items():

1604

for subtitle_format in subtitle:

1605

if subtitle_format.get('url'):

1606

subtitle_format['url'] = sanitize_url(subtitle_format['url'])

1607

if subtitle_format.get('ext') is None:

1608

subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

1609

1610

automatic_captions = info_dict.get('automatic_captions')

1611

subtitles = info_dict.get('subtitles')

1612

1613

if self.params.get('listsubtitles', False):

1614

if 'automatic_captions' in info_dict:

1615

self.list_subtitles(

1616

info_dict['id'], automatic_captions, 'automatic captions')

1617

self.list_subtitles(info_dict['id'], subtitles, 'subtitles')

1618

return

1619

1620

info_dict['requested_subtitles'] = self.process_subtitles(

1621

info_dict['id'], subtitles, automatic_captions)

1622

1623

# We now pick which formats have to be downloaded

1624

if info_dict.get('formats') is None:

1625

# There's only one format available

1626

formats = [info_dict]

1627

else:

1628

formats = info_dict['formats']

1629

1630

if not formats:

1631

raise ExtractorError('No video formats found!')

1632

1633

def is_wellformed(f):

url = f.get('url')

if not url:

self.report_warning(

'"url" field is missing or empty - skipping format, '

1638

'there is an error in extractor')

1639

return False

1640

if isinstance(url, bytes):

1641

sanitize_string_field(f, 'url')

1642

return True

1643

1644

# Filter out malformed formats for better extraction robustness

1645

formats = list(filter(is_wellformed, formats))

formats_dict = {}

# We check that all the formats have the format and format_id fields

1650

for i, format in enumerate(formats):

1651

sanitize_string_field(format, 'format_id')

1652

sanitize_numeric_fields(format)

1653

format['url'] = sanitize_url(format['url'])

1654

if not format.get('format_id'):

1655

format['format_id'] = compat_str(i)

1656

else:

1657

# Sanitize format_id from characters used in format selector expression

1658

format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])

1659

format_id = format['format_id']

1660

if format_id not in formats_dict:

1661

formats_dict[format_id] = []

1662

formats_dict[format_id].append(format)

1663

1664

# Make sure all formats have unique format_id

1665

for format_id, ambiguous_formats in formats_dict.items():

1666

if len(ambiguous_formats) > 1:

1667

for i, format in enumerate(ambiguous_formats):

1668

format['format_id'] = '%s-%d' % (format_id, i)

1669

1670

for i, format in enumerate(formats):

1671

if format.get('format') is None:

1672

format['format'] = '{id} - {res}{note}'.format(

1673

id=format['format_id'],

1674

res=self.format_resolution(format),

1675

note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',

1676

)

1677

# Automatically determine file extension if missing

1678

if format.get('ext') is None:

1679

format['ext'] = determine_ext(format['url']).lower()

1680

# Automatically determine protocol if missing (useful for format

1681

# selection purposes)

1682

if format.get('protocol') is None:

1683

format['protocol'] = determine_protocol(format)

1684

# Add HTTP headers, so that external programs can use them from the

1685

# json output

1686

full_format_info = info_dict.copy()

1687

full_format_info.update(format)

1688

format['http_headers'] = self._calc_headers(full_format_info)

1689

# Remove private housekeeping stuff

1690

if '__x_forwarded_for_ip' in info_dict:

1691

del info_dict['__x_forwarded_for_ip']

1692

1693

# TODO Central sorting goes here

1694

1695

if formats[0] is not info_dict:

1696

# only set the 'formats' fields if the original info_dict list them

1697

# otherwise we end up with a circular reference, the first (and unique)

1698

# element in the 'formats' field in info_dict is info_dict itself,

1699

# which can't be exported to json

1700

info_dict['formats'] = formats

1701

if self.params.get('listformats'):

1702

self.list_formats(info_dict)

1703

return

1704

1705

req_format = self.params.get('format')

1706

if req_format is None:

1707

req_format = self._default_format_spec(info_dict, download=download)

1708

if self.params.get('verbose'):

1709

self._write_string('[debug] Default format spec: %s\n' % req_format)

1710

1711

format_selector = self.build_format_selector(req_format)

1712

1713

# While in format selection we may need to have an access to the original

1714

# format set in order to calculate some metrics or do some processing.

1715

# For now we need to be able to guess whether original formats provided

1716

# by extractor are incomplete or not (i.e. whether extractor provides only

1717

# video-only or audio-only formats) for proper formats selection for

1718

# extractors with such incomplete formats (see

1719

# https://github.com/ytdl-org/youtube-dl/pull/5556).

1720

# Since formats may be filtered during format selection and may not match

1721

# the original formats the results may be incorrect. Thus original formats

1722

# or pre-calculated metrics should be passed to format selection routines

1723

# as well.

1724

# We will pass a context object containing all necessary additional data

1725

# instead of just formats.

1726

# This fixes incorrect format selection issue (see

1727

# https://github.com/ytdl-org/youtube-dl/issues/10083).

1728

incomplete_formats = (

1729

# All formats are video-only or

1730

all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)

1731

# all formats are audio-only

1732

or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

ctx = {

'formats': formats,

'incomplete_formats': incomplete_formats,

1737

}

1738

1739

formats_to_download = list(format_selector(ctx))

1740

if not formats_to_download:

1741

raise ExtractorError('requested format not available',

expected=True)

if download:

self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))

1746

if len(formats_to_download) > 1:

1747

self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))

1748

for format in formats_to_download:

1749

new_info = dict(info_dict)

1750

new_info.update(format)

1751

self.process_info(new_info)

1752

# We update the info dict with the best quality format (backwards compatibility)

1753

info_dict.update(formats_to_download[-1])

1754

return info_dict

1755

1756

def process_subtitles(self, video_id, normal_subtitles, automatic_captions):

1757

"""Select the requested subtitles and their format"""

1758

available_subs = {}

1759

if normal_subtitles and self.params.get('writesubtitles'):

1760

available_subs.update(normal_subtitles)

1761

if automatic_captions and self.params.get('writeautomaticsub'):

1762

for lang, cap_info in automatic_captions.items():

1763

if lang not in available_subs:

1764

available_subs[lang] = cap_info

1765

1766

if (not self.params.get('writesubtitles') and not

1767

self.params.get('writeautomaticsub') or not

available_subs):

return None

if self.params.get('allsubtitles', False):

1772

requested_langs = available_subs.keys()

1773

else:

1774

if self.params.get('subtitleslangs', False):

1775

requested_langs = self.params.get('subtitleslangs')

1776

elif 'en' in available_subs:

1777

requested_langs = ['en']

1778

else:

1779

requested_langs = [list(available_subs.keys())[0]]

1780

1781

formats_query = self.params.get('subtitlesformat', 'best')

1782

formats_preference = formats_query.split('/') if formats_query else []

1783

subs = {}

1784

for lang in requested_langs:

1785

formats = available_subs.get(lang)

1786

if formats is None:

1787

self.report_warning('%s subtitles not available for %s' % (lang, video_id))

1788

continue

1789

for ext in formats_preference:

if ext == 'best':

f = formats[-1]

break

matches = list(filter(lambda f: f['ext'] == ext, formats))

if matches:

f = matches[-1]

break

else:

f = formats[-1]

self.report_warning(

'No subtitle format found matching "%s" for language %s, '

1801

'using %s' % (formats_query, lang, f['ext']))

subs[lang] = f

return subs

def __forced_printings(self, info_dict, filename, incomplete):

1806

def print_mandatory(field):

1807

if (self.params.get('force%s' % field, False)

1808

and (not incomplete or info_dict.get(field) is not None)):

1809

self.to_stdout(info_dict[field])

1810

1811

def print_optional(field):

1812

if (self.params.get('force%s' % field, False)

1813

and info_dict.get(field) is not None):

1814

self.to_stdout(info_dict[field])

1815

1816

print_mandatory('title')

1817

print_mandatory('id')

1818

if self.params.get('forceurl', False) and not incomplete:

1819

if info_dict.get('requested_formats') is not None:

1820

for f in info_dict['requested_formats']:

1821

self.to_stdout(f['url'] + f.get('play_path', ''))

1822

else:

1823

# For RTMP URLs, also include the playpath

1824

self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))

1825

print_optional('thumbnail')

1826

print_optional('description')

1827

if self.params.get('forcefilename', False) and filename is not None:

1828

self.to_stdout(filename)

1829

if self.params.get('forceduration', False) and info_dict.get('duration') is not None:

1830

self.to_stdout(formatSeconds(info_dict['duration']))

1831

print_mandatory('format')

1832

if self.params.get('forcejson', False):

1833

self.to_stdout(json.dumps(info_dict))

1834

1835

def process_info(self, info_dict):

1836

"""Process a single resolved IE result."""

1837

1838

assert info_dict.get('_type', 'video') == 'video'

1839

1840

max_downloads = self.params.get('max_downloads')

1841

if max_downloads is not None:

1842

if self._num_downloads >= int(max_downloads):

1843

raise MaxDownloadsReached()

1844

1845

# TODO: backward compatibility, to be removed

1846

info_dict['fulltitle'] = info_dict['title']

1847

1848

if 'format' not in info_dict:

1849

info_dict['format'] = info_dict['ext']

1850

1851

reason = self._match_entry(info_dict, incomplete=False)

1852

if reason is not None:

1853

self.to_screen('[download] ' + reason)

1854

return

1855

1856

self._num_downloads += 1

1857

1858

info_dict['_filename'] = filename = self.prepare_filename(info_dict)

1859

1860

# Forced printings

1861

self.__forced_printings(info_dict, filename, incomplete=False)

1862

1863

if self.params.get('simulate', False):

1864

if self.params.get('force_write_download_archive', False):

1865

self.record_download_archive(info_dict)

1866

1867

# Do nothing else if in simulate mode

return

if filename is None:

return

def ensure_dir_exists(path):

1874

try:

1875

dn = os.path.dirname(path)

1876

if dn and not os.path.exists(dn):

1877

os.makedirs(dn)

1878

return True

1879

except (OSError, IOError) as err:

1880

self.report_error('unable to create directory ' + error_to_compat_str(err))

1881

return False

1882

1883

if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):

1884

return

1885

1886

if self.params.get('writedescription', False):

1887

descfn = replace_extension(filename, 'description', info_dict.get('ext'))

1888

if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):

1889

self.to_screen('[info] Video description is already present')

1890

elif info_dict.get('description') is None:

1891

self.report_warning('There\'s no description to write.')

1892

else:

1893

try:

1894

self.to_screen('[info] Writing video description to: ' + descfn)

1895

with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:

1896

descfile.write(info_dict['description'])

1897

except (OSError, IOError):

1898

self.report_error('Cannot write description file ' + descfn)

1899

return

1900

1901

if self.params.get('writeannotations', False):

1902

annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))

1903

if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):

1904

self.to_screen('[info] Video annotations are already present')

1905

elif not info_dict.get('annotations'):

1906

self.report_warning('There are no annotations to write.')

1907

else:

1908

try:

1909

self.to_screen('[info] Writing video annotations to: ' + annofn)

1910

with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:

1911

annofile.write(info_dict['annotations'])

1912

except (KeyError, TypeError):

1913

self.report_warning('There are no annotations to write.')

1914

except (OSError, IOError):

1915

self.report_error('Cannot write annotations file: ' + annofn)

1916

return

1917

1918

def dl(name, info, subtitle=False):

1919

fd = get_suitable_downloader(info, self.params)(self, self.params)

1920

for ph in self._progress_hooks:

1921

fd.add_progress_hook(ph)

1922

if self.params.get('verbose'):

1923

self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))

1924

return fd.download(name, info, subtitle)

1925

1926

subtitles_are_requested = any([self.params.get('writesubtitles', False),

1927

self.params.get('writeautomaticsub')])

1928

1929

if subtitles_are_requested and info_dict.get('requested_subtitles'):

1930

# subtitles download errors are already managed as troubles in relevant IE

1931

# that way it will silently go on when used with unsupporting IE

1932

subtitles = info_dict['requested_subtitles']

1933

# ie = self.get_info_extractor(info_dict['extractor_key'])

1934

for sub_lang, sub_info in subtitles.items():

1935

sub_format = sub_info['ext']

1936

sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))

1937

if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):

1938

self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))

1939

else:

1940

self.to_screen('[info] Writing video subtitles to: ' + sub_filename)

1941

if sub_info.get('data') is not None:

1942

try:

1943

# Use newline='' to prevent conversion of newline characters

1944

# See https://github.com/ytdl-org/youtube-dl/issues/10268

1945

with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:

1946

subfile.write(sub_info['data'])

1947

except (OSError, IOError):

1948

self.report_error('Cannot write subtitles file ' + sub_filename)

return

else:

try:

dl(sub_filename, sub_info, subtitle=True)

1953

'''

1954

if self.params.get('sleep_interval_subtitles', False):

1955

dl(sub_filename, sub_info)

1956

else:

1957

sub_data = ie._request_webpage(

1958

sub_info['url'], info_dict['id'], note=False).read()

1959

with io.open(encodeFilename(sub_filename), 'wb') as subfile:

1960

subfile.write(sub_data)

1961

'''

1962

except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:

1963

self.report_warning('Unable to download subtitle for "%s": %s' %

1964

(sub_lang, error_to_compat_str(err)))

1965

continue

1966

1967

if self.params.get('skip_download', False):

1968

if self.params.get('convertsubtitles', False):

1969

subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))

1970

filename_real_ext = os.path.splitext(filename)[1][1:]

1971

filename_wo_ext = (

1972

os.path.splitext(filename)[0]

1973

if filename_real_ext == info_dict['ext']

1974

else filename)

1975

afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))

1976

if subconv.available:

1977

info_dict.setdefault('__postprocessors', [])

1978

# info_dict['__postprocessors'].append(subconv)

1979

if os.path.exists(encodeFilename(afilename)):

1980

self.to_screen(

1981

'[download] %s has already been downloaded and '

1982

'converted' % afilename)

1983

else:

1984

try:

1985

self.post_process(filename, info_dict)

1986

except (PostProcessingError) as err:

1987

self.report_error('postprocessing: %s' % str(err))

1988

return

1989

1990

if self.params.get('writeinfojson', False):

1991

infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))

1992

if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):

1993

self.to_screen('[info] Video description metadata is already present')

1994

else:

1995

self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)

1996

try:

1997

write_json_file(self.filter_requested_info(info_dict), infofn)

1998

except (OSError, IOError):

1999

self.report_error('Cannot write metadata to JSON file ' + infofn)

2000

return

2001

2002

self._write_thumbnails(info_dict, filename)

2003

2004

# Write internet shortcut files

2005

url_link = webloc_link = desktop_link = False

2006

if self.params.get('writelink', False):

2007

if sys.platform == "darwin": # macOS.

2008

webloc_link = True

2009

elif sys.platform.startswith("linux"):

2010

desktop_link = True

2011

else: # if sys.platform in ['win32', 'cygwin']:

2012

url_link = True

2013

if self.params.get('writeurllink', False):

2014

url_link = True

2015

if self.params.get('writewebloclink', False):

2016

webloc_link = True

2017

if self.params.get('writedesktoplink', False):

2018

desktop_link = True

2019

2020

if url_link or webloc_link or desktop_link:

2021

if 'webpage_url' not in info_dict:

2022

self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')

2023

return

2024

ascii_url = iri_to_uri(info_dict['webpage_url'])

2025

2026

def _write_link_file(extension, template, newline, embed_filename):

2027

linkfn = replace_extension(filename, extension, info_dict.get('ext'))

2028

if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(linkfn)):

2029

self.to_screen('[info] Internet shortcut is already present')

2030

else:

2031

try:

2032

self.to_screen('[info] Writing internet shortcut to: ' + linkfn)

2033

with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:

2034

template_vars = {'url': ascii_url}

2035

if embed_filename:

2036

template_vars['filename'] = linkfn[:-(len(extension) + 1)]

2037

linkfile.write(template % template_vars)

2038

except (OSError, IOError):

2039

self.report_error('Cannot write internet shortcut ' + linkfn)

return False

return True

if url_link:

if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):

2045

return

2046

if webloc_link:

2047

if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):

2048

return

2049

if desktop_link:

2050

if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):

return

# Download

must_record_download_archive = False

2055

if not self.params.get('skip_download', False):

2056

try:

2057

if info_dict.get('requested_formats') is not None:

2058

downloaded = []

2059

success = True

2060

merger = FFmpegMergerPP(self)

2061

if not merger.available:

2062

postprocessors = []

2063

self.report_warning('You have requested multiple '

2064

'formats but ffmpeg or avconv are not installed.'

2065

' The formats won\'t be merged.')

2066

else:

2067

postprocessors = [merger]

2068

2069

def compatible_formats(formats):

2070

# TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.

2071

video_formats = [format for format in formats if format.get('vcodec') != 'none']

2072

audio_formats = [format for format in formats if format.get('acodec') != 'none']

2073

if len(video_formats) > 2 or len(audio_formats) > 2:

return False

# Check extension

exts = set(format.get('ext') for format in formats)

2078

COMPATIBLE_EXTS = (

2079

set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),

2080

set(('webm',)),

2081

)

2082

for ext_sets in COMPATIBLE_EXTS:

2083

if ext_sets.issuperset(exts):

2084

return True

2085

# TODO: Check acodec/vcodec

2086

return False

2087

2088

filename_real_ext = os.path.splitext(filename)[1][1:]

2089

filename_wo_ext = (

2090

os.path.splitext(filename)[0]

2091

if filename_real_ext == info_dict['ext']

2092

else filename)

2093

requested_formats = info_dict['requested_formats']

2094

if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):

2095

info_dict['ext'] = 'mkv'

2096

self.report_warning(

2097

'Requested formats are incompatible for merge and will be merged into mkv.')

2098

# Ensure filename always has a correct extension for successful merge

2099

filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])

2100

if os.path.exists(encodeFilename(filename)):

2101

self.to_screen(

2102

'[download] %s has already been downloaded and '

2103

'merged' % filename)

2104

else:

2105

for f in requested_formats:

2106

new_info = dict(info_dict)

2107

new_info.update(f)

2108

fname = prepend_extension(

2109

self.prepare_filename(new_info),

2110

'f%s' % f['format_id'], new_info['ext'])

2111

if not ensure_dir_exists(fname):

2112

return

2113

downloaded.append(fname)

2114

partial_success, real_download = dl(fname, new_info)

2115

success = success and partial_success

2116

info_dict['__postprocessors'] = postprocessors

2117

info_dict['__files_to_merge'] = downloaded

2118

# Even if there were no downloads, it is being merged only now

2119

info_dict['__real_download'] = True

2120

else:

2121

# Just a single file

2122

success, real_download = dl(filename, info_dict)

2123

info_dict['__real_download'] = real_download

2124

except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:

2125

self.report_error('unable to download video data: %s' % error_to_compat_str(err))

2126

return

2127

except (OSError, IOError) as err:

2128

raise UnavailableVideoError(err)

2129

except (ContentTooShortError, ) as err:

2130

self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))

2131

return

2132

2133

if success and filename != '-':

2134

# Fixup content

2135

fixup_policy = self.params.get('fixup')

2136

if fixup_policy is None:

2137

fixup_policy = 'detect_or_warn'

2138

2139

INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'

2140

2141

stretched_ratio = info_dict.get('stretched_ratio')

2142

if stretched_ratio is not None and stretched_ratio != 1:

2143

if fixup_policy == 'warn':

2144

self.report_warning('%s: Non-uniform pixel ratio (%s)' % (

2145

info_dict['id'], stretched_ratio))

2146

elif fixup_policy == 'detect_or_warn':

2147

stretched_pp = FFmpegFixupStretchedPP(self)

2148

if stretched_pp.available:

2149

info_dict.setdefault('__postprocessors', [])

2150

info_dict['__postprocessors'].append(stretched_pp)

2151

else:

2152

self.report_warning(

2153

'%s: Non-uniform pixel ratio (%s). %s'

2154

% (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))

2155

else:

2156

assert fixup_policy in ('ignore', 'never')

2157

2158

if (info_dict.get('requested_formats') is None

2159

and info_dict.get('container') == 'm4a_dash'):

2160

if fixup_policy == 'warn':

2161

self.report_warning(

2162

'%s: writing DASH m4a. '

2163

'Only some players support this container.'

2164

% info_dict['id'])

2165

elif fixup_policy == 'detect_or_warn':

2166

fixup_pp = FFmpegFixupM4aPP(self)

2167

if fixup_pp.available:

2168

info_dict.setdefault('__postprocessors', [])

2169

info_dict['__postprocessors'].append(fixup_pp)

2170

else:

2171

self.report_warning(

2172

'%s: writing DASH m4a. '

2173

'Only some players support this container. %s'

2174

% (info_dict['id'], INSTALL_FFMPEG_MESSAGE))

2175

else:

2176

assert fixup_policy in ('ignore', 'never')

2177

2178

if (info_dict.get('protocol') == 'm3u8_native'

2179

or info_dict.get('protocol') == 'm3u8'

2180

and self.params.get('hls_prefer_native')):

2181

if fixup_policy == 'warn':

2182

self.report_warning('%s: malformed AAC bitstream detected.' % (

2183

info_dict['id']))

2184

elif fixup_policy == 'detect_or_warn':

2185

fixup_pp = FFmpegFixupM3u8PP(self)

2186

if fixup_pp.available:

2187

info_dict.setdefault('__postprocessors', [])

2188

info_dict['__postprocessors'].append(fixup_pp)

2189

else:

2190

self.report_warning(

2191

'%s: malformed AAC bitstream detected. %s'

2192

% (info_dict['id'], INSTALL_FFMPEG_MESSAGE))

2193

else:

2194

assert fixup_policy in ('ignore', 'never')

2195

2196

try:

2197

self.post_process(filename, info_dict)

2198

except (PostProcessingError) as err:

2199

self.report_error('postprocessing: %s' % str(err))

2200

return

2201

must_record_download_archive = True

2202

2203

if must_record_download_archive or self.params.get('force_write_download_archive', False):

2204

self.record_download_archive(info_dict)

2205

2206

def download(self, url_list):

2207

"""Download a given list of URLs."""

2208

outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

2209

if (len(url_list) > 1

2210

and outtmpl != '-'

2211

and '%' not in outtmpl

2212

and self.params.get('max_downloads') != 1):

2213

raise SameFileError(outtmpl)

for url in url_list:

try:

# It also downloads the videos

2218

res = self.extract_info(

2219

url, force_generic_extractor=self.params.get('force_generic_extractor', False))

2220

except UnavailableVideoError:

2221

self.report_error('unable to download video')

2222

except MaxDownloadsReached:

2223

self.to_screen('[info] Maximum number of downloaded files reached.')

2224

raise

2225

else:

2226

if self.params.get('dump_single_json', False):

2227

self.to_stdout(json.dumps(res))

2228

2229

return self._download_retcode

2230

2231

def download_with_info_file(self, info_filename):

2232

with contextlib.closing(fileinput.FileInput(

2233

[info_filename], mode='r',

2234

openhook=fileinput.hook_encoded('utf-8'))) as f:

2235

# FileInput doesn't have a read method, we can't call json.load

2236

info = self.filter_requested_info(json.loads('\n'.join(f)))

2237

try:

2238

self.process_ie_result(info, download=True)

2239

except DownloadError:

2240

webpage_url = info.get('webpage_url')

2241

if webpage_url is not None:

2242

self.report_warning('The info failed to download, trying with "%s"' % webpage_url)

2243

return self.download([webpage_url])

2244

else:

2245

raise

2246

return self._download_retcode

2247

2248

@staticmethod

2249

def filter_requested_info(info_dict):

2250

return dict(

2251

(k, v) for k, v in info_dict.items()

2252

if k not in ['requested_formats', 'requested_subtitles'])

2253

2254

def post_process(self, filename, ie_info):

2255

"""Run all the postprocessors on the given file."""

2256

info = dict(ie_info)

2257

info['filepath'] = filename

2258

pps_chain = []

2259

if ie_info.get('__postprocessors') is not None:

2260

pps_chain.extend(ie_info['__postprocessors'])

2261

pps_chain.extend(self._pps)

for pp in pps_chain:

files_to_delete = []

try:

files_to_delete, info = pp.run(info)

2266

except PostProcessingError as e:

2267

self.report_error(e.msg)

2268

if files_to_delete and not self.params.get('keepvideo', False):

2269

for old_filename in set(files_to_delete):

2270

self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)

2271

try:

2272

os.remove(encodeFilename(old_filename))

2273

except (IOError, OSError):

2274

self.report_warning('Unable to remove downloaded original file')

2275

2276

def _make_archive_id(self, info_dict):

2277

video_id = info_dict.get('id')

2278

if not video_id:

2279

return

2280

# Future-proof against any change in case

2281

# and backwards compatibility with prior versions

2282

extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist

2283

if extractor is None:

2284

url = str_or_none(info_dict.get('url'))

2285

if not url:

2286

return

2287

# Try to find matching extractor for the URL and take its ie_key

2288

for ie in self._ies:

2289

if ie.suitable(url):

2290

extractor = ie.ie_key()

break

else:

return

return extractor.lower() + ' ' + video_id

2295

2296

def in_download_archive(self, info_dict):

2297

fn = self.params.get('download_archive')

if fn is None:

return False

vid_id = self._make_archive_id(info_dict)

2302

if not vid_id:

2303

return False # Incomplete video information

2304

2305

return vid_id in self.archive

2306

2307

def record_download_archive(self, info_dict):

2308

fn = self.params.get('download_archive')

2309

if fn is None:

2310

return

2311

vid_id = self._make_archive_id(info_dict)

2312

assert vid_id

2313

with locked_file(fn, 'a', encoding='utf-8') as archive_file:

2314

archive_file.write(vid_id + '\n')

2315

self.archive.add(vid_id)

2316

2317

@staticmethod

2318

def format_resolution(format, default='unknown'):

2319

if format.get('vcodec') == 'none':

2320

return 'audio only'

2321

if format.get('resolution') is not None:

2322

return format['resolution']

2323

if format.get('height') is not None:

2324

if format.get('width') is not None:

2325

res = '%sx%s' % (format['width'], format['height'])

2326

else:

2327

res = '%sp' % format['height']

2328

elif format.get('width') is not None:

2329

res = '%dx?' % format['width']

else:

res = default

return res

def _format_note(self, fdict):

2335

res = ''

2336

if fdict.get('ext') in ['f4f', 'f4m']:

2337

res += '(unsupported) '

2338

if fdict.get('language'):

2339

if res:

2340

res += ' '

2341

res += '[%s] ' % fdict['language']

2342

if fdict.get('format_note') is not None:

2343

res += fdict['format_note'] + ' '

2344

if fdict.get('tbr') is not None:

2345

res += '%4dk ' % fdict['tbr']

2346

if fdict.get('container') is not None:

2347

if res:

2348

res += ', '

2349

res += '%s container' % fdict['container']

2350

if (fdict.get('vcodec') is not None

2351

and fdict.get('vcodec') != 'none'):

2352

if res:

2353

res += ', '

2354

res += fdict['vcodec']

2355

if fdict.get('vbr') is not None:

2356

res += '@'

2357

elif fdict.get('vbr') is not None and fdict.get('abr') is not None:

2358

res += 'video@'

2359

if fdict.get('vbr') is not None:

2360

res += '%4dk' % fdict['vbr']

2361

if fdict.get('fps') is not None:

2362

if res:

2363

res += ', '

2364

res += '%sfps' % fdict['fps']

2365

if fdict.get('acodec') is not None:

2366

if res:

2367

res += ', '

2368

if fdict['acodec'] == 'none':

2369

res += 'video only'

2370

else:

2371

res += '%-5s' % fdict['acodec']

2372

elif fdict.get('abr') is not None:

if res:

res += ', '

res += 'audio'

if fdict.get('abr') is not None:

2377

res += '@%3dk' % fdict['abr']

2378

if fdict.get('asr') is not None:

2379

res += ' (%5dHz)' % fdict['asr']

2380

if fdict.get('filesize') is not None:

2381

if res:

2382

res += ', '

2383

res += format_bytes(fdict['filesize'])

2384

elif fdict.get('filesize_approx') is not None:

2385

if res:

2386

res += ', '

2387

res += '~' + format_bytes(fdict['filesize_approx'])

2388

return res

2389

2390

def _format_note_table(self, f):

2391

def join_fields(*vargs):

2392

return ', '.join((val for val in vargs if val != ''))

2393

2394

return join_fields(

2395

'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',

2396

format_field(f, 'language', '[%s]'),

2397

format_field(f, 'format_note'),

2398

format_field(f, 'container', ignore=(None, f.get('ext'))),

2399

format_field(f, 'asr', '%5dHz'))

2400

2401

def list_formats(self, info_dict):

2402

formats = info_dict.get('formats', [info_dict])

2403

new_format = self.params.get('listformats_table', False)

if new_format:

table = [

[

format_field(f, 'format_id'),

2408

format_field(f, 'ext'),

2409

self.format_resolution(f),

2410

format_field(f, 'fps', '%d'),

2411

'|',

2412

format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),

2413

format_field(f, 'tbr', '%4dk'),

2414

f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n"),

2415

'|',

2416

format_field(f, 'vcodec', default='unknown').replace('none', ''),

2417

format_field(f, 'vbr', '%4dk'),

2418

format_field(f, 'acodec', default='unknown').replace('none', ''),

2419

format_field(f, 'abr', '%3dk'),

2420

format_field(f, 'asr', '%5dHz'),

2421

self._format_note_table(f)]

2422

for f in formats

2423

if f.get('preference') is None or f['preference'] >= -1000]

2424

header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',

2425

'|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']

else:

table = [

[

format_field(f, 'format_id'),

2430

format_field(f, 'ext'),

2431

self.format_resolution(f),

2432

self._format_note(f)]

2433

for f in formats

2434

if f.get('preference') is None or f['preference'] >= -1000]

2435

header_line = ['format code', 'extension', 'resolution', 'note']

2436

2437

# if len(formats) > 1:

2438

# table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'

2439

self.to_screen(

2440

'[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(

header_line,

table,

delim=new_format,

extraGap=(0 if new_format else 1),

2445

hideEmpty=new_format)))

2446

2447

def list_thumbnails(self, info_dict):

2448

thumbnails = info_dict.get('thumbnails')

2449

if not thumbnails:

2450

self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])

return

self.to_screen(

'[info] Thumbnails for %s:' % info_dict['id'])

2455

self.to_screen(render_table(

2456

['ID', 'width', 'height', 'URL'],

2457

[[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))

2458

2459

def list_subtitles(self, video_id, subtitles, name='subtitles'):

2460

if not subtitles:

2461

self.to_screen('%s has no %s' % (video_id, name))

2462

return

2463

self.to_screen(

2464

'Available %s for %s:' % (name, video_id))

2465

self.to_screen(render_table(

2466

['Language', 'formats'],

2467

[[lang, ', '.join(f['ext'] for f in reversed(formats))]

2468

for lang, formats in subtitles.items()]))

2469

2470

def urlopen(self, req):

2471

""" Start an HTTP download """

2472

if isinstance(req, compat_basestring):

2473

req = sanitized_Request(req)

2474

return self._opener.open(req, timeout=self._socket_timeout)

2475

2476

def print_debug_header(self):

2477

if not self.params.get('verbose'):

2478

return

2479

2480

if type('') is not compat_str:

2481

# Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)

2482

self.report_warning(

2483

'Your Python is broken! Update to a newer and supported version')

2484

2485

stdout_encoding = getattr(

2486

sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)

2487

encoding_str = (

2488

'[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (

2489

locale.getpreferredencoding(),

2490

sys.getfilesystemencoding(),

2491

stdout_encoding,

2492

self.get_encoding()))

2493

write_string(encoding_str, encoding=None)

2494

2495

self._write_string('[debug] youtube-dlc version ' + __version__ + '\n')

2496

if _LAZY_LOADER:

2497

self._write_string('[debug] Lazy loading extractors enabled' + '\n')

2498

try:

2499

sp = subprocess.Popen(

2500

['git', 'rev-parse', '--short', 'HEAD'],

2501

stdout=subprocess.PIPE, stderr=subprocess.PIPE,

2502

cwd=os.path.dirname(os.path.abspath(__file__)))

2503

out, err = sp.communicate()

2504

out = out.decode().strip()

2505

if re.match('[0-9a-f]+', out):

2506

self._write_string('[debug] Git HEAD: ' + out + '\n')

except Exception:

try:

sys.exc_clear()

except Exception:

pass

def python_implementation():

2514

impl_name = platform.python_implementation()

2515

if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):

2516

return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]

2517

return impl_name

2518

2519

self._write_string('[debug] Python version %s (%s) - %s\n' % (

2520

platform.python_version(), python_implementation(),

2521

platform_name()))

2522

2523

exe_versions = FFmpegPostProcessor.get_versions(self)

2524

exe_versions['rtmpdump'] = rtmpdump_version()

2525

exe_versions['phantomjs'] = PhantomJSwrapper._version()

2526

exe_str = ', '.join(

2527

'%s %s' % (exe, v)

2528

for exe, v in sorted(exe_versions.items())

if v

)

if not exe_str:

exe_str = 'none'

self._write_string('[debug] exe versions: %s\n' % exe_str)

2534

2535

proxy_map = {}

2536

for handler in self._opener.handlers:

2537

if hasattr(handler, 'proxies'):

2538

proxy_map.update(handler.proxies)

2539

self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

2540

2541

if self.params.get('call_home', False):

2542

ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')

2543

self._write_string('[debug] Public IP address: %s\n' % ipaddr)

2544

latest_version = self.urlopen(

2545

'https://yt-dl.org/latest/version').read().decode('utf-8')

2546

if version_tuple(latest_version) > version_tuple(__version__):

2547

self.report_warning(

2548

'You are using an outdated version (newest version: %s)! '

2549

'See https://yt-dl.org/update if you need help updating.' %

2550

latest_version)

2551

2552

def _setup_opener(self):

2553

timeout_val = self.params.get('socket_timeout')

2554

self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

2555

2556

opts_cookiefile = self.params.get('cookiefile')

2557

opts_proxy = self.params.get('proxy')

2558

2559

if opts_cookiefile is None:

2560

self.cookiejar = compat_cookiejar.CookieJar()

2561

else:

2562

opts_cookiefile = expand_path(opts_cookiefile)

2563

self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)

2564

if os.access(opts_cookiefile, os.R_OK):

2565

self.cookiejar.load(ignore_discard=True, ignore_expires=True)

2566

2567

cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

2568

if opts_proxy is not None:

if opts_proxy == '':

proxies = {}

else:

proxies = {'http': opts_proxy, 'https': opts_proxy}

2573

else:

2574

proxies = compat_urllib_request.getproxies()

2575

# Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)

2576

if 'http' in proxies and 'https' not in proxies:

2577

proxies['https'] = proxies['http']

2578

proxy_handler = PerRequestProxyHandler(proxies)

2579

2580

debuglevel = 1 if self.params.get('debug_printtraffic') else 0

2581

https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)

2582

ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)

2583

redirect_handler = YoutubeDLRedirectHandler()

2584

data_handler = compat_urllib_request_DataHandler()

2585

2586

# When passing our own FileHandler instance, build_opener won't add the

2587

# default FileHandler and allows us to disable the file protocol, which

2588

# can be used for malicious purposes (see

2589

# https://github.com/ytdl-org/youtube-dl/issues/8227)

2590

file_handler = compat_urllib_request.FileHandler()

2591

2592

def file_open(*args, **kwargs):

2593

raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')

2594

file_handler.file_open = file_open

2595

2596

opener = compat_urllib_request.build_opener(

2597

proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

2598

2599

# Delete the default user-agent header, which would otherwise apply in

2600

# cases where our custom HTTP handler doesn't come into play

2601

# (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)

2602

opener.addheaders = []

2603

self._opener = opener

2604

2605

def encode(self, s):

2606

if isinstance(s, bytes):

2607

return s # Already encoded

2608

2609

try:

2610

return s.encode(self.get_encoding())

2611

except UnicodeEncodeError as err:

2612

err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'

2613

raise

2614

2615

def get_encoding(self):

2616

encoding = self.params.get('encoding')

2617

if encoding is None:

2618

encoding = preferredencoding()

2619

return encoding

2620

2621

def _write_thumbnails(self, info_dict, filename):

2622

if self.params.get('writethumbnail', False):

2623

thumbnails = info_dict.get('thumbnails')

2624

if thumbnails:

2625

thumbnails = [thumbnails[-1]]

2626

elif self.params.get('write_all_thumbnails', False):

2627

thumbnails = info_dict.get('thumbnails')

else:

return

if not thumbnails:

# No thumbnails present, so return immediately

return

for t in thumbnails:

thumb_ext = determine_ext(t['url'], 'jpg')

2637

suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''

2638

thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''

2639

t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))

2640

2641

if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):

2642

self.to_screen('[%s] %s: Thumbnail %sis already present' %

2643

(info_dict['extractor'], info_dict['id'], thumb_display_id))

2644

else:

2645

self.to_screen('[%s] %s: Downloading thumbnail %s...' %

2646

(info_dict['extractor'], info_dict['id'], thumb_display_id))

2647

try:

2648

uf = self.urlopen(t['url'])

2649

with open(encodeFilename(thumb_filename), 'wb') as thumbf:

2650

shutil.copyfileobj(uf, thumbf)

2651

self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %

2652

(info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))

2653

except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:

2654

self.report_warning('Unable to download thumbnail "%s": %s' %

2655

(t['url'], error_to_compat_str(err)))