]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
[youtube] PEP 8
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de
JMF
19import re
20import shutil
dca08720 21import subprocess
8222d8de 22import socket
15da37c7 23import string
8222d8de
JMF
24import sys
25import time
67134eab 26import tokenize
8222d8de 27import traceback
75822ca7 28import random
8222d8de 29
8c25f81b 30from .compat import (
82d8a8b6 31 compat_basestring,
dca08720 32 compat_cookiejar,
003c69a8 33 compat_get_terminal_size,
ce02ed60 34 compat_http_client,
4f026faf 35 compat_kwargs,
d0d9ade4 36 compat_numeric_types,
e9c0cdd3 37 compat_os_name,
ce02ed60 38 compat_str,
67134eab 39 compat_tokenize_tokenize,
ce02ed60
PH
40 compat_urllib_error,
41 compat_urllib_request,
8b172c2e 42 compat_urllib_request_DataHandler,
8c25f81b
PH
43)
44from .utils import (
eedb7ba5
S
45 age_restricted,
46 args_to_str,
ce02ed60
PH
47 ContentTooShortError,
48 date_from_str,
49 DateRange,
acd69589 50 DEFAULT_OUTTMPL,
ce02ed60 51 determine_ext,
b5559424 52 determine_protocol,
ce02ed60 53 DownloadError,
c0384f22 54 encode_compat_str,
ce02ed60 55 encodeFilename,
9b9c5355 56 error_to_compat_str,
590bc6f6 57 expand_path,
ce02ed60 58 ExtractorError,
02dbf93f 59 format_bytes,
525ef922 60 formatSeconds,
773f291d 61 GeoRestrictedError,
c9969434 62 int_or_none,
773f291d 63 ISO3166Utils,
ce02ed60 64 locked_file,
dca08720 65 make_HTTPS_handler,
ce02ed60 66 MaxDownloadsReached,
b7ab0590 67 PagedList,
083c9df9 68 parse_filesize,
91410c9b 69 PerRequestProxyHandler,
dca08720 70 platform_name,
eedb7ba5 71 PostProcessingError,
ce02ed60 72 preferredencoding,
eedb7ba5 73 prepend_extension,
51fb4995 74 register_socks_protocols,
cfb56d1a 75 render_table,
eedb7ba5 76 replace_extension,
ce02ed60
PH
77 SameFileError,
78 sanitize_filename,
1bb5c511 79 sanitize_path,
dcf77cf1 80 sanitize_url,
67dda517 81 sanitized_Request,
e5660ee6 82 std_headers,
ce02ed60 83 subtitles_filename,
ce02ed60 84 UnavailableVideoError,
29eb5174 85 url_basename,
58b1f00d 86 version_tuple,
ce02ed60
PH
87 write_json_file,
88 write_string,
6a3f4c3f 89 YoutubeDLCookieProcessor,
dca08720 90 YoutubeDLHandler,
ce02ed60 91)
a0e07d31 92from .cache import Cache
e0986e31 93from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
3bc2ddcc 94from .downloader import get_suitable_downloader
4c83c967 95from .downloader.rtmp import rtmpdump_version
4f026faf 96from .postprocessor import (
f17f8651 97 FFmpegFixupM3u8PP,
62cd676c 98 FFmpegFixupM4aPP,
6271f1ca 99 FFmpegFixupStretchedPP,
4f026faf
PH
100 FFmpegMergerPP,
101 FFmpegPostProcessor,
102 get_postprocessor,
103)
dca08720 104from .version import __version__
8222d8de 105
e9c0cdd3
YCH
106if compat_os_name == 'nt':
107 import ctypes
108
8222d8de
JMF
109
110class YoutubeDL(object):
111 """YoutubeDL class.
112
113 YoutubeDL objects are the ones responsible of downloading the
114 actual video file and writing it to disk if the user has requested
115 it, among some other tasks. In most cases there should be one per
116 program. As, given a video URL, the downloader doesn't know how to
117 extract all the needed information, task that InfoExtractors do, it
118 has to pass the URL to one of them.
119
120 For this, YoutubeDL objects have a method that allows
121 InfoExtractors to be registered in a given order. When it is passed
122 a URL, the YoutubeDL object handles it to the first InfoExtractor it
123 finds that reports being able to handle it. The InfoExtractor extracts
124 all the information about the video or videos the URL refers to, and
125 YoutubeDL process the extracted information, possibly using a File
126 Downloader to download the video.
127
128 YoutubeDL objects accept a lot of parameters. In order not to saturate
129 the object constructor with arguments, it receives a dictionary of
130 options instead. These options are available through the params
131 attribute for the InfoExtractors to use. The YoutubeDL also
132 registers itself as the downloader in charge for the InfoExtractors
133 that are added to it, so this is a "mutual registration".
134
135 Available options:
136
137 username: Username for authentication purposes.
138 password: Password for authentication purposes.
180940e0 139 videopassword: Password for accessing a video.
1da50aa3
S
140 ap_mso: Adobe Pass multiple-system operator identifier.
141 ap_username: Multiple-system operator account username.
142 ap_password: Multiple-system operator account password.
8222d8de
JMF
143 usenetrc: Use netrc for authentication instead.
144 verbose: Print additional info to stdout.
145 quiet: Do not print messages to stdout.
ad8915b7 146 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
147 forceurl: Force printing final URL.
148 forcetitle: Force printing title.
149 forceid: Force printing ID.
150 forcethumbnail: Force printing thumbnail URL.
151 forcedescription: Force printing description.
152 forcefilename: Force printing final filename.
525ef922 153 forceduration: Force printing duration.
8694c600 154 forcejson: Force printing info_dict as JSON.
63e0be34
PH
155 dump_single_json: Force printing the info_dict of the whole playlist
156 (or video) as a single JSON line.
8222d8de 157 simulate: Do not download the video files.
d8600787 158 format: Video format code. See options.py for more information.
8222d8de
JMF
159 outtmpl: Template for output names.
160 restrictfilenames: Do not allow "&" and spaces in file names
161 ignoreerrors: Do not stop on download errors.
d22dec74 162 force_generic_extractor: Force downloader to use the generic extractor
8222d8de
JMF
163 nooverwrites: Prevent overwriting files.
164 playliststart: Playlist item to start at.
165 playlistend: Playlist item to end at.
c14e88f0 166 playlist_items: Specific indices of playlist to download.
ff815fe6 167 playlistreverse: Download playlist items in reverse order.
75822ca7 168 playlistrandom: Download playlist items in random order.
8222d8de
JMF
169 matchtitle: Download only matching titles.
170 rejecttitle: Reject downloads for matching titles.
8bf9319e 171 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
172 logtostderr: Log messages to stderr instead of stdout.
173 writedescription: Write the video description to a .description file
174 writeinfojson: Write the video description to a .info.json file
1fb07d10 175 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 176 writethumbnail: Write the thumbnail image to a file
ec82d85a 177 write_all_thumbnails: Write all thumbnail formats to files
8222d8de 178 writesubtitles: Write the video subtitles to a file
741dd8ea 179 writeautomaticsub: Write the automatically generated subtitles to a file
8222d8de 180 allsubtitles: Downloads all the subtitles of the video
0b7f3118 181 (requires writesubtitles or writeautomaticsub)
8222d8de 182 listsubtitles: Lists all available subtitles for the video
a504ced0 183 subtitlesformat: The format code for subtitles
aa6a10c4 184 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
185 keepvideo: Keep the video file after post-processing
186 daterange: A DateRange object, download only if the upload_date is in the range.
187 skip_download: Skip the actual download of the video file
c35f9e72 188 cachedir: Location of the cache files in the filesystem.
a0e07d31 189 False to disable filesystem cache.
47192f92 190 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
191 age_limit: An integer representing the user's age in years.
192 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
193 min_views: An integer representing the minimum view count the video
194 must have in order to not be skipped.
195 Videos without view count information are always
196 downloaded. None for no limit.
197 max_views: An integer representing the maximum view count.
198 Videos that are more popular than that are not
199 downloaded.
200 Videos without view count information are always
201 downloaded. None for no limit.
202 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
203 Videos already present in the file are not downloaded
204 again.
dca08720 205 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8 206 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
207 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
208 At the moment, this is only supported by YouTube.
a1ee09e8 209 proxy: URL of the proxy server to use
38cce791
YCH
210 geo_verification_proxy: URL of the proxy to use for IP address verification
211 on geo-restricted sites. (Experimental)
e344693b 212 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
213 bidi_workaround: Work around buggy terminals without bidirectional text
214 support, using fridibi
a0ddb8a2 215 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 216 include_ads: Download ads as well
04b4d394
PH
217 default_search: Prepend this string if an input url is not valid.
218 'auto' for elaborate guessing
62fec3b2 219 encoding: Use this encoding instead of the system-specified.
e8ee972c 220 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
221 Pass in 'in_playlist' to only show this behavior for
222 playlist items.
4f026faf 223 postprocessors: A list of dictionaries, each with an entry
71b640cc
PH
224 * key: The name of the postprocessor. See
225 youtube_dl/postprocessor/__init__.py for a list.
4f026faf
PH
226 as well as any further keyword arguments for the
227 postprocessor.
71b640cc
PH
228 progress_hooks: A list of functions that get called on download
229 progress, with a dictionary with the entries
5cda4eda 230 * status: One of "downloading", "error", or "finished".
ee69b99a 231 Check this first and ignore unknown values.
71b640cc 232
5cda4eda 233 If status is one of "downloading", or "finished", the
ee69b99a
PH
234 following properties may also be present:
235 * filename: The final filename (always present)
5cda4eda 236 * tmpfilename: The filename we're currently writing to
71b640cc
PH
237 * downloaded_bytes: Bytes on disk
238 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
239 * total_bytes_estimate: Guess of the eventual file size,
240 None if unavailable.
241 * elapsed: The number of seconds since download started.
71b640cc
PH
242 * eta: The estimated time in seconds, None if unknown
243 * speed: The download speed in bytes/second, None if
244 unknown
5cda4eda
PH
245 * fragment_index: The counter of the currently
246 downloaded video fragment.
247 * fragment_count: The number of fragments (= individual
248 files that will be merged)
71b640cc
PH
249
250 Progress hooks are guaranteed to be called at least once
251 (with status "finished") if the download is successful.
45598f15 252 merge_output_format: Extension to use when merging formats.
6271f1ca
PH
253 fixup: Automatically correct known faults of the file.
254 One of:
255 - "never": do nothing
256 - "warn": only emit a warning
257 - "detect_or_warn": check whether we can do anything
62cd676c 258 about it, warn otherwise (default)
be4a824d 259 source_address: (Experimental) Client-side IP address to bind to.
6ec6cb4e 260 call_home: Boolean, true iff we are allowed to contact the
8bfa7545 261 youtube-dl servers for debugging.
7aa589a5
S
262 sleep_interval: Number of seconds to sleep before each download when
263 used alone or a lower bound of a range for randomized
264 sleep before each download (minimum possible number
265 of seconds to sleep) when used along with
266 max_sleep_interval.
267 max_sleep_interval:Upper bound of a range for randomized sleep before each
268 download (maximum possible number of seconds to sleep).
269 Must only be used along with sleep_interval.
270 Actual sleep time will be a random float from range
271 [sleep_interval; max_sleep_interval].
cfb56d1a
PH
272 listformats: Print an overview of available video formats and exit.
273 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
274 match_filter: A function that gets called with the info_dict of
275 every video.
276 If it returns a message, the video is ignored.
277 If it returns None, the video is downloaded.
278 match_filter_func in utils.py is one example for this.
7e5db8c9 279 no_color: Do not emit color codes in output.
0a840f58 280 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
773f291d 281 HTTP header (experimental)
0a840f58 282 geo_bypass_country:
773f291d
S
283 Two-letter ISO 3166-2 country code that will be used for
284 explicit geographic restriction bypassing via faking
285 X-Forwarded-For HTTP header (experimental)
71b640cc 286
85729c51
PH
287 The following options determine which downloader is picked:
288 external_downloader: Executable of the external downloader to call.
289 None or unset for standard (built-in) downloader.
bf09af3a
S
290 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
291 if True, otherwise use ffmpeg/avconv if False, otherwise
292 use downloader suggested by extractor if None.
fe7e0c98 293
8222d8de 294 The following parameters are not used by YoutubeDL itself, they are used by
c75f0b36 295 the downloader (see youtube_dl/downloader/common.py):
8222d8de 296 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
881e6a1f 297 noresizebuffer, retries, continuedl, noprogress, consoletitle,
7d106a65 298 xattr_set_filesize, external_downloader_args, hls_use_mpegts.
76b1bd67
JMF
299
300 The following options are used by the post processors:
301 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
302 otherwise prefer avconv.
f72b0a60
S
303 postprocessor_args: A list of additional command-line arguments for the
304 postprocessor.
8222d8de
JMF
305 """
306
c9969434
S
307 _NUMERIC_FIELDS = set((
308 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
309 'timestamp', 'upload_year', 'upload_month', 'upload_day',
310 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
311 'average_rating', 'comment_count', 'age_limit',
312 'start_time', 'end_time',
313 'chapter_number', 'season_number', 'episode_number',
314 'track_number', 'disc_number', 'release_year',
315 'playlist_index',
316 ))
317
8222d8de
JMF
318 params = None
319 _ies = []
320 _pps = []
321 _download_retcode = None
322 _num_downloads = None
323 _screen_file = None
324
3511266b 325 def __init__(self, params=None, auto_init=True):
8222d8de 326 """Create a FileDownloader object with the given options."""
e9f9a10f
JMF
327 if params is None:
328 params = {}
8222d8de 329 self._ies = []
56c73665 330 self._ies_instances = {}
8222d8de 331 self._pps = []
933605d7 332 self._progress_hooks = []
8222d8de
JMF
333 self._download_retcode = 0
334 self._num_downloads = 0
335 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
0783b09b 336 self._err_file = sys.stderr
4abf617b
S
337 self.params = {
338 # Default parameters
339 'nocheckcertificate': False,
340 }
341 self.params.update(params)
a0e07d31 342 self.cache = Cache(self)
34308b30 343
be5df5ee
S
344 def check_deprecated(param, option, suggestion):
345 if self.params.get(param) is not None:
346 self.report_warning(
347 '%s is deprecated. Use %s instead.' % (option, suggestion))
348 return True
349 return False
350
351 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
38cce791
YCH
352 if self.params.get('geo_verification_proxy') is None:
353 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
354
be5df5ee
S
355 check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
356 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
357 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
358
0783b09b 359 if params.get('bidi_workaround', False):
1c088fa8
PH
360 try:
361 import pty
362 master, slave = pty.openpty()
003c69a8 363 width = compat_get_terminal_size().columns
1c088fa8
PH
364 if width is None:
365 width_args = []
366 else:
367 width_args = ['-w', str(width)]
5d681e96 368 sp_kwargs = dict(
1c088fa8
PH
369 stdin=subprocess.PIPE,
370 stdout=slave,
371 stderr=self._err_file)
5d681e96
PH
372 try:
373 self._output_process = subprocess.Popen(
374 ['bidiv'] + width_args, **sp_kwargs
375 )
376 except OSError:
5d681e96
PH
377 self._output_process = subprocess.Popen(
378 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
379 self._output_channel = os.fdopen(master, 'rb')
1c088fa8 380 except OSError as ose:
66e7ace1 381 if ose.errno == errno.ENOENT:
6febd1c1 382 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
1c088fa8
PH
383 else:
384 raise
0783b09b 385
e9137224 386 if (sys.platform != 'win32' and
8fb3ac36
PH
387 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
388 not params.get('restrictfilenames', False)):
e9137224 389 # Unicode filesystem API will throw errors (#1474, #13027)
34308b30 390 self.report_warning(
6febd1c1 391 'Assuming --restrict-filenames since file system encoding '
1b725173 392 'cannot encode all characters. '
6febd1c1 393 'Set the LC_ALL environment variable to fix this.')
4a98cdbf 394 self.params['restrictfilenames'] = True
34308b30 395
486dd09e
PH
396 if isinstance(params.get('outtmpl'), bytes):
397 self.report_warning(
398 'Parameter outtmpl is bytes, but should be a unicode string. '
399 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
400
dca08720
PH
401 self._setup_opener()
402
3511266b
PH
403 if auto_init:
404 self.print_debug_header()
405 self.add_default_info_extractors()
406
4f026faf
PH
407 for pp_def_raw in self.params.get('postprocessors', []):
408 pp_class = get_postprocessor(pp_def_raw['key'])
409 pp_def = dict(pp_def_raw)
410 del pp_def['key']
411 pp = pp_class(self, **compat_kwargs(pp_def))
412 self.add_post_processor(pp)
413
71b640cc
PH
414 for ph in self.params.get('progress_hooks', []):
415 self.add_progress_hook(ph)
416
51fb4995
YCH
417 register_socks_protocols()
418
7d4111ed
PH
419 def warn_if_short_id(self, argv):
420 # short YouTube ID starting with dash?
421 idxs = [
422 i for i, a in enumerate(argv)
423 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
424 if idxs:
425 correct_argv = (
426 ['youtube-dl'] +
427 [a for i, a in enumerate(argv) if i not in idxs] +
428 ['--'] + [argv[i] for i in idxs]
429 )
430 self.report_warning(
431 'Long argument string detected. '
432 'Use -- to separate parameters and URLs, like this:\n%s\n' %
433 args_to_str(correct_argv))
434
8222d8de
JMF
435 def add_info_extractor(self, ie):
436 """Add an InfoExtractor object to the end of the list."""
437 self._ies.append(ie)
e52d7f85
JMF
438 if not isinstance(ie, type):
439 self._ies_instances[ie.ie_key()] = ie
440 ie.set_downloader(self)
8222d8de 441
56c73665
JMF
442 def get_info_extractor(self, ie_key):
443 """
444 Get an instance of an IE with name ie_key, it will try to get one from
445 the _ies list, if there's no instance it will create a new one and add
446 it to the extractor list.
447 """
448 ie = self._ies_instances.get(ie_key)
449 if ie is None:
450 ie = get_info_extractor(ie_key)()
451 self.add_info_extractor(ie)
452 return ie
453
023fa8c4
JMF
454 def add_default_info_extractors(self):
455 """
456 Add the InfoExtractors returned by gen_extractors to the end of the list
457 """
e52d7f85 458 for ie in gen_extractor_classes():
023fa8c4
JMF
459 self.add_info_extractor(ie)
460
8222d8de
JMF
461 def add_post_processor(self, pp):
462 """Add a PostProcessor object to the end of the chain."""
463 self._pps.append(pp)
464 pp.set_downloader(self)
465
933605d7
JMF
466 def add_progress_hook(self, ph):
467 """Add the progress hook (currently only for the file downloader)"""
468 self._progress_hooks.append(ph)
8ab470f1 469
1c088fa8 470 def _bidi_workaround(self, message):
5d681e96 471 if not hasattr(self, '_output_channel'):
1c088fa8
PH
472 return message
473
5d681e96 474 assert hasattr(self, '_output_process')
11b85ce6 475 assert isinstance(message, compat_str)
6febd1c1
PH
476 line_count = message.count('\n') + 1
477 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 478 self._output_process.stdin.flush()
6febd1c1 479 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 480 for _ in range(line_count))
6febd1c1 481 return res[:-len('\n')]
1c088fa8 482
8222d8de 483 def to_screen(self, message, skip_eol=False):
0783b09b
PH
484 """Print message to stdout if not in quiet mode."""
485 return self.to_stdout(message, skip_eol, check_quiet=True)
486
734f90bb 487 def _write_string(self, s, out=None):
b58ddb32 488 write_string(s, out=out, encoding=self.params.get('encoding'))
734f90bb 489
0783b09b 490 def to_stdout(self, message, skip_eol=False, check_quiet=False):
8222d8de 491 """Print message to stdout if not in quiet mode."""
8bf9319e 492 if self.params.get('logger'):
43afe285 493 self.params['logger'].debug(message)
0783b09b 494 elif not check_quiet or not self.params.get('quiet', False):
1c088fa8 495 message = self._bidi_workaround(message)
6febd1c1 496 terminator = ['\n', ''][skip_eol]
8222d8de 497 output = message + terminator
1c088fa8 498
734f90bb 499 self._write_string(output, self._screen_file)
8222d8de
JMF
500
501 def to_stderr(self, message):
502 """Print message to stderr."""
11b85ce6 503 assert isinstance(message, compat_str)
8bf9319e 504 if self.params.get('logger'):
43afe285
IB
505 self.params['logger'].error(message)
506 else:
1c088fa8 507 message = self._bidi_workaround(message)
6febd1c1 508 output = message + '\n'
734f90bb 509 self._write_string(output, self._err_file)
8222d8de 510
1e5b9a95
PH
511 def to_console_title(self, message):
512 if not self.params.get('consoletitle', False):
513 return
4bede0d8
C
514 if compat_os_name == 'nt':
515 if ctypes.windll.kernel32.GetConsoleWindow():
516 # c_wchar_p() might not be necessary if `message` is
517 # already of type unicode()
518 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
1e5b9a95 519 elif 'TERM' in os.environ:
734f90bb 520 self._write_string('\033]0;%s\007' % message, self._screen_file)
1e5b9a95 521
bdde425c
PH
522 def save_console_title(self):
523 if not self.params.get('consoletitle', False):
524 return
4bede0d8 525 if compat_os_name != 'nt' and 'TERM' in os.environ:
efd6c574 526 # Save the title on stack
734f90bb 527 self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
528
529 def restore_console_title(self):
530 if not self.params.get('consoletitle', False):
531 return
4bede0d8 532 if compat_os_name != 'nt' and 'TERM' in os.environ:
efd6c574 533 # Restore the title from stack
734f90bb 534 self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
535
536 def __enter__(self):
537 self.save_console_title()
538 return self
539
540 def __exit__(self, *args):
541 self.restore_console_title()
f89197d7 542
dca08720
PH
543 if self.params.get('cookiefile') is not None:
544 self.cookiejar.save()
bdde425c 545
8222d8de
JMF
546 def trouble(self, message=None, tb=None):
547 """Determine action to take when a download problem appears.
548
549 Depending on if the downloader has been configured to ignore
550 download errors or not, this method may throw an exception or
551 not when errors are found, after printing the message.
552
553 tb, if given, is additional traceback information.
554 """
555 if message is not None:
556 self.to_stderr(message)
557 if self.params.get('verbose'):
558 if tb is None:
559 if sys.exc_info()[0]: # if .trouble has been called from an except block
6febd1c1 560 tb = ''
8222d8de 561 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
6febd1c1 562 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
c0384f22 563 tb += encode_compat_str(traceback.format_exc())
8222d8de
JMF
564 else:
565 tb_data = traceback.format_list(traceback.extract_stack())
6febd1c1 566 tb = ''.join(tb_data)
8222d8de
JMF
567 self.to_stderr(tb)
568 if not self.params.get('ignoreerrors', False):
569 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
570 exc_info = sys.exc_info()[1].exc_info
571 else:
572 exc_info = sys.exc_info()
573 raise DownloadError(message, exc_info)
574 self._download_retcode = 1
575
576 def report_warning(self, message):
577 '''
578 Print the message to stderr, it will be prefixed with 'WARNING:'
579 If stderr is a tty file the 'WARNING:' will be colored
580 '''
6d07ce01
JMF
581 if self.params.get('logger') is not None:
582 self.params['logger'].warning(message)
8222d8de 583 else:
ad8915b7
PH
584 if self.params.get('no_warnings'):
585 return
e9c0cdd3 586 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
6d07ce01
JMF
587 _msg_header = '\033[0;33mWARNING:\033[0m'
588 else:
589 _msg_header = 'WARNING:'
590 warning_message = '%s %s' % (_msg_header, message)
591 self.to_stderr(warning_message)
8222d8de
JMF
592
593 def report_error(self, message, tb=None):
594 '''
595 Do the same as trouble, but prefixes the message with 'ERROR:', colored
596 in red if stderr is a tty file.
597 '''
e9c0cdd3 598 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
6febd1c1 599 _msg_header = '\033[0;31mERROR:\033[0m'
8222d8de 600 else:
6febd1c1
PH
601 _msg_header = 'ERROR:'
602 error_message = '%s %s' % (_msg_header, message)
8222d8de
JMF
603 self.trouble(error_message, tb)
604
8222d8de
JMF
605 def report_file_already_downloaded(self, file_name):
606 """Report file has already been fully downloaded."""
607 try:
6febd1c1 608 self.to_screen('[download] %s has already been downloaded' % file_name)
ce02ed60 609 except UnicodeEncodeError:
6febd1c1 610 self.to_screen('[download] The file has already been downloaded')
8222d8de 611
8222d8de
JMF
612 def prepare_filename(self, info_dict):
613 """Generate the output filename."""
614 try:
615 template_dict = dict(info_dict)
616
617 template_dict['epoch'] = int(time.time())
618 autonumber_size = self.params.get('autonumber_size')
619 if autonumber_size is None:
620 autonumber_size = 5
89db639d 621 template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
17b75c0d
PH
622 if template_dict.get('resolution') is None:
623 if template_dict.get('width') and template_dict.get('height'):
624 template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
625 elif template_dict.get('height'):
805ef3c6 626 template_dict['resolution'] = '%sp' % template_dict['height']
17b75c0d 627 elif template_dict.get('width'):
51ce9117 628 template_dict['resolution'] = '%dx?' % template_dict['width']
8222d8de 629
586a91b6 630 sanitize = lambda k, v: sanitize_filename(
45598aab 631 compat_str(v),
1bb5c511 632 restricted=self.params.get('restrictfilenames'),
40df485f 633 is_id=(k == 'id' or k.endswith('_id')))
d0d9ade4 634 template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
45598aab 635 for k, v in template_dict.items()
f0e14fdd 636 if v is not None and not isinstance(v, (list, tuple, dict)))
6febd1c1 637 template_dict = collections.defaultdict(lambda: 'NA', template_dict)
8222d8de 638
b3613d36 639 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
d0d9ade4 640
89db639d
S
641 # For fields playlist_index and autonumber convert all occurrences
642 # of %(field)s to %(field)0Nd for backward compatibility
643 field_size_compat_map = {
644 'playlist_index': len(str(template_dict['n_entries'])),
645 'autonumber': autonumber_size,
646 }
647 FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
648 mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
649 if mobj:
650 outtmpl = re.sub(
651 FIELD_SIZE_COMPAT_RE,
652 r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
653 outtmpl)
654
d0d9ade4
S
655 # Missing numeric fields used together with integer presentation types
656 # in format specification will break the argument substitution since
657 # string 'NA' is returned for missing fields. We will patch output
658 # template for missing fields to meet string presentation type.
c9969434 659 for numeric_field in self._NUMERIC_FIELDS:
d0d9ade4
S
660 if numeric_field not in template_dict:
661 # As of [1] format syntax is:
662 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
663 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
664 FORMAT_RE = r'''(?x)
665 (?<!%)
666 %
667 \({0}\) # mapping key
668 (?:[#0\-+ ]+)? # conversion flags (optional)
669 (?:\d+)? # minimum field width (optional)
670 (?:\.\d+)? # precision (optional)
671 [hlL]? # length modifier (optional)
672 [diouxXeEfFgGcrs%] # conversion type
673 '''
674 outtmpl = re.sub(
675 FORMAT_RE.format(numeric_field),
676 r'%({0})s'.format(numeric_field), outtmpl)
677
15da37c7
S
678 # expand_path translates '%%' into '%' and '$$' into '$'
679 # correspondingly that is not what we want since we need to keep
680 # '%%' intact for template dict substitution step. Working around
681 # with boundary-alike separator hack.
682 sep = ''.join([random.choice(string.ascii_letters) for _ in range(32)])
683 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
684
685 # outtmpl should be expand_path'ed before template dict substitution
686 # because meta fields may contain env variables we don't want to
687 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
688 # title "Hello $PATH", we don't want `$PATH` to be expanded.
689 filename = expand_path(outtmpl).replace(sep, '') % template_dict
690
3a0d2f52
S
691 # Temporary fix for #4787
692 # 'Treat' all problem characters by passing filename through preferredencoding
693 # to workaround encoding issues with subprocess on python2 @ Windows
694 if sys.version_info < (3, 0) and sys.platform == 'win32':
695 filename = encodeFilename(filename, True).decode(preferredencoding())
b3613d36 696 return sanitize_path(filename)
8222d8de 697 except ValueError as err:
6febd1c1 698 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
699 return None
700
442c37b7 701 def _match_entry(self, info_dict, incomplete):
6ec6cb4e 702 """ Returns None iff the file should be downloaded """
8222d8de 703
6febd1c1 704 video_title = info_dict.get('title', info_dict.get('id', 'video'))
7012b23c
PH
705 if 'title' in info_dict:
706 # This can happen when we're just evaluating the playlist
707 title = info_dict['title']
708 matchtitle = self.params.get('matchtitle', False)
709 if matchtitle:
710 if not re.search(matchtitle, title, re.IGNORECASE):
6febd1c1 711 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
7012b23c
PH
712 rejecttitle = self.params.get('rejecttitle', False)
713 if rejecttitle:
714 if re.search(rejecttitle, title, re.IGNORECASE):
6febd1c1 715 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
d800609c 716 date = info_dict.get('upload_date')
8222d8de
JMF
717 if date is not None:
718 dateRange = self.params.get('daterange', DateRange())
719 if date not in dateRange:
6febd1c1 720 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
d800609c 721 view_count = info_dict.get('view_count')
5fe18bdb
PH
722 if view_count is not None:
723 min_views = self.params.get('min_views')
724 if min_views is not None and view_count < min_views:
6febd1c1 725 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
5fe18bdb
PH
726 max_views = self.params.get('max_views')
727 if max_views is not None and view_count > max_views:
6febd1c1 728 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
05900629 729 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
347de493 730 return 'Skipping "%s" because it is age restricted' % video_title
c1c9a79c 731 if self.in_download_archive(info_dict):
6febd1c1 732 return '%s has already been recorded in archive' % video_title
347de493 733
442c37b7
PH
734 if not incomplete:
735 match_filter = self.params.get('match_filter')
736 if match_filter is not None:
737 ret = match_filter(info_dict)
738 if ret is not None:
739 return ret
347de493 740
8222d8de 741 return None
fe7e0c98 742
b6c45014
JMF
743 @staticmethod
744 def add_extra_info(info_dict, extra_info):
745 '''Set the keys from extra_info in info dict if they are missing'''
746 for key, value in extra_info.items():
747 info_dict.setdefault(key, value)
748
7fc3fa05 749 def extract_info(self, url, download=True, ie_key=None, extra_info={},
61aa5ba3 750 process=True, force_generic_extractor=False):
8222d8de
JMF
751 '''
752 Returns a list with a dictionary for each video we find.
753 If 'download', also downloads the videos.
754 extra_info is a dict containing the extra values to add to each result
613b2d9d 755 '''
fe7e0c98 756
61aa5ba3 757 if not ie_key and force_generic_extractor:
d22dec74
S
758 ie_key = 'Generic'
759
8222d8de 760 if ie_key:
56c73665 761 ies = [self.get_info_extractor(ie_key)]
8222d8de
JMF
762 else:
763 ies = self._ies
764
765 for ie in ies:
766 if not ie.suitable(url):
767 continue
768
e52d7f85 769 ie = self.get_info_extractor(ie.ie_key())
8222d8de 770 if not ie.working():
6febd1c1
PH
771 self.report_warning('The program functionality for this site has been marked as broken, '
772 'and will probably not work.')
8222d8de
JMF
773
774 try:
775 ie_result = ie.extract(url)
5f6a1245 776 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
8222d8de
JMF
777 break
778 if isinstance(ie_result, list):
779 # Backwards compatibility: old IE result format
8222d8de
JMF
780 ie_result = {
781 '_type': 'compat_list',
782 'entries': ie_result,
783 }
ea38e55f 784 self.add_default_extra_info(ie_result, ie, url)
7fc3fa05
PH
785 if process:
786 return self.process_ie_result(ie_result, download, extra_info)
787 else:
788 return ie_result
773f291d
S
789 except GeoRestrictedError as e:
790 msg = e.msg
791 if e.countries:
792 msg += '\nThis video is available in %s.' % ', '.join(
793 map(ISO3166Utils.short2full, e.countries))
794 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
795 self.report_error(msg)
796 break
fb043a6e 797 except ExtractorError as e: # An error we somewhat expected
2c74e6fa 798 self.report_error(compat_str(e), e.format_traceback())
8222d8de 799 break
d3e5bbf4
PH
800 except MaxDownloadsReached:
801 raise
8222d8de
JMF
802 except Exception as e:
803 if self.params.get('ignoreerrors', False):
9b9c5355 804 self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
8222d8de
JMF
805 break
806 else:
807 raise
808 else:
1a489545 809 self.report_error('no suitable InfoExtractor for URL %s' % url)
fe7e0c98 810
ea38e55f
PH
811 def add_default_extra_info(self, ie_result, ie, url):
812 self.add_extra_info(ie_result, {
813 'extractor': ie.IE_NAME,
814 'webpage_url': url,
815 'webpage_url_basename': url_basename(url),
816 'extractor_key': ie.ie_key(),
817 })
818
8222d8de
JMF
819 def process_ie_result(self, ie_result, download=True, extra_info={}):
820 """
821 Take the result of the ie(may be modified) and resolve all unresolved
822 references (URLs, playlist items).
823
824 It will also download the videos if 'download'.
825 Returns the resolved ie_result.
826 """
e8ee972c
PH
827 result_type = ie_result.get('_type', 'video')
828
057a5206 829 if result_type in ('url', 'url_transparent'):
134c6ea8 830 ie_result['url'] = sanitize_url(ie_result['url'])
057a5206
PH
831 extract_flat = self.params.get('extract_flat', False)
832 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
833 extract_flat is True):
057a5206
PH
834 if self.params.get('forcejson', False):
835 self.to_stdout(json.dumps(ie_result))
e8ee972c
PH
836 return ie_result
837
8222d8de 838 if result_type == 'video':
b6c45014 839 self.add_extra_info(ie_result, extra_info)
feee2ecf 840 return self.process_video_result(ie_result, download=download)
8222d8de
JMF
841 elif result_type == 'url':
842 # We have to add extra_info to the results because it may be
843 # contained in a playlist
844 return self.extract_info(ie_result['url'],
845 download,
846 ie_key=ie_result.get('ie_key'),
847 extra_info=extra_info)
7fc3fa05
PH
848 elif result_type == 'url_transparent':
849 # Use the information from the embedding page
850 info = self.extract_info(
851 ie_result['url'], ie_key=ie_result.get('ie_key'),
852 extra_info=extra_info, download=False, process=False)
853
1640eb09
S
854 # extract_info may return None when ignoreerrors is enabled and
855 # extraction failed with an error, don't crash and return early
856 # in this case
857 if not info:
858 return info
859
412c617d
PH
860 force_properties = dict(
861 (k, v) for k, v in ie_result.items() if v is not None)
b286f201 862 for f in ('_type', 'url', 'ie_key'):
412c617d
PH
863 if f in force_properties:
864 del force_properties[f]
865 new_result = info.copy()
866 new_result.update(force_properties)
7fc3fa05 867
0563f7ac
S
868 # Extracted info may not be a video result (i.e.
869 # info.get('_type', 'video') != video) but rather an url or
870 # url_transparent. In such cases outer metadata (from ie_result)
871 # should be propagated to inner one (info). For this to happen
872 # _type of info should be overridden with url_transparent. This
873 # fixes issue from https://github.com/rg3/youtube-dl/pull/11163.
874 if new_result.get('_type') == 'url':
875 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
876
877 return self.process_ie_result(
878 new_result, download=download, extra_info=extra_info)
40fcba5e 879 elif result_type in ('playlist', 'multi_video'):
8222d8de 880 # We process each entry in the playlist
d800609c 881 playlist = ie_result.get('title') or ie_result.get('id')
6febd1c1 882 self.to_screen('[download] Downloading playlist: %s' % playlist)
8222d8de
JMF
883
884 playlist_results = []
885
8222d8de 886 playliststart = self.params.get('playliststart', 1) - 1
d800609c 887 playlistend = self.params.get('playlistend')
a19fd00c 888 # For backwards compatibility, interpret -1 as whole list
8222d8de 889 if playlistend == -1:
a19fd00c 890 playlistend = None
8222d8de 891
d800609c 892 playlistitems_str = self.params.get('playlist_items')
c14e88f0
PH
893 playlistitems = None
894 if playlistitems_str is not None:
895 def iter_playlistitems(format):
896 for string_segment in format.split(','):
897 if '-' in string_segment:
898 start, end = string_segment.split('-')
899 for item in range(int(start), int(end) + 1):
900 yield int(item)
901 else:
902 yield int(string_segment)
903 playlistitems = iter_playlistitems(playlistitems_str)
904
b82f815f
PH
905 ie_entries = ie_result['entries']
906 if isinstance(ie_entries, list):
907 n_all_entries = len(ie_entries)
c14e88f0 908 if playlistitems:
3884dcf3
JMF
909 entries = [
910 ie_entries[i - 1] for i in playlistitems
911 if -n_all_entries <= i - 1 < n_all_entries]
c14e88f0
PH
912 else:
913 entries = ie_entries[playliststart:playlistend]
b7ab0590
PH
914 n_entries = len(entries)
915 self.to_screen(
611c1dd9 916 '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
b7ab0590 917 (ie_result['extractor'], playlist, n_all_entries, n_entries))
b82f815f 918 elif isinstance(ie_entries, PagedList):
c14e88f0
PH
919 if playlistitems:
920 entries = []
921 for item in playlistitems:
922 entries.extend(ie_entries.getslice(
923 item - 1, item
924 ))
925 else:
926 entries = ie_entries.getslice(
927 playliststart, playlistend)
b7ab0590
PH
928 n_entries = len(entries)
929 self.to_screen(
611c1dd9 930 '[%s] playlist %s: Downloading %d videos' %
b7ab0590 931 (ie_result['extractor'], playlist, n_entries))
b82f815f 932 else: # iterable
c14e88f0
PH
933 if playlistitems:
934 entry_list = list(ie_entries)
935 entries = [entry_list[i - 1] for i in playlistitems]
936 else:
937 entries = list(itertools.islice(
938 ie_entries, playliststart, playlistend))
b82f815f
PH
939 n_entries = len(entries)
940 self.to_screen(
611c1dd9 941 '[%s] playlist %s: Downloading %d videos' %
b82f815f 942 (ie_result['extractor'], playlist, n_entries))
8222d8de 943
ff815fe6
MS
944 if self.params.get('playlistreverse', False):
945 entries = entries[::-1]
946
75822ca7
TC
947 if self.params.get('playlistrandom', False):
948 random.shuffle(entries)
949
0016b84e
S
950 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
951
fe7e0c98 952 for i, entry in enumerate(entries, 1):
734ea11e 953 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
0016b84e
S
954 # This __x_forwarded_for_ip thing is a bit ugly but requires
955 # minimal changes
956 if x_forwarded_for:
957 entry['__x_forwarded_for_ip'] = x_forwarded_for
8222d8de 958 extra = {
c6b4132a 959 'n_entries': n_entries,
fe7e0c98 960 'playlist': playlist,
a1cf99d0
PH
961 'playlist_id': ie_result.get('id'),
962 'playlist_title': ie_result.get('title'),
fe7e0c98 963 'playlist_index': i + playliststart,
b6c45014 964 'extractor': ie_result['extractor'],
9103bbc5 965 'webpage_url': ie_result['webpage_url'],
29eb5174 966 'webpage_url_basename': url_basename(ie_result['webpage_url']),
be97abc2 967 'extractor_key': ie_result['extractor_key'],
fe7e0c98 968 }
7012b23c 969
442c37b7 970 reason = self._match_entry(entry, incomplete=True)
7012b23c 971 if reason is not None:
6febd1c1 972 self.to_screen('[download] ' + reason)
7012b23c
PH
973 continue
974
8222d8de
JMF
975 entry_result = self.process_ie_result(entry,
976 download=download,
977 extra_info=extra)
978 playlist_results.append(entry_result)
979 ie_result['entries'] = playlist_results
371c3b79 980 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
8222d8de
JMF
981 return ie_result
982 elif result_type == 'compat_list':
c9bf4114
PH
983 self.report_warning(
984 'Extractor %s returned a compat_list result. '
985 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 986
8222d8de 987 def _fixup(r):
9e1a5b84
JW
988 self.add_extra_info(
989 r,
9103bbc5
JMF
990 {
991 'extractor': ie_result['extractor'],
992 'webpage_url': ie_result['webpage_url'],
29eb5174 993 'webpage_url_basename': url_basename(ie_result['webpage_url']),
be97abc2 994 'extractor_key': ie_result['extractor_key'],
9e1a5b84
JW
995 }
996 )
8222d8de
JMF
997 return r
998 ie_result['entries'] = [
b6c45014 999 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1000 for r in ie_result['entries']
1001 ]
1002 return ie_result
1003 else:
1004 raise Exception('Invalid result type: %s' % result_type)
1005
67134eab
JMF
1006 def _build_format_filter(self, filter_spec):
1007 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1008
1009 OPERATORS = {
1010 '<': operator.lt,
1011 '<=': operator.le,
1012 '>': operator.gt,
1013 '>=': operator.ge,
1014 '=': operator.eq,
1015 '!=': operator.ne,
1016 }
67134eab 1017 operator_rex = re.compile(r'''(?x)\s*
2ec19e95 1018 (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
083c9df9
PH
1019 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1020 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
67134eab 1021 $
083c9df9 1022 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
67134eab 1023 m = operator_rex.search(filter_spec)
9ddb6925
S
1024 if m:
1025 try:
1026 comparison_value = int(m.group('value'))
1027 except ValueError:
1028 comparison_value = parse_filesize(m.group('value'))
1029 if comparison_value is None:
1030 comparison_value = parse_filesize(m.group('value') + 'B')
1031 if comparison_value is None:
1032 raise ValueError(
1033 'Invalid value %r in format specification %r' % (
67134eab 1034 m.group('value'), filter_spec))
9ddb6925
S
1035 op = OPERATORS[m.group('op')]
1036
083c9df9 1037 if not m:
9ddb6925
S
1038 STR_OPERATORS = {
1039 '=': operator.eq,
1040 '!=': operator.ne,
10d33b34
YCH
1041 '^=': lambda attr, value: attr.startswith(value),
1042 '$=': lambda attr, value: attr.endswith(value),
1043 '*=': lambda attr, value: value in attr,
9ddb6925 1044 }
67134eab 1045 str_operator_rex = re.compile(r'''(?x)
d5aacf9a 1046 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
9ddb6925 1047 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
b0df5223 1048 \s*(?P<value>[a-zA-Z0-9._-]+)
67134eab 1049 \s*$
9ddb6925 1050 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
67134eab 1051 m = str_operator_rex.search(filter_spec)
9ddb6925
S
1052 if m:
1053 comparison_value = m.group('value')
1054 op = STR_OPERATORS[m.group('op')]
083c9df9 1055
9ddb6925 1056 if not m:
67134eab 1057 raise ValueError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1058
1059 def _filter(f):
1060 actual_value = f.get(m.group('key'))
1061 if actual_value is None:
1062 return m.group('none_inclusive')
1063 return op(actual_value, comparison_value)
67134eab
JMF
1064 return _filter
1065
1066 def build_format_selector(self, format_spec):
1067 def syntax_error(note, start):
1068 message = (
1069 'Invalid format specification: '
1070 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1071 return SyntaxError(message)
1072
1073 PICKFIRST = 'PICKFIRST'
1074 MERGE = 'MERGE'
1075 SINGLE = 'SINGLE'
0130afb7 1076 GROUP = 'GROUP'
67134eab
JMF
1077 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1078
1079 def _parse_filter(tokens):
1080 filter_parts = []
1081 for type, string, start, _, _ in tokens:
1082 if type == tokenize.OP and string == ']':
1083 return ''.join(filter_parts)
1084 else:
1085 filter_parts.append(string)
1086
232541df 1087 def _remove_unused_ops(tokens):
17cc1534 1088 # Remove operators that we don't use and join them with the surrounding strings
232541df
JMF
1089 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1090 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1091 last_string, last_start, last_end, last_line = None, None, None, None
1092 for type, string, start, end, line in tokens:
1093 if type == tokenize.OP and string == '[':
1094 if last_string:
1095 yield tokenize.NAME, last_string, last_start, last_end, last_line
1096 last_string = None
1097 yield type, string, start, end, line
1098 # everything inside brackets will be handled by _parse_filter
1099 for type, string, start, end, line in tokens:
1100 yield type, string, start, end, line
1101 if type == tokenize.OP and string == ']':
1102 break
1103 elif type == tokenize.OP and string in ALLOWED_OPS:
1104 if last_string:
1105 yield tokenize.NAME, last_string, last_start, last_end, last_line
1106 last_string = None
1107 yield type, string, start, end, line
1108 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1109 if not last_string:
1110 last_string = string
1111 last_start = start
1112 last_end = end
1113 else:
1114 last_string += string
1115 if last_string:
1116 yield tokenize.NAME, last_string, last_start, last_end, last_line
1117
cf2ac6df 1118 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
1119 selectors = []
1120 current_selector = None
1121 for type, string, start, _, _ in tokens:
1122 # ENCODING is only defined in python 3.x
1123 if type == getattr(tokenize, 'ENCODING', None):
1124 continue
1125 elif type in [tokenize.NAME, tokenize.NUMBER]:
1126 current_selector = FormatSelector(SINGLE, string, [])
1127 elif type == tokenize.OP:
cf2ac6df
JMF
1128 if string == ')':
1129 if not inside_group:
1130 # ')' will be handled by the parentheses group
1131 tokens.restore_last_token()
67134eab 1132 break
cf2ac6df 1133 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
1134 tokens.restore_last_token()
1135 break
cf2ac6df
JMF
1136 elif inside_choice and string == ',':
1137 tokens.restore_last_token()
1138 break
1139 elif string == ',':
0a31a350
JMF
1140 if not current_selector:
1141 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
1142 selectors.append(current_selector)
1143 current_selector = None
1144 elif string == '/':
d96d604e
JMF
1145 if not current_selector:
1146 raise syntax_error('"/" must follow a format selector', start)
67134eab 1147 first_choice = current_selector
cf2ac6df 1148 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 1149 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
1150 elif string == '[':
1151 if not current_selector:
1152 current_selector = FormatSelector(SINGLE, 'best', [])
1153 format_filter = _parse_filter(tokens)
1154 current_selector.filters.append(format_filter)
0130afb7
JMF
1155 elif string == '(':
1156 if current_selector:
1157 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
1158 group = _parse_format_selection(tokens, inside_group=True)
1159 current_selector = FormatSelector(GROUP, group, [])
67134eab
JMF
1160 elif string == '+':
1161 video_selector = current_selector
cf2ac6df 1162 audio_selector = _parse_format_selection(tokens, inside_merge=True)
0a31a350
JMF
1163 if not video_selector or not audio_selector:
1164 raise syntax_error('"+" must be between two format selectors', start)
cf2ac6df 1165 current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
67134eab
JMF
1166 else:
1167 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1168 elif type == tokenize.ENDMARKER:
1169 break
1170 if current_selector:
1171 selectors.append(current_selector)
1172 return selectors
1173
1174 def _build_selector_function(selector):
1175 if isinstance(selector, list):
1176 fs = [_build_selector_function(s) for s in selector]
1177
317f7ab6 1178 def selector_function(ctx):
67134eab 1179 for f in fs:
317f7ab6 1180 for format in f(ctx):
67134eab
JMF
1181 yield format
1182 return selector_function
0130afb7
JMF
1183 elif selector.type == GROUP:
1184 selector_function = _build_selector_function(selector.selector)
67134eab
JMF
1185 elif selector.type == PICKFIRST:
1186 fs = [_build_selector_function(s) for s in selector.selector]
1187
317f7ab6 1188 def selector_function(ctx):
67134eab 1189 for f in fs:
317f7ab6 1190 picked_formats = list(f(ctx))
67134eab
JMF
1191 if picked_formats:
1192 return picked_formats
1193 return []
1194 elif selector.type == SINGLE:
1195 format_spec = selector.selector
1196
317f7ab6
S
1197 def selector_function(ctx):
1198 formats = list(ctx['formats'])
bb8e5536
JMF
1199 if not formats:
1200 return
5acfa126
JMF
1201 if format_spec == 'all':
1202 for f in formats:
1203 yield f
1204 elif format_spec in ['best', 'worst', None]:
67134eab
JMF
1205 format_idx = 0 if format_spec == 'worst' else -1
1206 audiovideo_formats = [
1207 f for f in formats
1208 if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
1209 if audiovideo_formats:
1210 yield audiovideo_formats[format_idx]
317f7ab6
S
1211 # for extractors with incomplete formats (audio only (soundcloud)
1212 # or video only (imgur)) we will fallback to best/worst
1213 # {video,audio}-only format
1214 elif ctx['incomplete_formats']:
67134eab
JMF
1215 yield formats[format_idx]
1216 elif format_spec == 'bestaudio':
1217 audio_formats = [
1218 f for f in formats
1219 if f.get('vcodec') == 'none']
1220 if audio_formats:
1221 yield audio_formats[-1]
1222 elif format_spec == 'worstaudio':
1223 audio_formats = [
1224 f for f in formats
1225 if f.get('vcodec') == 'none']
1226 if audio_formats:
1227 yield audio_formats[0]
1228 elif format_spec == 'bestvideo':
1229 video_formats = [
1230 f for f in formats
1231 if f.get('acodec') == 'none']
1232 if video_formats:
1233 yield video_formats[-1]
1234 elif format_spec == 'worstvideo':
1235 video_formats = [
1236 f for f in formats
1237 if f.get('acodec') == 'none']
1238 if video_formats:
1239 yield video_formats[0]
1240 else:
1241 extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
1242 if format_spec in extensions:
1243 filter_f = lambda f: f['ext'] == format_spec
1244 else:
1245 filter_f = lambda f: f['format_id'] == format_spec
1246 matches = list(filter(filter_f, formats))
1247 if matches:
1248 yield matches[-1]
1249 elif selector.type == MERGE:
1250 def _merge(formats_info):
1251 format_1, format_2 = [f['format_id'] for f in formats_info]
1252 # The first format must contain the video and the
1253 # second the audio
1254 if formats_info[0].get('vcodec') == 'none':
1255 self.report_error('The first format must '
1256 'contain the video, try using '
1257 '"-f %s+%s"' % (format_2, format_1))
1258 return
3d24bbfb
S
1259 # Formats must be opposite (video+audio)
1260 if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
1261 self.report_error(
1262 'Both formats %s and %s are video-only, you must specify "-f video+audio"'
1263 % (format_1, format_2))
1264 return
67134eab
JMF
1265 output_ext = (
1266 formats_info[0]['ext']
1267 if self.params.get('merge_output_format') is None
1268 else self.params['merge_output_format'])
1269 return {
1270 'requested_formats': formats_info,
1271 'format': '%s+%s' % (formats_info[0].get('format'),
1272 formats_info[1].get('format')),
1273 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
1274 formats_info[1].get('format_id')),
1275 'width': formats_info[0].get('width'),
1276 'height': formats_info[0].get('height'),
1277 'resolution': formats_info[0].get('resolution'),
1278 'fps': formats_info[0].get('fps'),
1279 'vcodec': formats_info[0].get('vcodec'),
1280 'vbr': formats_info[0].get('vbr'),
1281 'stretched_ratio': formats_info[0].get('stretched_ratio'),
1282 'acodec': formats_info[1].get('acodec'),
1283 'abr': formats_info[1].get('abr'),
1284 'ext': output_ext,
1285 }
1286 video_selector, audio_selector = map(_build_selector_function, selector.selector)
083c9df9 1287
317f7ab6
S
1288 def selector_function(ctx):
1289 for pair in itertools.product(
1290 video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
67134eab 1291 yield _merge(pair)
083c9df9 1292
67134eab 1293 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 1294
317f7ab6
S
1295 def final_selector(ctx):
1296 ctx_copy = copy.deepcopy(ctx)
67134eab 1297 for _filter in filters:
317f7ab6
S
1298 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1299 return selector_function(ctx_copy)
67134eab 1300 return final_selector
083c9df9 1301
67134eab 1302 stream = io.BytesIO(format_spec.encode('utf-8'))
0130afb7 1303 try:
232541df 1304 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
0130afb7
JMF
1305 except tokenize.TokenError:
1306 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1307
1308 class TokenIterator(object):
1309 def __init__(self, tokens):
1310 self.tokens = tokens
1311 self.counter = 0
1312
1313 def __iter__(self):
1314 return self
1315
1316 def __next__(self):
1317 if self.counter >= len(self.tokens):
1318 raise StopIteration()
1319 value = self.tokens[self.counter]
1320 self.counter += 1
1321 return value
1322
1323 next = __next__
1324
1325 def restore_last_token(self):
1326 self.counter -= 1
1327
1328 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 1329 return _build_selector_function(parsed_selector)
a9c58ad9 1330
e5660ee6
JMF
1331 def _calc_headers(self, info_dict):
1332 res = std_headers.copy()
1333
1334 add_headers = info_dict.get('http_headers')
1335 if add_headers:
1336 res.update(add_headers)
1337
1338 cookies = self._calc_cookies(info_dict)
1339 if cookies:
1340 res['Cookie'] = cookies
1341
0016b84e
S
1342 if 'X-Forwarded-For' not in res:
1343 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1344 if x_forwarded_for_ip:
1345 res['X-Forwarded-For'] = x_forwarded_for_ip
1346
e5660ee6
JMF
1347 return res
1348
1349 def _calc_cookies(self, info_dict):
5c2266df 1350 pr = sanitized_Request(info_dict['url'])
e5660ee6 1351 self.cookiejar.add_cookie_header(pr)
662435f7 1352 return pr.get_header('Cookie')
e5660ee6 1353
dd82ffea
JMF
1354 def process_video_result(self, info_dict, download=True):
1355 assert info_dict.get('_type', 'video') == 'video'
1356
bec1fad2
PH
1357 if 'id' not in info_dict:
1358 raise ExtractorError('Missing "id" field in extractor result')
1359 if 'title' not in info_dict:
1360 raise ExtractorError('Missing "title" field in extractor result')
1361
c9969434
S
1362 def report_force_conversion(field, field_not, conversion):
1363 self.report_warning(
1364 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1365 % (field, field_not, conversion))
1366
1367 def sanitize_string_field(info, string_field):
1368 field = info.get(string_field)
1369 if field is None or isinstance(field, compat_str):
1370 return
1371 report_force_conversion(string_field, 'a string', 'string')
1372 info[string_field] = compat_str(field)
1373
1374 def sanitize_numeric_fields(info):
1375 for numeric_field in self._NUMERIC_FIELDS:
1376 field = info.get(numeric_field)
1377 if field is None or isinstance(field, compat_numeric_types):
1378 continue
1379 report_force_conversion(numeric_field, 'numeric', 'int')
1380 info[numeric_field] = int_or_none(field)
1381
1382 sanitize_string_field(info_dict, 'id')
1383 sanitize_numeric_fields(info_dict)
be6217b2 1384
dd82ffea
JMF
1385 if 'playlist' not in info_dict:
1386 # It isn't part of a playlist
1387 info_dict['playlist'] = None
1388 info_dict['playlist_index'] = None
1389
d5519808 1390 thumbnails = info_dict.get('thumbnails')
cfb56d1a
PH
1391 if thumbnails is None:
1392 thumbnail = info_dict.get('thumbnail')
1393 if thumbnail:
a7a14d95 1394 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
d5519808 1395 if thumbnails:
be6d7229 1396 thumbnails.sort(key=lambda t: (
d37708fc
RA
1397 t.get('preference') if t.get('preference') is not None else -1,
1398 t.get('width') if t.get('width') is not None else -1,
1399 t.get('height') if t.get('height') is not None else -1,
1400 t.get('id') if t.get('id') is not None else '', t.get('url')))
f6c24009 1401 for i, t in enumerate(thumbnails):
dcf77cf1 1402 t['url'] = sanitize_url(t['url'])
9603e8a7 1403 if t.get('width') and t.get('height'):
d5519808 1404 t['resolution'] = '%dx%d' % (t['width'], t['height'])
f6c24009
PH
1405 if t.get('id') is None:
1406 t['id'] = '%d' % i
d5519808 1407
b7b72db9 1408 if self.params.get('list_thumbnails'):
1409 self.list_thumbnails(info_dict)
1410 return
1411
536a55da
S
1412 thumbnail = info_dict.get('thumbnail')
1413 if thumbnail:
1414 info_dict['thumbnail'] = sanitize_url(thumbnail)
1415 elif thumbnails:
d5519808
PH
1416 info_dict['thumbnail'] = thumbnails[-1]['url']
1417
c9ae7b95 1418 if 'display_id' not in info_dict and 'id' in info_dict:
0afef30b
PH
1419 info_dict['display_id'] = info_dict['id']
1420
955c4514 1421 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
a55e36f4
S
1422 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1423 # see http://bugs.python.org/issue1646728)
1424 try:
1425 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1426 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1427 except (ValueError, OverflowError, OSError):
1428 pass
9d2ecdbc 1429
33d2fc2f
S
1430 # Auto generate title fields corresponding to the *_number fields when missing
1431 # in order to always have clean titles. This is very common for TV series.
1432 for field in ('chapter', 'season', 'episode'):
1433 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1434 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1435
4bba3716
S
1436 subtitles = info_dict.get('subtitles')
1437 if subtitles:
1438 for _, subtitle in subtitles.items():
1439 for subtitle_format in subtitle:
33f3040a
S
1440 if subtitle_format.get('url'):
1441 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
5b1d8575 1442 if subtitle_format.get('ext') is None:
4bba3716
S
1443 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1444
a504ced0 1445 if self.params.get('listsubtitles', False):
360e1ca5
JMF
1446 if 'automatic_captions' in info_dict:
1447 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
4bba3716 1448 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
a504ced0 1449 return
360e1ca5 1450 info_dict['requested_subtitles'] = self.process_subtitles(
4bba3716 1451 info_dict['id'], subtitles,
360e1ca5 1452 info_dict.get('automatic_captions'))
a504ced0 1453
dd82ffea
JMF
1454 # We now pick which formats have to be downloaded
1455 if info_dict.get('formats') is None:
1456 # There's only one format available
1457 formats = [info_dict]
1458 else:
1459 formats = info_dict['formats']
1460
db95dc13
PH
1461 if not formats:
1462 raise ExtractorError('No video formats found!')
1463
73af5cc8
S
1464 def is_wellformed(f):
1465 url = f.get('url')
1466 valid_url = url and isinstance(url, compat_str)
1467 if not valid_url:
1468 self.report_warning(
1469 '"url" field is missing or empty - skipping format, '
1470 'there is an error in extractor')
1471 return valid_url
1472
1473 # Filter out malformed formats for better extraction robustness
1474 formats = list(filter(is_wellformed, formats))
1475
181c7053
S
1476 formats_dict = {}
1477
dd82ffea 1478 # We check that all the formats have the format and format_id fields
db95dc13 1479 for i, format in enumerate(formats):
c9969434
S
1480 sanitize_string_field(format, 'format_id')
1481 sanitize_numeric_fields(format)
dcf77cf1 1482 format['url'] = sanitize_url(format['url'])
dd82ffea 1483 if format.get('format_id') is None:
8016c922 1484 format['format_id'] = compat_str(i)
e2effb08
S
1485 else:
1486 # Sanitize format_id from characters used in format selector expression
ec85ded8 1487 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
1488 format_id = format['format_id']
1489 if format_id not in formats_dict:
1490 formats_dict[format_id] = []
1491 formats_dict[format_id].append(format)
1492
1493 # Make sure all formats have unique format_id
1494 for format_id, ambiguous_formats in formats_dict.items():
1495 if len(ambiguous_formats) > 1:
1496 for i, format in enumerate(ambiguous_formats):
1497 format['format_id'] = '%s-%d' % (format_id, i)
1498
1499 for i, format in enumerate(formats):
8c51aa65 1500 if format.get('format') is None:
6febd1c1 1501 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
1502 id=format['format_id'],
1503 res=self.format_resolution(format),
6febd1c1 1504 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
8c51aa65 1505 )
c1002e96 1506 # Automatically determine file extension if missing
5b1d8575 1507 if format.get('ext') is None:
cce929ea 1508 format['ext'] = determine_ext(format['url']).lower()
b5559424
S
1509 # Automatically determine protocol if missing (useful for format
1510 # selection purposes)
6f0be937 1511 if format.get('protocol') is None:
b5559424 1512 format['protocol'] = determine_protocol(format)
e5660ee6
JMF
1513 # Add HTTP headers, so that external programs can use them from the
1514 # json output
1515 full_format_info = info_dict.copy()
1516 full_format_info.update(format)
1517 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
1518 # Remove private housekeeping stuff
1519 if '__x_forwarded_for_ip' in info_dict:
1520 del info_dict['__x_forwarded_for_ip']
dd82ffea 1521
4bcc7bd1 1522 # TODO Central sorting goes here
99e206d5 1523
f89197d7 1524 if formats[0] is not info_dict:
b3d9ef88
JMF
1525 # only set the 'formats' fields if the original info_dict list them
1526 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 1527 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 1528 # which can't be exported to json
b3d9ef88 1529 info_dict['formats'] = formats
cfb56d1a 1530 if self.params.get('listformats'):
bfaae0a7 1531 self.list_formats(info_dict)
1532 return
1533
de3ef3ed 1534 req_format = self.params.get('format')
a9c58ad9 1535 if req_format is None:
feccf29c 1536 req_format_list = []
3749e36e 1537 if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
8250c32f 1538 not info_dict.get('is_live')):
7fcb605b 1539 merger = FFmpegMergerPP(self)
97fcf1bb 1540 if merger.available and merger.can_merge():
7fcb605b 1541 req_format_list.append('bestvideo+bestaudio')
feccf29c
S
1542 req_format_list.append('best')
1543 req_format = '/'.join(req_format_list)
5acfa126 1544 format_selector = self.build_format_selector(req_format)
317f7ab6
S
1545
1546 # While in format selection we may need to have an access to the original
1547 # format set in order to calculate some metrics or do some processing.
1548 # For now we need to be able to guess whether original formats provided
1549 # by extractor are incomplete or not (i.e. whether extractor provides only
1550 # video-only or audio-only formats) for proper formats selection for
1551 # extractors with such incomplete formats (see
1552 # https://github.com/rg3/youtube-dl/pull/5556).
1553 # Since formats may be filtered during format selection and may not match
1554 # the original formats the results may be incorrect. Thus original formats
1555 # or pre-calculated metrics should be passed to format selection routines
1556 # as well.
1557 # We will pass a context object containing all necessary additional data
1558 # instead of just formats.
1559 # This fixes incorrect format selection issue (see
1560 # https://github.com/rg3/youtube-dl/issues/10083).
2e221ca3 1561 incomplete_formats = (
317f7ab6 1562 # All formats are video-only or
2e221ca3 1563 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
317f7ab6 1564 # all formats are audio-only
2e221ca3 1565 all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
317f7ab6
S
1566
1567 ctx = {
1568 'formats': formats,
1569 'incomplete_formats': incomplete_formats,
1570 }
1571
1572 formats_to_download = list(format_selector(ctx))
dd82ffea 1573 if not formats_to_download:
6febd1c1 1574 raise ExtractorError('requested format not available',
78a3a9f8 1575 expected=True)
dd82ffea
JMF
1576
1577 if download:
1578 if len(formats_to_download) > 1:
6febd1c1 1579 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
dd82ffea
JMF
1580 for format in formats_to_download:
1581 new_info = dict(info_dict)
1582 new_info.update(format)
1583 self.process_info(new_info)
1584 # We update the info dict with the best quality format (backwards compatibility)
1585 info_dict.update(formats_to_download[-1])
1586 return info_dict
1587
98c70d6f 1588 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 1589 """Select the requested subtitles and their format"""
98c70d6f
JMF
1590 available_subs = {}
1591 if normal_subtitles and self.params.get('writesubtitles'):
1592 available_subs.update(normal_subtitles)
1593 if automatic_captions and self.params.get('writeautomaticsub'):
1594 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
1595 if lang not in available_subs:
1596 available_subs[lang] = cap_info
1597
4d171848
JMF
1598 if (not self.params.get('writesubtitles') and not
1599 self.params.get('writeautomaticsub') or not
1600 available_subs):
1601 return None
a504ced0
JMF
1602
1603 if self.params.get('allsubtitles', False):
1604 requested_langs = available_subs.keys()
1605 else:
1606 if self.params.get('subtitleslangs', False):
1607 requested_langs = self.params.get('subtitleslangs')
1608 elif 'en' in available_subs:
1609 requested_langs = ['en']
1610 else:
1611 requested_langs = [list(available_subs.keys())[0]]
1612
1613 formats_query = self.params.get('subtitlesformat', 'best')
1614 formats_preference = formats_query.split('/') if formats_query else []
1615 subs = {}
1616 for lang in requested_langs:
1617 formats = available_subs.get(lang)
1618 if formats is None:
1619 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1620 continue
a504ced0
JMF
1621 for ext in formats_preference:
1622 if ext == 'best':
1623 f = formats[-1]
1624 break
1625 matches = list(filter(lambda f: f['ext'] == ext, formats))
1626 if matches:
1627 f = matches[-1]
1628 break
1629 else:
1630 f = formats[-1]
1631 self.report_warning(
1632 'No subtitle format found matching "%s" for language %s, '
1633 'using %s' % (formats_query, lang, f['ext']))
1634 subs[lang] = f
1635 return subs
1636
8222d8de
JMF
1637 def process_info(self, info_dict):
1638 """Process a single resolved IE result."""
1639
1640 assert info_dict.get('_type', 'video') == 'video'
fd288278
PH
1641
1642 max_downloads = self.params.get('max_downloads')
1643 if max_downloads is not None:
1644 if self._num_downloads >= int(max_downloads):
1645 raise MaxDownloadsReached()
8222d8de
JMF
1646
1647 info_dict['fulltitle'] = info_dict['title']
1648 if len(info_dict['title']) > 200:
6febd1c1 1649 info_dict['title'] = info_dict['title'][:197] + '...'
8222d8de 1650
11b85ce6 1651 if 'format' not in info_dict:
8222d8de
JMF
1652 info_dict['format'] = info_dict['ext']
1653
442c37b7 1654 reason = self._match_entry(info_dict, incomplete=False)
8222d8de 1655 if reason is not None:
6febd1c1 1656 self.to_screen('[download] ' + reason)
8222d8de
JMF
1657 return
1658
fd288278 1659 self._num_downloads += 1
8222d8de 1660
e72c7e41 1661 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
8222d8de
JMF
1662
1663 # Forced printings
1664 if self.params.get('forcetitle', False):
0783b09b 1665 self.to_stdout(info_dict['fulltitle'])
8222d8de 1666 if self.params.get('forceid', False):
0783b09b 1667 self.to_stdout(info_dict['id'])
8222d8de 1668 if self.params.get('forceurl', False):
16ae61f6 1669 if info_dict.get('requested_formats') is not None:
1670 for f in info_dict['requested_formats']:
1671 self.to_stdout(f['url'] + f.get('play_path', ''))
1672 else:
1673 # For RTMP URLs, also include the playpath
1674 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
216d71d0 1675 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
0783b09b 1676 self.to_stdout(info_dict['thumbnail'])
216d71d0 1677 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
0783b09b 1678 self.to_stdout(info_dict['description'])
8222d8de 1679 if self.params.get('forcefilename', False) and filename is not None:
0783b09b 1680 self.to_stdout(filename)
525ef922
PH
1681 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1682 self.to_stdout(formatSeconds(info_dict['duration']))
8222d8de 1683 if self.params.get('forceformat', False):
0783b09b 1684 self.to_stdout(info_dict['format'])
9d153818 1685 if self.params.get('forcejson', False):
0783b09b 1686 self.to_stdout(json.dumps(info_dict))
8222d8de
JMF
1687
1688 # Do nothing else if in simulate mode
1689 if self.params.get('simulate', False):
1690 return
1691
1692 if filename is None:
1693 return
1694
1695 try:
e5a11a22 1696 dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
d26e981d 1697 if dn and not os.path.exists(dn):
8222d8de
JMF
1698 os.makedirs(dn)
1699 except (OSError, IOError) as err:
9b9c5355 1700 self.report_error('unable to create directory ' + error_to_compat_str(err))
8222d8de
JMF
1701 return
1702
1703 if self.params.get('writedescription', False):
2699da80 1704 descfn = replace_extension(filename, 'description', info_dict.get('ext'))
7b6fefc9 1705 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
6febd1c1 1706 self.to_screen('[info] Video description is already present')
f00fd51d
JMF
1707 elif info_dict.get('description') is None:
1708 self.report_warning('There\'s no description to write.')
7b6fefc9
PH
1709 else:
1710 try:
6febd1c1 1711 self.to_screen('[info] Writing video description to: ' + descfn)
7b6fefc9
PH
1712 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1713 descfile.write(info_dict['description'])
7b6fefc9 1714 except (OSError, IOError):
6febd1c1 1715 self.report_error('Cannot write description file ' + descfn)
7b6fefc9 1716 return
8222d8de 1717
1fb07d10 1718 if self.params.get('writeannotations', False):
98727e12 1719 annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
7b6fefc9 1720 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
6febd1c1 1721 self.to_screen('[info] Video annotations are already present')
7b6fefc9
PH
1722 else:
1723 try:
6febd1c1 1724 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
1725 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1726 annofile.write(info_dict['annotations'])
1727 except (KeyError, TypeError):
6febd1c1 1728 self.report_warning('There are no annotations to write.')
7b6fefc9 1729 except (OSError, IOError):
6febd1c1 1730 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 1731 return
1fb07d10 1732
c4a91be7 1733 subtitles_are_requested = any([self.params.get('writesubtitles', False),
0b7f3118 1734 self.params.get('writeautomaticsub')])
c4a91be7 1735
c84dd8a9 1736 if subtitles_are_requested and info_dict.get('requested_subtitles'):
8222d8de
JMF
1737 # subtitles download errors are already managed as troubles in relevant IE
1738 # that way it will silently go on when used with unsupporting IE
c84dd8a9 1739 subtitles = info_dict['requested_subtitles']
0f2c0d33 1740 ie = self.get_info_extractor(info_dict['extractor_key'])
a504ced0
JMF
1741 for sub_lang, sub_info in subtitles.items():
1742 sub_format = sub_info['ext']
1743 if sub_info.get('data') is not None:
1744 sub_data = sub_info['data']
1745 else:
1746 try:
0f2c0d33
JMF
1747 sub_data = ie._download_webpage(
1748 sub_info['url'], info_dict['id'], note=False)
1749 except ExtractorError as err:
a504ced0 1750 self.report_warning('Unable to download subtitle for "%s": %s' %
9b9c5355 1751 (sub_lang, error_to_compat_str(err.cause)))
a504ced0 1752 continue
8222d8de 1753 try:
d4051a8e 1754 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
7b6fefc9 1755 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
6febd1c1 1756 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
7b6fefc9 1757 else:
6febd1c1 1758 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
b1927f4e
YCH
1759 # Use newline='' to prevent conversion of newline characters
1760 # See https://github.com/rg3/youtube-dl/issues/10268
7f832413 1761 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
a504ced0 1762 subfile.write(sub_data)
8222d8de 1763 except (OSError, IOError):
e4db1951 1764 self.report_error('Cannot write subtitles file ' + sub_filename)
8222d8de
JMF
1765 return
1766
8222d8de 1767 if self.params.get('writeinfojson', False):
b29e0000 1768 infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
7b6fefc9 1769 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
6febd1c1 1770 self.to_screen('[info] Video description metadata is already present')
7b6fefc9 1771 else:
6febd1c1 1772 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
7b6fefc9 1773 try:
cb202fd2 1774 write_json_file(self.filter_requested_info(info_dict), infofn)
7b6fefc9 1775 except (OSError, IOError):
6febd1c1 1776 self.report_error('Cannot write metadata to JSON file ' + infofn)
7b6fefc9 1777 return
8222d8de 1778
ec82d85a 1779 self._write_thumbnails(info_dict, filename)
8222d8de
JMF
1780
1781 if not self.params.get('skip_download', False):
4340deca
P
1782 try:
1783 def dl(name, info):
a055469f 1784 fd = get_suitable_downloader(info, self.params)(self, self.params)
4340deca
P
1785 for ph in self._progress_hooks:
1786 fd.add_progress_hook(ph)
1787 if self.params.get('verbose'):
1788 self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
1789 return fd.download(name, info)
ee69b99a 1790
4340deca
P
1791 if info_dict.get('requested_formats') is not None:
1792 downloaded = []
1793 success = True
d47aeb22 1794 merger = FFmpegMergerPP(self)
f740fae2 1795 if not merger.available:
4340deca
P
1796 postprocessors = []
1797 self.report_warning('You have requested multiple '
1798 'formats but ffmpeg or avconv are not installed.'
4a5a898a 1799 ' The formats won\'t be merged.')
6350728b 1800 else:
4340deca 1801 postprocessors = [merger]
81cd954a
S
1802
1803 def compatible_formats(formats):
1804 video, audio = formats
1805 # Check extension
1806 video_ext, audio_ext = audio.get('ext'), video.get('ext')
1807 if video_ext and audio_ext:
1808 COMPATIBLE_EXTS = (
b2758123 1809 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
81cd954a
S
1810 ('webm')
1811 )
1812 for exts in COMPATIBLE_EXTS:
1813 if video_ext in exts and audio_ext in exts:
1814 return True
1815 # TODO: Check acodec/vcodec
1816 return False
1817
38c6902b
S
1818 filename_real_ext = os.path.splitext(filename)[1][1:]
1819 filename_wo_ext = (
1820 os.path.splitext(filename)[0]
1821 if filename_real_ext == info_dict['ext']
1822 else filename)
81cd954a 1823 requested_formats = info_dict['requested_formats']
c0dea0a7 1824 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
38c6902b 1825 info_dict['ext'] = 'mkv'
4a5a898a
S
1826 self.report_warning(
1827 'Requested formats are incompatible for merge and will be merged into mkv.')
38c6902b
S
1828 # Ensure filename always has a correct extension for successful merge
1829 filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
5b5fbc08
JMF
1830 if os.path.exists(encodeFilename(filename)):
1831 self.to_screen(
1832 '[download] %s has already been downloaded and '
1833 'merged' % filename)
1834 else:
81cd954a 1835 for f in requested_formats:
5b5fbc08
JMF
1836 new_info = dict(info_dict)
1837 new_info.update(f)
1838 fname = self.prepare_filename(new_info)
666a9a2b 1839 fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
5b5fbc08
JMF
1840 downloaded.append(fname)
1841 partial_success = dl(fname, new_info)
1842 success = success and partial_success
1843 info_dict['__postprocessors'] = postprocessors
1844 info_dict['__files_to_merge'] = downloaded
4340deca
P
1845 else:
1846 # Just a single file
1847 success = dl(filename, info_dict)
1848 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
7960b056 1849 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca
P
1850 return
1851 except (OSError, IOError) as err:
1852 raise UnavailableVideoError(err)
1853 except (ContentTooShortError, ) as err:
1854 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1855 return
8222d8de 1856
e38cafe9 1857 if success and filename != '-':
6271f1ca 1858 # Fixup content
62cd676c
PH
1859 fixup_policy = self.params.get('fixup')
1860 if fixup_policy is None:
1861 fixup_policy = 'detect_or_warn'
1862
d1e4a464
S
1863 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
1864
6271f1ca
PH
1865 stretched_ratio = info_dict.get('stretched_ratio')
1866 if stretched_ratio is not None and stretched_ratio != 1:
6271f1ca
PH
1867 if fixup_policy == 'warn':
1868 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1869 info_dict['id'], stretched_ratio))
1870 elif fixup_policy == 'detect_or_warn':
1871 stretched_pp = FFmpegFixupStretchedPP(self)
1872 if stretched_pp.available:
1873 info_dict.setdefault('__postprocessors', [])
1874 info_dict['__postprocessors'].append(stretched_pp)
1875 else:
1876 self.report_warning(
d1e4a464
S
1877 '%s: Non-uniform pixel ratio (%s). %s'
1878 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
6271f1ca 1879 else:
62cd676c
PH
1880 assert fixup_policy in ('ignore', 'never')
1881
d1e4a464
S
1882 if (info_dict.get('requested_formats') is None and
1883 info_dict.get('container') == 'm4a_dash'):
62cd676c 1884 if fixup_policy == 'warn':
d1e4a464
S
1885 self.report_warning(
1886 '%s: writing DASH m4a. '
1887 'Only some players support this container.'
1888 % info_dict['id'])
62cd676c
PH
1889 elif fixup_policy == 'detect_or_warn':
1890 fixup_pp = FFmpegFixupM4aPP(self)
1891 if fixup_pp.available:
1892 info_dict.setdefault('__postprocessors', [])
1893 info_dict['__postprocessors'].append(fixup_pp)
1894 else:
1895 self.report_warning(
d1e4a464
S
1896 '%s: writing DASH m4a. '
1897 'Only some players support this container. %s'
1898 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
62cd676c
PH
1899 else:
1900 assert fixup_policy in ('ignore', 'never')
6271f1ca 1901
d1e4a464
S
1902 if (info_dict.get('protocol') == 'm3u8_native' or
1903 info_dict.get('protocol') == 'm3u8' and
1904 self.params.get('hls_prefer_native')):
f17f8651 1905 if fixup_policy == 'warn':
a02682fd 1906 self.report_warning('%s: malformed AAC bitstream detected.' % (
f17f8651 1907 info_dict['id']))
1908 elif fixup_policy == 'detect_or_warn':
1909 fixup_pp = FFmpegFixupM3u8PP(self)
1910 if fixup_pp.available:
1911 info_dict.setdefault('__postprocessors', [])
1912 info_dict['__postprocessors'].append(fixup_pp)
1913 else:
1914 self.report_warning(
a02682fd 1915 '%s: malformed AAC bitstream detected. %s'
d1e4a464 1916 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
f17f8651 1917 else:
1918 assert fixup_policy in ('ignore', 'never')
1919
8222d8de
JMF
1920 try:
1921 self.post_process(filename, info_dict)
1922 except (PostProcessingError) as err:
6febd1c1 1923 self.report_error('postprocessing: %s' % str(err))
8222d8de 1924 return
cd58dc3e 1925 self.record_download_archive(info_dict)
8222d8de
JMF
1926
1927 def download(self, url_list):
1928 """Download a given list of URLs."""
acd69589 1929 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
0c75c3fa 1930 if (len(url_list) > 1 and
9487ce03 1931 outtmpl != '-' and
8fb3ac36
PH
1932 '%' not in outtmpl and
1933 self.params.get('max_downloads') != 1):
acd69589 1934 raise SameFileError(outtmpl)
8222d8de
JMF
1935
1936 for url in url_list:
1937 try:
5f6a1245 1938 # It also downloads the videos
61aa5ba3
S
1939 res = self.extract_info(
1940 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de 1941 except UnavailableVideoError:
6febd1c1 1942 self.report_error('unable to download video')
8222d8de 1943 except MaxDownloadsReached:
6febd1c1 1944 self.to_screen('[info] Maximum number of downloaded files reached.')
8222d8de 1945 raise
63e0be34
PH
1946 else:
1947 if self.params.get('dump_single_json', False):
1948 self.to_stdout(json.dumps(res))
8222d8de
JMF
1949
1950 return self._download_retcode
1951
1dcc4c0c 1952 def download_with_info_file(self, info_filename):
31bd3925
JMF
1953 with contextlib.closing(fileinput.FileInput(
1954 [info_filename], mode='r',
1955 openhook=fileinput.hook_encoded('utf-8'))) as f:
1956 # FileInput doesn't have a read method, we can't call json.load
cb202fd2 1957 info = self.filter_requested_info(json.loads('\n'.join(f)))
d4943898
JMF
1958 try:
1959 self.process_ie_result(info, download=True)
1960 except DownloadError:
1961 webpage_url = info.get('webpage_url')
1962 if webpage_url is not None:
6febd1c1 1963 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
d4943898
JMF
1964 return self.download([webpage_url])
1965 else:
1966 raise
1967 return self._download_retcode
1dcc4c0c 1968
cb202fd2
S
1969 @staticmethod
1970 def filter_requested_info(info_dict):
1971 return dict(
1972 (k, v) for k, v in info_dict.items()
1973 if k not in ['requested_formats', 'requested_subtitles'])
1974
8222d8de
JMF
1975 def post_process(self, filename, ie_info):
1976 """Run all the postprocessors on the given file."""
1977 info = dict(ie_info)
1978 info['filepath'] = filename
6350728b
JMF
1979 pps_chain = []
1980 if ie_info.get('__postprocessors') is not None:
1981 pps_chain.extend(ie_info['__postprocessors'])
1982 pps_chain.extend(self._pps)
1983 for pp in pps_chain:
71646e46 1984 files_to_delete = []
8222d8de 1985 try:
592e97e8 1986 files_to_delete, info = pp.run(info)
8222d8de 1987 except PostProcessingError as e:
bbcbf4d4 1988 self.report_error(e.msg)
592e97e8
JMF
1989 if files_to_delete and not self.params.get('keepvideo', False):
1990 for old_filename in files_to_delete:
f3ff1a36 1991 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
592e97e8
JMF
1992 try:
1993 os.remove(encodeFilename(old_filename))
1994 except (IOError, OSError):
1995 self.report_warning('Unable to remove downloaded original file')
c1c9a79c 1996
5db07df6
PH
1997 def _make_archive_id(self, info_dict):
1998 # Future-proof against any change in case
1999 # and backwards compatibility with prior versions
d31209a1 2000 extractor = info_dict.get('extractor_key')
7012b23c
PH
2001 if extractor is None:
2002 if 'id' in info_dict:
2003 extractor = info_dict.get('ie_key') # key in a playlist
2004 if extractor is None:
5db07df6 2005 return None # Incomplete video information
6febd1c1 2006 return extractor.lower() + ' ' + info_dict['id']
5db07df6
PH
2007
2008 def in_download_archive(self, info_dict):
2009 fn = self.params.get('download_archive')
2010 if fn is None:
2011 return False
2012
2013 vid_id = self._make_archive_id(info_dict)
2014 if vid_id is None:
7012b23c 2015 return False # Incomplete video information
5db07df6 2016
c1c9a79c
PH
2017 try:
2018 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
2019 for line in archive_file:
2020 if line.strip() == vid_id:
2021 return True
2022 except IOError as ioe:
2023 if ioe.errno != errno.ENOENT:
2024 raise
2025 return False
2026
2027 def record_download_archive(self, info_dict):
2028 fn = self.params.get('download_archive')
2029 if fn is None:
2030 return
5db07df6
PH
2031 vid_id = self._make_archive_id(info_dict)
2032 assert vid_id
c1c9a79c 2033 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
6febd1c1 2034 archive_file.write(vid_id + '\n')
dd82ffea 2035
8c51aa65 2036 @staticmethod
8abeeb94 2037 def format_resolution(format, default='unknown'):
fb04e403
PH
2038 if format.get('vcodec') == 'none':
2039 return 'audio only'
f49d89ee
PH
2040 if format.get('resolution') is not None:
2041 return format['resolution']
8c51aa65
JMF
2042 if format.get('height') is not None:
2043 if format.get('width') is not None:
6febd1c1 2044 res = '%sx%s' % (format['width'], format['height'])
8c51aa65 2045 else:
6febd1c1 2046 res = '%sp' % format['height']
f49d89ee 2047 elif format.get('width') is not None:
388ae76b 2048 res = '%dx?' % format['width']
8c51aa65 2049 else:
8abeeb94 2050 res = default
8c51aa65
JMF
2051 return res
2052
c57f7757
PH
2053 def _format_note(self, fdict):
2054 res = ''
2055 if fdict.get('ext') in ['f4f', 'f4m']:
2056 res += '(unsupported) '
32f90364
PH
2057 if fdict.get('language'):
2058 if res:
2059 res += ' '
9016d76f 2060 res += '[%s] ' % fdict['language']
c57f7757
PH
2061 if fdict.get('format_note') is not None:
2062 res += fdict['format_note'] + ' '
2063 if fdict.get('tbr') is not None:
2064 res += '%4dk ' % fdict['tbr']
2065 if fdict.get('container') is not None:
2066 if res:
2067 res += ', '
2068 res += '%s container' % fdict['container']
2069 if (fdict.get('vcodec') is not None and
2070 fdict.get('vcodec') != 'none'):
2071 if res:
2072 res += ', '
2073 res += fdict['vcodec']
91c7271a 2074 if fdict.get('vbr') is not None:
c57f7757
PH
2075 res += '@'
2076 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2077 res += 'video@'
2078 if fdict.get('vbr') is not None:
2079 res += '%4dk' % fdict['vbr']
fbb21cf5 2080 if fdict.get('fps') is not None:
5d583bdf
S
2081 if res:
2082 res += ', '
2083 res += '%sfps' % fdict['fps']
c57f7757
PH
2084 if fdict.get('acodec') is not None:
2085 if res:
2086 res += ', '
2087 if fdict['acodec'] == 'none':
2088 res += 'video only'
2089 else:
2090 res += '%-5s' % fdict['acodec']
2091 elif fdict.get('abr') is not None:
2092 if res:
2093 res += ', '
2094 res += 'audio'
2095 if fdict.get('abr') is not None:
2096 res += '@%3dk' % fdict['abr']
2097 if fdict.get('asr') is not None:
2098 res += ' (%5dHz)' % fdict['asr']
2099 if fdict.get('filesize') is not None:
2100 if res:
2101 res += ', '
2102 res += format_bytes(fdict['filesize'])
9732d77e
PH
2103 elif fdict.get('filesize_approx') is not None:
2104 if res:
2105 res += ', '
2106 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 2107 return res
91c7271a 2108
c57f7757 2109 def list_formats(self, info_dict):
94badb25 2110 formats = info_dict.get('formats', [info_dict])
b81a359e
PH
2111 table = [
2112 [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
2113 for f in formats
e65566a9 2114 if f.get('preference') is None or f['preference'] >= -1000]
94badb25 2115 if len(formats) > 1:
b81a359e 2116 table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
57dd9a8f 2117
b81a359e 2118 header_line = ['format code', 'extension', 'resolution', 'note']
cfb56d1a 2119 self.to_screen(
b81a359e
PH
2120 '[info] Available formats for %s:\n%s' %
2121 (info_dict['id'], render_table(header_line, table)))
cfb56d1a
PH
2122
2123 def list_thumbnails(self, info_dict):
2124 thumbnails = info_dict.get('thumbnails')
2125 if not thumbnails:
b7b72db9 2126 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2127 return
cfb56d1a
PH
2128
2129 self.to_screen(
2130 '[info] Thumbnails for %s:' % info_dict['id'])
2131 self.to_screen(render_table(
2132 ['ID', 'width', 'height', 'URL'],
2133 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
dca08720 2134
360e1ca5 2135 def list_subtitles(self, video_id, subtitles, name='subtitles'):
a504ced0 2136 if not subtitles:
360e1ca5 2137 self.to_screen('%s has no %s' % (video_id, name))
a504ced0 2138 return
a504ced0 2139 self.to_screen(
edab9dbf
JMF
2140 'Available %s for %s:' % (name, video_id))
2141 self.to_screen(render_table(
2142 ['Language', 'formats'],
2143 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2144 for lang, formats in subtitles.items()]))
a504ced0 2145
dca08720
PH
2146 def urlopen(self, req):
2147 """ Start an HTTP download """
82d8a8b6 2148 if isinstance(req, compat_basestring):
67dda517 2149 req = sanitized_Request(req)
19a41fc6 2150 return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
2151
2152 def print_debug_header(self):
2153 if not self.params.get('verbose'):
2154 return
62fec3b2 2155
4192b51c
PH
2156 if type('') is not compat_str:
2157 # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
2158 self.report_warning(
2159 'Your Python is broken! Update to a newer and supported version')
2160
c6afed48
PH
2161 stdout_encoding = getattr(
2162 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
b0472057 2163 encoding_str = (
734f90bb
PH
2164 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2165 locale.getpreferredencoding(),
2166 sys.getfilesystemencoding(),
c6afed48 2167 stdout_encoding,
b0472057 2168 self.get_encoding()))
4192b51c 2169 write_string(encoding_str, encoding=None)
734f90bb
PH
2170
2171 self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
e0986e31
JMF
2172 if _LAZY_LOADER:
2173 self._write_string('[debug] Lazy loading extractors enabled' + '\n')
dca08720
PH
2174 try:
2175 sp = subprocess.Popen(
2176 ['git', 'rev-parse', '--short', 'HEAD'],
2177 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2178 cwd=os.path.dirname(os.path.abspath(__file__)))
2179 out, err = sp.communicate()
2180 out = out.decode().strip()
2181 if re.match('[0-9a-f]+', out):
734f90bb 2182 self._write_string('[debug] Git HEAD: ' + out + '\n')
70a1165b 2183 except Exception:
dca08720
PH
2184 try:
2185 sys.exc_clear()
70a1165b 2186 except Exception:
dca08720 2187 pass
d28b5171
PH
2188 self._write_string('[debug] Python version %s - %s\n' % (
2189 platform.python_version(), platform_name()))
2190
73fac4e9 2191 exe_versions = FFmpegPostProcessor.get_versions(self)
4c83c967 2192 exe_versions['rtmpdump'] = rtmpdump_version()
d28b5171
PH
2193 exe_str = ', '.join(
2194 '%s %s' % (exe, v)
2195 for exe, v in sorted(exe_versions.items())
2196 if v
2197 )
2198 if not exe_str:
2199 exe_str = 'none'
2200 self._write_string('[debug] exe versions: %s\n' % exe_str)
dca08720
PH
2201
2202 proxy_map = {}
2203 for handler in self._opener.handlers:
2204 if hasattr(handler, 'proxies'):
2205 proxy_map.update(handler.proxies)
734f90bb 2206 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
dca08720 2207
58b1f00d
PH
2208 if self.params.get('call_home', False):
2209 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2210 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
2211 latest_version = self.urlopen(
2212 'https://yt-dl.org/latest/version').read().decode('utf-8')
2213 if version_tuple(latest_version) > version_tuple(__version__):
2214 self.report_warning(
2215 'You are using an outdated version (newest version: %s)! '
2216 'See https://yt-dl.org/update if you need help updating.' %
2217 latest_version)
2218
e344693b 2219 def _setup_opener(self):
6ad14cab 2220 timeout_val = self.params.get('socket_timeout')
19a41fc6 2221 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
6ad14cab 2222
dca08720
PH
2223 opts_cookiefile = self.params.get('cookiefile')
2224 opts_proxy = self.params.get('proxy')
2225
2226 if opts_cookiefile is None:
2227 self.cookiejar = compat_cookiejar.CookieJar()
2228 else:
590bc6f6 2229 opts_cookiefile = expand_path(opts_cookiefile)
dca08720
PH
2230 self.cookiejar = compat_cookiejar.MozillaCookieJar(
2231 opts_cookiefile)
2232 if os.access(opts_cookiefile, os.R_OK):
2233 self.cookiejar.load()
2234
6a3f4c3f 2235 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720
PH
2236 if opts_proxy is not None:
2237 if opts_proxy == '':
2238 proxies = {}
2239 else:
2240 proxies = {'http': opts_proxy, 'https': opts_proxy}
2241 else:
2242 proxies = compat_urllib_request.getproxies()
2243 # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
2244 if 'http' in proxies and 'https' not in proxies:
2245 proxies['https'] = proxies['http']
91410c9b 2246 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2
PH
2247
2248 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d
PH
2249 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2250 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
8b172c2e 2251 data_handler = compat_urllib_request_DataHandler()
6240b0a2
JMF
2252
2253 # When passing our own FileHandler instance, build_opener won't add the
2254 # default FileHandler and allows us to disable the file protocol, which
2255 # can be used for malicious purposes (see
e37afbe0 2256 # https://github.com/rg3/youtube-dl/issues/8227)
6240b0a2
JMF
2257 file_handler = compat_urllib_request.FileHandler()
2258
2259 def file_open(*args, **kwargs):
30e2f2d7 2260 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
6240b0a2
JMF
2261 file_handler.file_open = file_open
2262
2263 opener = compat_urllib_request.build_opener(
2264 proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
2461f79d 2265
dca08720
PH
2266 # Delete the default user-agent header, which would otherwise apply in
2267 # cases where our custom HTTP handler doesn't come into play
2268 # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
2269 opener.addheaders = []
2270 self._opener = opener
62fec3b2
PH
2271
2272 def encode(self, s):
2273 if isinstance(s, bytes):
2274 return s # Already encoded
2275
2276 try:
2277 return s.encode(self.get_encoding())
2278 except UnicodeEncodeError as err:
2279 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2280 raise
2281
2282 def get_encoding(self):
2283 encoding = self.params.get('encoding')
2284 if encoding is None:
2285 encoding = preferredencoding()
2286 return encoding
ec82d85a
PH
2287
2288 def _write_thumbnails(self, info_dict, filename):
2289 if self.params.get('writethumbnail', False):
2290 thumbnails = info_dict.get('thumbnails')
2291 if thumbnails:
2292 thumbnails = [thumbnails[-1]]
2293 elif self.params.get('write_all_thumbnails', False):
2294 thumbnails = info_dict.get('thumbnails')
2295 else:
2296 return
2297
2298 if not thumbnails:
2299 # No thumbnails present, so return immediately
2300 return
2301
2302 for t in thumbnails:
2303 thumb_ext = determine_ext(t['url'], 'jpg')
2304 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2305 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
82245a6d 2306 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
ec82d85a
PH
2307
2308 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2309 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2310 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2311 else:
2312 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2313 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2314 try:
2315 uf = self.urlopen(t['url'])
d3d89c32 2316 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a
PH
2317 shutil.copyfileobj(uf, thumbf)
2318 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2319 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2320 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2321 self.report_warning('Unable to download thumbnail "%s": %s' %
9b9c5355 2322 (t['url'], error_to_compat_str(err)))