#!/usr/bin/env python
# coding: utf-8

from __future__ import absolute_import, unicode_literals

import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import io
import itertools
import json
import locale
import operator
import os
import platform
import re
import shutil
import subprocess
import socket
import sys
import time
import tokenize
import traceback
import random

from string import ascii_letters

from .compat import (
    compat_basestring,
    compat_cookiejar,
    compat_get_terminal_size,
    compat_http_client,
    compat_kwargs,
    compat_numeric_types,
    compat_os_name,
    compat_str,
    compat_tokenize_tokenize,
    compat_urllib_error,
    compat_urllib_request,
    compat_urllib_request_DataHandler,
)
from .utils import (
    age_restricted,
    args_to_str,
    ContentTooShortError,
    date_from_str,
    DateRange,
    DEFAULT_OUTTMPL,
    determine_ext,
    determine_protocol,
    DOT_DESKTOP_LINK_TEMPLATE,
    DOT_URL_LINK_TEMPLATE,
    DOT_WEBLOC_LINK_TEMPLATE,
    DownloadError,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    ExistingVideoReached,
    expand_path,
    ExtractorError,
    format_bytes,
    format_field,
    formatSeconds,
    GeoRestrictedError,
    int_or_none,
    iri_to_uri,
    ISO3166Utils,
    locked_file,
    make_dir,
    make_HTTPS_handler,
    MaxDownloadsReached,
    orderedSet,
    PagedList,
    parse_filesize,
    PerRequestProxyHandler,
    platform_name,
    PostProcessingError,
    preferredencoding,
    prepend_extension,
    register_socks_protocols,
    render_table,
    replace_extension,
    RejectedVideoReached,
    SameFileError,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    str_or_none,
    subtitles_filename,
    to_high_limit_path,
    UnavailableVideoError,
    url_basename,
    version_tuple,
    write_json_file,
    write_string,
    YoutubeDLCookieJar,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
    process_communicate_or_kill,
)
from .cache import Cache
from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER, _PLUGIN_CLASSES
from .extractor.openload import PhantomJSwrapper
from .downloader import get_suitable_downloader
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    # FFmpegSubtitlesConvertorPP,
    get_postprocessor,
    MoveFilesAfterDownloadPP,
)
from .version import __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are responsible for downloading the actual video file
    and writing it to disk if the user has requested it, among other tasks.
    In most cases there should be one per program. Given a video URL, the
    downloader does not know how to extract all the needed information;
    that is the task of the InfoExtractors, so it has to pass the URL to
    one of them.

    For this, YoutubeDL objects have a method that allows InfoExtractors to
    be registered in a given order. When it is passed a URL, the YoutubeDL
    object hands it to the first InfoExtractor it finds that reports being
    able to handle it. The InfoExtractor extracts all the information about
    the video or videos the URL refers to, and YoutubeDL processes the
    extracted information, possibly using a File Downloader to download the
    video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also registers
    itself as the downloader in charge for the InfoExtractors that are
    added to it, so this is a "mutual registration".

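    A minimal usage sketch (the option values and URL below are illustrative,
    not defaults):

        from youtube_dlc import YoutubeDL

        ydl_opts = {'format': 'bestvideo+bestaudio/best'}
        with YoutubeDL(ydl_opts) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
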
    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceduration:     Force printing duration.
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless of
                       'skip_download' or 'simulate'.
    simulate:          Do not download the video files.
    format:            Video format code. See "FORMAT SELECTION" for more details.
    format_sort:       How to sort the video formats. See "Sorting Formats"
                       for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    allow_multiple_video_streams:  Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams:  Allow multiple audio streams to be merged
                       into a single file
    outtmpl:           Template for output names.
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    ignoreerrors:      Do not stop on download errors
                       (Default True when running youtube-dlc,
                       but False when directly accessing YoutubeDL class)
    force_generic_extractor: Force downloader to use the generic extractor
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlist_items:    Specific indices of playlist to download.
    playlistreverse:   Download playlist items in reverse order.
    playlistrandom:    Download playlist items in random order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    writecomments:     Extract video comments. These will not be written to
                       disk unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
    cookiefile:        File name where cookies should be read from and dumped to
    nocheckcertificate:Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    include_ads:       Download ads as well
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         youtube_dlc/postprocessor/__init__.py for a list.
                       * _after_move: Optional. If True, run this post_processor
                         after 'MoveFilesAfterDownload'
                       as well as any further keyword arguments for the
                       postprocessor.
    post_hooks:        A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    merge_output_format: Extension to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted. "merge_output_format" is
                       replaced by this extension when given
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                                           about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       youtube-dlc servers for debugging.
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval:Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called with the info_dict of
                       every video.
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country

    The following options determine which downloader is picked:
    external_downloader: Executable of the external downloader to call.
                       None or unset for standard (built-in) downloader.
    hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.

    The following parameters are not used by YoutubeDL itself, they are used by
    the downloader (see youtube_dlc/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle,
    xattr_set_filesize, external_downloader_args, hls_use_mpegts,
    http_chunk_size.

    The following options are used by the post processors:
    prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg. (avconv support is deprecated)
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP

    The following options are used by the YouTube extractor:
    youtube_include_dash_manifest: If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH.
    """

    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    params = None
    _ies = []
    _pps = {'beforedl': [], 'aftermove': [], 'normal': []}
    __prepare_filename_warned = False
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options."""
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = {'beforedl': [], 'aftermove': [], 'normal': []}
        self.__prepare_filename_warned = False
        self._post_hooks = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)
        self.archive = set()

        # Preload the archive, if any is specified
        def preload_download_archive(self):
            fn = self.params.get('download_archive')
            if fn is None:
                return False
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except IOError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

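        # For reference, each line of the archive file is conventionally of
        # the form '<extractor key> <video id>', e.g. 'youtube BaW_jenozKc'.
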
        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(
                    '%s is deprecated. Use %s instead.' % (option, suggestion))
                return True
            return False

        if self.params.get('verbose'):
            self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))

        preload_download_archive(self)

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        if self.params.get('final_ext'):
            if self.params.get('merge_output_format'):
                self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
            self.params['merge_output_format'] = self.params['final_ext']

        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N is the number of digits')
        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

        if params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround. Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            if 'when' in pp_def:
                when = pp_def['when']
                del pp_def['when']
            else:
                when = 'normal'
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp, when=when)

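        # For reference, each 'postprocessors' entry is a plain dict; an
        # illustrative example (the accepted keys depend on the postprocessor):
        #
        #     {'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3'}
        #
        # Everything except 'key' (and the optional 'when') is passed on to
        # the postprocessor's constructor as keyword arguments.
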
        for ph in self.params.get('post_hooks', []):
            self.add_post_hook(ph)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['youtube-dlc']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s\n' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
        if not isinstance(ie, type):
            self._ies_instances[ie.ie_key()] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key. It will try to get one from
        the _ies list; if there is no instance, it will create a new one and
        add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp, when='normal'):
        """Add a PostProcessor object to the end of the chain."""
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the progress hook (currently only for the file downloader)"""
        self._progress_hooks.append(ph)

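    # Illustrative sketch of a progress hook as described under
    # "progress_hooks" in the class docstring; the hook receives the status
    # dictionary documented there:
    #
    #     def my_hook(d):
    #         if d['status'] == 'finished':
    #             print('Finished downloading %s' % d['filename'])
    #
    #     ydl.add_progress_hook(my_hook)
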
    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        return res[:-len('\n')]

    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode."""
        return self.to_stdout(message, skip_eol, check_quiet=True)

    def _write_string(self, s, out=None):
        write_string(s, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, check_quiet=False):
        """Print message to stdout if not in quiet mode."""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        elif not check_quiet or not self.params.get('quiet', False):
            message = self._bidi_workaround(message)
            terminator = ['\n', ''][skip_eol]
            output = message + terminator

            self._write_string(output, self._screen_file)

    def to_stderr(self, message):
        """Print message to stderr."""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            message = self._bidi_workaround(message)
            output = message + '\n'
            self._write_string(output, self._err_file)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)

    def save_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate', False):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Save the title on stack
            self._write_string('\033[22;0t', self._screen_file)

    def restore_console_title(self):
        if not self.params.get('consoletitle', False):
            return
        if self.params.get('simulate', False):
            return
        if compat_os_name != 'nt' and 'TERM' in os.environ:
            # Restore the title from stack
            self._write_string('\033[23;0t', self._screen_file)

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    def report_warning(self, message):
        '''
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
                _msg_header = '\033[0;33mWARNING:\033[0m'
            else:
                _msg_header = 'WARNING:'
            warning_message = '%s %s' % (_msg_header, message)
            self.to_stderr(warning_message)

    def report_error(self, message, tb=None):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
            _msg_header = '\033[0;31mERROR:\033[0m'
        else:
            _msg_header = 'ERROR:'
        error_message = '%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting already existent file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting already existent file')

    def prepare_filename(self, info_dict, warn=False):
        """Generate the output filename."""
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '%dx?' % template_dict['width']

            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None and not isinstance(v, (list, tuple, dict)))
            template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

            # For fields playlist_index and autonumber convert all occurrences
            # of %(field)s to %(field)0Nd for backward compatibility
            field_size_compat_map = {
                'playlist_index': len(str(template_dict['n_entries'])),
                'autonumber': autonumber_size,
            }
            FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
            mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
            if mobj:
                outtmpl = re.sub(
                    FIELD_SIZE_COMPAT_RE,
                    r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
                    outtmpl)

            # Missing numeric fields used together with integer presentation types
            # in format specification will break the argument substitution since
            # string NA placeholder is returned for missing fields. We will patch
            # output template for missing fields to meet string presentation type.
            for numeric_field in self._NUMERIC_FIELDS:
                if numeric_field not in template_dict:
                    # As of [1] format syntax is:
                    #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
                    # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
                    FORMAT_RE = r'''(?x)
                        (?<!%)
                        %
                        \({0}\)  # mapping key
                        (?:[#0\-+ ]+)?  # conversion flags (optional)
                        (?:\d+)?  # minimum field width (optional)
                        (?:\.\d+)?  # precision (optional)
                        [hlL]?  # length modifier (optional)
                        [diouxXeEfFgGcrs%]  # conversion type
                    '''
                    outtmpl = re.sub(
                        FORMAT_RE.format(numeric_field),
                        r'%({0})s'.format(numeric_field), outtmpl)

            # expand_path translates '%%' into '%' and '$$' into '$'
            # correspondingly that is not what we want since we need to keep
            # '%%' intact for template dict substitution step. Working around
            # with boundary-alike separator hack.
            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

            # outtmpl should be expand_path'ed before template dict substitution
            # because meta fields may contain env variables we don't want to
            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
            # title "Hello $PATH", we don't want `$PATH` to be expanded.
            filename = expand_path(outtmpl).replace(sep, '') % template_dict

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                fn_groups = filename.rsplit('.')
                ext = fn_groups[-1]
                sub_ext = ''
                if len(fn_groups) > 2:
                    sub_ext = fn_groups[-2]
                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            filename = sanitize_path(filename)

            if warn and not self.__prepare_filename_warned:
                if not self.params.get('paths'):
                    pass
                elif filename == '-':
                    self.report_warning('--paths is ignored when outputting to stdout')
                elif os.path.isabs(filename):
                    self.report_warning('--paths is ignored since an absolute path is given in output template')
                self.__prepare_filename_warned = True

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

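    # For reference, with the (illustrative) template '%(title)s-%(id)s.%(ext)s'
    # and an info dict containing {'title': 'Example', 'id': 'abc123', 'ext': 'mp4'},
    # prepare_filename() above would yield 'Example-abc123.mp4' after the
    # sanitization steps.
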
    def prepare_filepath(self, filename, dir_type=''):
        if filename == '-':
            return filename
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict)
        homepath = expand_path(paths.get('home', '').strip())
        assert isinstance(homepath, compat_str)
        subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
        assert isinstance(subdir, compat_str)
        return sanitize_path(os.path.join(homepath, subdir, filename))

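    # For reference, with params {'paths': {'home': '~/videos', 'description': 'desc'}}
    # (illustrative values), prepare_filepath('a.description', 'description')
    # joins the expanded home path, the 'desc' subdirectory and the filename,
    # e.g. '/home/user/videos/desc/a.description'.
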
    def _match_entry(self, info_dict, incomplete):
        """ Returns None if the file should be downloaded """

        def check_filter():
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title
            if self.in_download_archive(info_dict):
                return '%s has already been recorded in archive' % video_title

            if not incomplete:
                match_filter = self.params.get('match_filter')
                if match_filter is not None:
                    ret = match_filter(info_dict)
                    if ret is not None:
                        return ret
            return None

        reason = check_filter()
        if reason is not None:
            self.to_screen('[download] ' + reason)
            if reason.endswith('has already been recorded in archive') and self.params.get('break_on_existing', False):
                raise ExistingVideoReached()
            elif self.params.get('break_on_reject', False):
                raise RejectedVideoReached()
        return reason

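    # Illustrative 'match_filter' callable (hypothetical helper, not part of
    # this module), as consumed by _match_entry above: return None to accept
    # a video, or a skip message to reject it.
    #
    #     def only_short_videos(info_dict):
    #         duration = info_dict.get('duration')
    #         if duration and duration > 600:
    #             return 'Skipping %s: longer than 10 minutes' % info_dict['title']
    #         return None
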
    @staticmethod
    def add_extra_info(info_dict, extra_info):
        '''Set the keys from extra_info in info dict if they are missing'''
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)

    def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
                     process=True, force_generic_extractor=False):
        '''
        Returns a list with a dictionary for each video we find.
        If 'download', also downloads the videos.
        extra_info is a dict containing the extra values to add to each result
        '''

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            ie_key = ie.ie_key()
            ie = self.get_info_extractor(ie_key)
            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            try:
                temp_id = str_or_none(
                    ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
                    else ie._match_id(url))
            except (AssertionError, IndexError, AttributeError):
                temp_id = None
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen("[%s] %s: has already been recorded in archive" % (
                    ie_key, temp_id))
                break
            return self.__extract_info(url, ie, download, extra_info, process, info_dict)
        else:
            self.report_error('no suitable InfoExtractor for URL %s' % url)

    def __handle_extraction_exceptions(func):
        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapper

    @__handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process, info_dict):
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
                '_type': 'compat_list',
                'entries': ie_result,
            }
        if info_dict:
            if info_dict.get('id'):
                ie_result['id'] = info_dict['id']
            if info_dict.get('title'):
                ie_result['title'] = info_dict['title']
        self.add_default_extra_info(ie_result, ie, url)
        if process:
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result

    def add_default_extra_info(self, ie_result, ie, url):
        self.add_extra_info(ie_result, {
            'extractor': ie.IE_NAME,
            'webpage_url': url,
            'duration_string': (
                formatSeconds(ie_result['duration'], '-')
                if ie_result.get('duration', None) is not None
                else None),
            'webpage_url_basename': url_basename(url),
            'extractor_key': ie.ie_key(),
        })

    def process_ie_result(self, ie_result, download=True, extra_info={}):
        """
        Take the result of the ie (may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        Returns the resolved ie_result.
        """
        result_type = ie_result.get('_type', 'video')

        if result_type in ('url', 'url_transparent'):
            ie_result['url'] = sanitize_url(ie_result['url'])
            extract_flat = self.params.get('extract_flat', False)
            if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                    or extract_flat is True):
                self.__forced_printings(
                    ie_result,
                    self.prepare_filepath(self.prepare_filename(ie_result)),
                    incomplete=True)
                return ie_result

        if result_type == 'video':
            self.add_extra_info(ie_result, extra_info)
            return self.process_video_result(ie_result, download=download)
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download, info_dict=ie_result,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'url_transparent':
            # Use the information from the embedding page
            info = self.extract_info(
                ie_result['url'], ie_key=ie_result.get('ie_key'),
                extra_info=extra_info, download=False, process=False)

            # extract_info may return None when ignoreerrors is enabled and
            # extraction failed with an error, don't crash and return early
            # in this case
            if not info:
                return info

            force_properties = dict(
                (k, v) for k, v in ie_result.items() if v is not None)
            for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
                if f in force_properties:
                    del force_properties[f]
            new_result = info.copy()
            new_result.update(force_properties)

            # Extracted info may not be a video result (i.e.
            # info.get('_type', 'video') != video) but rather an url or
            # url_transparent. In such cases outer metadata (from ie_result)
            # should be propagated to inner one (info). For this to happen
            # _type of info should be overridden with url_transparent. This
            # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
            if new_result.get('_type') == 'url':
                new_result['_type'] = 'url_transparent'

            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type in ('playlist', 'multi_video'):
            # Protect from infinite recursion due to recursively nested playlists
            # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
            webpage_url = ie_result['webpage_url']
            if webpage_url in self._playlist_urls:
                self.to_screen(
                    '[download] Skipping already downloaded playlist: %s'
                    % (ie_result.get('title') or ie_result.get('id')))
                return

            self._playlist_level += 1
            self._playlist_urls.add(webpage_url)
            try:
                return self.__process_playlist(ie_result, download)
            finally:
                self._playlist_level -= 1
                if not self._playlist_level:
                    self._playlist_urls.clear()
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))

            def _fixup(r):
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)

    def __process_playlist(self, ie_result, download):
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        def ensure_dir_exists(path):
            return make_dir(path, self.report_error)

        if self.params.get('writeinfojson', False):
            infofn = replace_extension(
                self.prepare_filepath(self.prepare_filename(ie_result), 'infojson'),
                'info.json', ie_result.get('ext'))
            if not ensure_dir_exists(encodeFilename(infofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Playlist metadata is already present')
            else:
                self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
                playlist_info = dict(ie_result)
                playlist_info.pop('entries')
                try:
                    write_json_file(self.filter_requested_info(playlist_info), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write playlist metadata to JSON file ' + infofn)

        if self.params.get('writedescription', False):
            descfn = replace_extension(
                self.prepare_filepath(self.prepare_filename(ie_result), 'description'),
                'description', ie_result.get('ext'))
            if not ensure_dir_exists(encodeFilename(descfn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Playlist description is already present')
            elif ie_result.get('description') is None:
                self.report_warning('There\'s no playlist description to write.')
            else:
                try:
                    self.to_screen('[info] Writing playlist description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(ie_result['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write playlist description file ' + descfn)
                    return

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

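        # Illustrative example: playlist_items '1-3,7' yields the 1-based
        # indices [1, 2, 3, 7] (orderedSet also removes duplicates).
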
1172 ie_entries = ie_result['entries']
1173
1174 def make_playlistitems_entries(list_ie_entries):
1175 num_entries = len(list_ie_entries)
1176 return [
1177 list_ie_entries[i - 1] for i in playlistitems
1178 if -num_entries <= i - 1 < num_entries]
1179
1180 def report_download(num_entries):
1181 self.to_screen(
1182 '[%s] playlist %s: Downloading %d videos' %
1183 (ie_result['extractor'], playlist, num_entries))
1184
1185 if isinstance(ie_entries, list):
1186 n_all_entries = len(ie_entries)
1187 if playlistitems:
1188 entries = make_playlistitems_entries(ie_entries)
1189 else:
1190 entries = ie_entries[playliststart:playlistend]
1191 n_entries = len(entries)
1192 self.to_screen(
1193 '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
1194 (ie_result['extractor'], playlist, n_all_entries, n_entries))
1195 elif isinstance(ie_entries, PagedList):
1196 if playlistitems:
1197 entries = []
1198 for item in playlistitems:
1199 entries.extend(ie_entries.getslice(
1200 item - 1, item
1201 ))
1202 else:
1203 entries = ie_entries.getslice(
1204 playliststart, playlistend)
1205 n_entries = len(entries)
1206 report_download(n_entries)
1207 else: # iterable
1208 if playlistitems:
1209 entries = make_playlistitems_entries(list(itertools.islice(
1210 ie_entries, 0, max(playlistitems))))
1211 else:
1212 entries = list(itertools.islice(
1213 ie_entries, playliststart, playlistend))
1214 n_entries = len(entries)
1215 report_download(n_entries)
1216
1217 if self.params.get('playlistreverse', False):
1218 entries = entries[::-1]
1219
1220 if self.params.get('playlistrandom', False):
1221 random.shuffle(entries)
1222
1223 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1224
1225 for i, entry in enumerate(entries, 1):
1226 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1227 # This __x_forwarded_for_ip thing is a bit ugly but requires
1228 # minimal changes
1229 if x_forwarded_for:
1230 entry['__x_forwarded_for_ip'] = x_forwarded_for
1231 extra = {
1232 'n_entries': n_entries,
1233 'playlist': playlist,
1234 'playlist_id': ie_result.get('id'),
1235 'playlist_title': ie_result.get('title'),
1236 'playlist_uploader': ie_result.get('uploader'),
1237 'playlist_uploader_id': ie_result.get('uploader_id'),
1238 'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
1239 'extractor': ie_result['extractor'],
1240 'webpage_url': ie_result['webpage_url'],
1241 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1242 'extractor_key': ie_result['extractor_key'],
1243 }
1244
1245 if self._match_entry(entry, incomplete=True) is not None:
1246 continue
1247
1248 entry_result = self.__process_iterable_entry(entry, download, extra)
1249 # TODO: skip failed (empty) entries?
1250 playlist_results.append(entry_result)
1251 ie_result['entries'] = playlist_results
1252 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1253 return ie_result
1254
a0566bbf 1255 @__handle_extraction_exceptions
1256 def __process_iterable_entry(self, entry, download, extra_info):
1257 return self.process_ie_result(
1258 entry, download=download, extra_info=extra_info)
1259
67134eab
JMF
1260 def _build_format_filter(self, filter_spec):
1261 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1262
1263 OPERATORS = {
1264 '<': operator.lt,
1265 '<=': operator.le,
1266 '>': operator.gt,
1267 '>=': operator.ge,
1268 '=': operator.eq,
1269 '!=': operator.ne,
1270 }
67134eab 1271 operator_rex = re.compile(r'''(?x)\s*
a03a3c80 1272 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
083c9df9
PH
1273 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1274 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
67134eab 1275 $
083c9df9 1276 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
67134eab 1277 m = operator_rex.search(filter_spec)
9ddb6925
S
1278 if m:
1279 try:
1280 comparison_value = int(m.group('value'))
1281 except ValueError:
1282 comparison_value = parse_filesize(m.group('value'))
1283 if comparison_value is None:
1284 comparison_value = parse_filesize(m.group('value') + 'B')
1285 if comparison_value is None:
1286 raise ValueError(
1287 'Invalid value %r in format specification %r' % (
67134eab 1288 m.group('value'), filter_spec))
9ddb6925
S
1289 op = OPERATORS[m.group('op')]
1290
083c9df9 1291 if not m:
9ddb6925
S
1292 STR_OPERATORS = {
1293 '=': operator.eq,
10d33b34
YCH
1294 '^=': lambda attr, value: attr.startswith(value),
1295 '$=': lambda attr, value: attr.endswith(value),
1296 '*=': lambda attr, value: value in attr,
9ddb6925 1297 }
67134eab 1298 str_operator_rex = re.compile(r'''(?x)
f96bff99 1299 \s*(?P<key>[a-zA-Z0-9._-]+)
2cc779f4 1300 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
b0df5223 1301 \s*(?P<value>[a-zA-Z0-9._-]+)
67134eab 1302 \s*$
9ddb6925 1303 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
67134eab 1304 m = str_operator_rex.search(filter_spec)
9ddb6925
S
1305 if m:
1306 comparison_value = m.group('value')
2cc779f4
S
1307 str_op = STR_OPERATORS[m.group('op')]
1308 if m.group('negation'):
e118a879 1309 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1310 else:
1311 op = str_op
083c9df9 1312
9ddb6925 1313 if not m:
67134eab 1314 raise ValueError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1315
1316 def _filter(f):
1317 actual_value = f.get(m.group('key'))
1318 if actual_value is None:
1319 return m.group('none_inclusive')
1320 return op(actual_value, comparison_value)
67134eab
JMF
1321 return _filter
1322
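# Illustrative sketch of the filter specs accepted above (the `ydl` instance
# and the format dicts here are hypothetical, shown only as examples):
#
#   f = ydl._build_format_filter('height<=480')
#   f({'height': 360})    # -> True (numeric comparison)
#   f({'height': 720})    # -> False
#   f({'height': None})   # -> falsy; 'height<=480?' would keep formats
#                         #    whose height is unknown
#
#   f = ydl._build_format_filter('ext^=mp')
#   f({'ext': 'mp4'})     # -> True ('^=' is the startswith string operator)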
0017d9ad 1323 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1324
af0f7428
S
1325 def can_merge():
1326 merger = FFmpegMergerPP(self)
1327 return merger.available and merger.can_merge()
1328
91ebc640 1329 prefer_best = (
1330 not self.params.get('simulate', False)
1331 and download
1332 and (
1333 not can_merge()
19807826 1334 or info_dict.get('is_live', False)
91ebc640 1335 or self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-'))
1336
1337 return (
1338 'best/bestvideo+bestaudio'
1339 if prefer_best
1340 else 'bestvideo*+bestaudio/best'
19807826 1341 if not self.params.get('allow_multiple_audio_streams', False)
91ebc640 1342 else 'bestvideo+bestaudio/best')
0017d9ad 1343
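# Quick reference for the spec returned above:
#   'best/bestvideo+bestaudio'   when actually downloading but merging is not
#                                an option (no usable ffmpeg, a live stream,
#                                or output written to '-'),
#   'bestvideo*+bestaudio/best'  otherwise, unless allow_multiple_audio_streams
#                                is set,
#   'bestvideo+bestaudio/best'   otherwise.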
67134eab
JMF
1344 def build_format_selector(self, format_spec):
1345 def syntax_error(note, start):
1346 message = (
1347 'Invalid format specification: '
1348 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1349 return SyntaxError(message)
1350
1351 PICKFIRST = 'PICKFIRST'
1352 MERGE = 'MERGE'
1353 SINGLE = 'SINGLE'
0130afb7 1354 GROUP = 'GROUP'
67134eab
JMF
1355 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1356
91ebc640 1357 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1358 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 1359
67134eab
JMF
1360 def _parse_filter(tokens):
1361 filter_parts = []
1362 for type, string, start, _, _ in tokens:
1363 if type == tokenize.OP and string == ']':
1364 return ''.join(filter_parts)
1365 else:
1366 filter_parts.append(string)
1367
232541df 1368 def _remove_unused_ops(tokens):
17cc1534 1369 # Remove operators that we don't use and join them with the surrounding strings
232541df
JMF
1370 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1371 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1372 last_string, last_start, last_end, last_line = None, None, None, None
1373 for type, string, start, end, line in tokens:
1374 if type == tokenize.OP and string == '[':
1375 if last_string:
1376 yield tokenize.NAME, last_string, last_start, last_end, last_line
1377 last_string = None
1378 yield type, string, start, end, line
1379 # everything inside brackets will be handled by _parse_filter
1380 for type, string, start, end, line in tokens:
1381 yield type, string, start, end, line
1382 if type == tokenize.OP and string == ']':
1383 break
1384 elif type == tokenize.OP and string in ALLOWED_OPS:
1385 if last_string:
1386 yield tokenize.NAME, last_string, last_start, last_end, last_line
1387 last_string = None
1388 yield type, string, start, end, line
1389 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1390 if not last_string:
1391 last_string = string
1392 last_start = start
1393 last_end = end
1394 else:
1395 last_string += string
1396 if last_string:
1397 yield tokenize.NAME, last_string, last_start, last_end, last_line
1398
cf2ac6df 1399 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
1400 selectors = []
1401 current_selector = None
1402 for type, string, start, _, _ in tokens:
1403 # ENCODING is only defined in python 3.x
1404 if type == getattr(tokenize, 'ENCODING', None):
1405 continue
1406 elif type in [tokenize.NAME, tokenize.NUMBER]:
1407 current_selector = FormatSelector(SINGLE, string, [])
1408 elif type == tokenize.OP:
cf2ac6df
JMF
1409 if string == ')':
1410 if not inside_group:
1411 # ')' will be handled by the parentheses group
1412 tokens.restore_last_token()
67134eab 1413 break
cf2ac6df 1414 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
1415 tokens.restore_last_token()
1416 break
cf2ac6df
JMF
1417 elif inside_choice and string == ',':
1418 tokens.restore_last_token()
1419 break
1420 elif string == ',':
0a31a350
JMF
1421 if not current_selector:
1422 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
1423 selectors.append(current_selector)
1424 current_selector = None
1425 elif string == '/':
d96d604e
JMF
1426 if not current_selector:
1427 raise syntax_error('"/" must follow a format selector', start)
67134eab 1428 first_choice = current_selector
cf2ac6df 1429 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 1430 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
1431 elif string == '[':
1432 if not current_selector:
1433 current_selector = FormatSelector(SINGLE, 'best', [])
1434 format_filter = _parse_filter(tokens)
1435 current_selector.filters.append(format_filter)
0130afb7
JMF
1436 elif string == '(':
1437 if current_selector:
1438 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
1439 group = _parse_format_selection(tokens, inside_group=True)
1440 current_selector = FormatSelector(GROUP, group, [])
67134eab 1441 elif string == '+':
d03cfdce 1442 if not current_selector:
1443 raise syntax_error('Unexpected "+"', start)
1444 selector_1 = current_selector
1445 selector_2 = _parse_format_selection(tokens, inside_merge=True)
1446 if not selector_2:
1447 raise syntax_error('Expected a selector', start)
1448 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab
JMF
1449 else:
1450 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1451 elif type == tokenize.ENDMARKER:
1452 break
1453 if current_selector:
1454 selectors.append(current_selector)
1455 return selectors
1456
1457 def _build_selector_function(selector):
909d24dd 1458 if isinstance(selector, list): # ,
67134eab
JMF
1459 fs = [_build_selector_function(s) for s in selector]
1460
317f7ab6 1461 def selector_function(ctx):
67134eab 1462 for f in fs:
317f7ab6 1463 for format in f(ctx):
67134eab
JMF
1464 yield format
1465 return selector_function
909d24dd 1466
1467 elif selector.type == GROUP: # ()
0130afb7 1468 selector_function = _build_selector_function(selector.selector)
909d24dd 1469
1470 elif selector.type == PICKFIRST: # /
67134eab
JMF
1471 fs = [_build_selector_function(s) for s in selector.selector]
1472
317f7ab6 1473 def selector_function(ctx):
67134eab 1474 for f in fs:
317f7ab6 1475 picked_formats = list(f(ctx))
67134eab
JMF
1476 if picked_formats:
1477 return picked_formats
1478 return []
67134eab 1479
909d24dd 1480 elif selector.type == SINGLE: # atom
1481 format_spec = selector.selector if selector.selector is not None else 'best'
1482
1483 if format_spec == 'all':
1484 def selector_function(ctx):
1485 formats = list(ctx['formats'])
1486 if formats:
1487 for f in formats:
1488 yield f
1489
1490 else:
1491 format_fallback = False
1492 format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
1493 if format_spec_obj is not None:
1494 format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
1495 format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
1496 not_format_type = 'v' if format_type == 'a' else 'a'
1497 format_modified = format_spec_obj.group(3) is not None
1498
1499 format_fallback = not format_type and not format_modified # for b, w
1500 filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
1501 if format_type and format_modified # bv*, ba*, wv*, wa*
1502 else (lambda f: f.get(not_format_type + 'codec') == 'none')
1503 if format_type # bv, ba, wv, wa
1504 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1505 if not format_modified # b, w
1506 else None) # b*, w*
67134eab 1507 else:
909d24dd 1508 format_idx = -1
1509 filter_f = ((lambda f: f.get('ext') == format_spec)
1510 if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension
1511 else (lambda f: f.get('format_id') == format_spec)) # id
1512
1513 def selector_function(ctx):
1514 formats = list(ctx['formats'])
1515 if not formats:
1516 return
1517 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
67134eab 1518 if matches:
909d24dd 1519 yield matches[format_idx]
1520 elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
1521 # for extractors with incomplete formats (audio-only (soundcloud)
1522 # or video-only (imgur)), best/worst will fall back to the
1523 # best/worst {video,audio}-only format
1524 yield formats[format_idx]
1525
1526 elif selector.type == MERGE: # +
d03cfdce 1527 def _merge(formats_pair):
1528 format_1, format_2 = formats_pair
1529
1530 formats_info = []
1531 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1532 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1533
909d24dd 1534 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
1535 get_no_more = {"video": False, "audio": False}
1536 for (i, fmt_info) in enumerate(formats_info):
1537 for aud_vid in ["audio", "video"]:
1538 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1539 if get_no_more[aud_vid]:
1540 formats_info.pop(i)
1541 get_no_more[aud_vid] = True
1542
1543 if len(formats_info) == 1:
1544 return formats_info[0]
1545
d03cfdce 1546 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1547 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1548
1549 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1550 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1551
1552 output_ext = self.params.get('merge_output_format')
1553 if not output_ext:
1554 if the_only_video:
1555 output_ext = the_only_video['ext']
1556 elif the_only_audio and not video_fmts:
1557 output_ext = the_only_audio['ext']
1558 else:
1559 output_ext = 'mkv'
1560
1561 new_dict = {
67134eab 1562 'requested_formats': formats_info,
d03cfdce 1563 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1564 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
67134eab
JMF
1565 'ext': output_ext,
1566 }
d03cfdce 1567
1568 if the_only_video:
1569 new_dict.update({
1570 'width': the_only_video.get('width'),
1571 'height': the_only_video.get('height'),
1572 'resolution': the_only_video.get('resolution'),
1573 'fps': the_only_video.get('fps'),
1574 'vcodec': the_only_video.get('vcodec'),
1575 'vbr': the_only_video.get('vbr'),
1576 'stretched_ratio': the_only_video.get('stretched_ratio'),
1577 })
1578
1579 if the_only_audio:
1580 new_dict.update({
1581 'acodec': the_only_audio.get('acodec'),
1582 'abr': the_only_audio.get('abr'),
1583 })
1584
1585 return new_dict
1586
1587 selector_1, selector_2 = map(_build_selector_function, selector.selector)
083c9df9 1588
317f7ab6
S
1589 def selector_function(ctx):
1590 for pair in itertools.product(
d03cfdce 1591 selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
67134eab 1592 yield _merge(pair)
083c9df9 1593
67134eab 1594 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 1595
317f7ab6
S
1596 def final_selector(ctx):
1597 ctx_copy = copy.deepcopy(ctx)
67134eab 1598 for _filter in filters:
317f7ab6
S
1599 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1600 return selector_function(ctx_copy)
67134eab 1601 return final_selector
083c9df9 1602
67134eab 1603 stream = io.BytesIO(format_spec.encode('utf-8'))
0130afb7 1604 try:
232541df 1605 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
0130afb7
JMF
1606 except tokenize.TokenError:
1607 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1608
1609 class TokenIterator(object):
1610 def __init__(self, tokens):
1611 self.tokens = tokens
1612 self.counter = 0
1613
1614 def __iter__(self):
1615 return self
1616
1617 def __next__(self):
1618 if self.counter >= len(self.tokens):
1619 raise StopIteration()
1620 value = self.tokens[self.counter]
1621 self.counter += 1
1622 return value
1623
1624 next = __next__
1625
1626 def restore_last_token(self):
1627 self.counter -= 1
1628
1629 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 1630 return _build_selector_function(parsed_selector)
a9c58ad9 1631
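# Illustrative sketch of format_spec strings the selector built above is
# expected to handle, combining the operators recognised by
# _parse_format_selection (all values are examples):
#
#   'best'                               a single atom
#   'bestvideo+bestaudio/best'           '+' merges, '/' picks the first that works
#   'bestvideo[height<=720]+bestaudio'   '[...]' applies _build_format_filter
#   '(mp4,webm)[height<480]'             '(...)' groups, ',' selects both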
e5660ee6
JMF
1632 def _calc_headers(self, info_dict):
1633 res = std_headers.copy()
1634
1635 add_headers = info_dict.get('http_headers')
1636 if add_headers:
1637 res.update(add_headers)
1638
1639 cookies = self._calc_cookies(info_dict)
1640 if cookies:
1641 res['Cookie'] = cookies
1642
0016b84e
S
1643 if 'X-Forwarded-For' not in res:
1644 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1645 if x_forwarded_for_ip:
1646 res['X-Forwarded-For'] = x_forwarded_for_ip
1647
e5660ee6
JMF
1648 return res
1649
1650 def _calc_cookies(self, info_dict):
5c2266df 1651 pr = sanitized_Request(info_dict['url'])
e5660ee6 1652 self.cookiejar.add_cookie_header(pr)
662435f7 1653 return pr.get_header('Cookie')
e5660ee6 1654
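# Rough sketch of the result: _calc_headers starts from std_headers, overlays
# the extractor-provided 'http_headers', then adds a 'Cookie' header from the
# cookiejar and, when the geo-bypass code has set '__x_forwarded_for_ip', an
# 'X-Forwarded-For' header, e.g. (hypothetical values)
#   {'User-Agent': ..., 'Cookie': 'a=b', 'X-Forwarded-For': '1.2.3.4'}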
dd82ffea
JMF
1655 def process_video_result(self, info_dict, download=True):
1656 assert info_dict.get('_type', 'video') == 'video'
1657
bec1fad2
PH
1658 if 'id' not in info_dict:
1659 raise ExtractorError('Missing "id" field in extractor result')
1660 if 'title' not in info_dict:
1661 raise ExtractorError('Missing "title" field in extractor result')
1662
c9969434
S
1663 def report_force_conversion(field, field_not, conversion):
1664 self.report_warning(
1665 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
1666 % (field, field_not, conversion))
1667
1668 def sanitize_string_field(info, string_field):
1669 field = info.get(string_field)
1670 if field is None or isinstance(field, compat_str):
1671 return
1672 report_force_conversion(string_field, 'a string', 'string')
1673 info[string_field] = compat_str(field)
1674
1675 def sanitize_numeric_fields(info):
1676 for numeric_field in self._NUMERIC_FIELDS:
1677 field = info.get(numeric_field)
1678 if field is None or isinstance(field, compat_numeric_types):
1679 continue
1680 report_force_conversion(numeric_field, 'numeric', 'int')
1681 info[numeric_field] = int_or_none(field)
1682
1683 sanitize_string_field(info_dict, 'id')
1684 sanitize_numeric_fields(info_dict)
be6217b2 1685
dd82ffea
JMF
1686 if 'playlist' not in info_dict:
1687 # It isn't part of a playlist
1688 info_dict['playlist'] = None
1689 info_dict['playlist_index'] = None
1690
d5519808 1691 thumbnails = info_dict.get('thumbnails')
cfb56d1a
PH
1692 if thumbnails is None:
1693 thumbnail = info_dict.get('thumbnail')
1694 if thumbnail:
a7a14d95 1695 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
d5519808 1696 if thumbnails:
be6d7229 1697 thumbnails.sort(key=lambda t: (
d37708fc
RA
1698 t.get('preference') if t.get('preference') is not None else -1,
1699 t.get('width') if t.get('width') is not None else -1,
1700 t.get('height') if t.get('height') is not None else -1,
1701 t.get('id') if t.get('id') is not None else '', t.get('url')))
f6c24009 1702 for i, t in enumerate(thumbnails):
dcf77cf1 1703 t['url'] = sanitize_url(t['url'])
9603e8a7 1704 if t.get('width') and t.get('height'):
d5519808 1705 t['resolution'] = '%dx%d' % (t['width'], t['height'])
f6c24009
PH
1706 if t.get('id') is None:
1707 t['id'] = '%d' % i
d5519808 1708
b7b72db9 1709 if self.params.get('list_thumbnails'):
1710 self.list_thumbnails(info_dict)
1711 return
1712
536a55da
S
1713 thumbnail = info_dict.get('thumbnail')
1714 if thumbnail:
1715 info_dict['thumbnail'] = sanitize_url(thumbnail)
1716 elif thumbnails:
d5519808
PH
1717 info_dict['thumbnail'] = thumbnails[-1]['url']
1718
c9ae7b95 1719 if 'display_id' not in info_dict and 'id' in info_dict:
0afef30b
PH
1720 info_dict['display_id'] = info_dict['id']
1721
955c4514 1722 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
a55e36f4
S
1723 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
1724 # see http://bugs.python.org/issue1646728)
1725 try:
1726 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1727 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1728 except (ValueError, OverflowError, OSError):
1729 pass
9d2ecdbc 1730
33d2fc2f
S
1731 # Auto-generate title fields corresponding to the *_number fields when missing
1732 # in order to always have clean titles. This is very common for TV series.
1733 for field in ('chapter', 'season', 'episode'):
1734 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
1735 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
1736
05108a49
S
1737 for cc_kind in ('subtitles', 'automatic_captions'):
1738 cc = info_dict.get(cc_kind)
1739 if cc:
1740 for _, subtitle in cc.items():
1741 for subtitle_format in subtitle:
1742 if subtitle_format.get('url'):
1743 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
1744 if subtitle_format.get('ext') is None:
1745 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
1746
1747 automatic_captions = info_dict.get('automatic_captions')
4bba3716 1748 subtitles = info_dict.get('subtitles')
4bba3716 1749
a504ced0 1750 if self.params.get('listsubtitles', False):
360e1ca5 1751 if 'automatic_captions' in info_dict:
05108a49
S
1752 self.list_subtitles(
1753 info_dict['id'], automatic_captions, 'automatic captions')
4bba3716 1754 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
a504ced0 1755 return
05108a49 1756
360e1ca5 1757 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 1758 info_dict['id'], subtitles, automatic_captions)
a504ced0 1759
dd82ffea
JMF
1760 # We now pick which formats have to be downloaded
1761 if info_dict.get('formats') is None:
1762 # There's only one format available
1763 formats = [info_dict]
1764 else:
1765 formats = info_dict['formats']
1766
db95dc13
PH
1767 if not formats:
1768 raise ExtractorError('No video formats found!')
1769
73af5cc8
S
1770 def is_wellformed(f):
1771 url = f.get('url')
a5ac0c47 1772 if not url:
73af5cc8
S
1773 self.report_warning(
1774 '"url" field is missing or empty - skipping format, '
1775 'there is an error in extractor')
a5ac0c47
S
1776 return False
1777 if isinstance(url, bytes):
1778 sanitize_string_field(f, 'url')
1779 return True
73af5cc8
S
1780
1781 # Filter out malformed formats for better extraction robustness
1782 formats = list(filter(is_wellformed, formats))
1783
181c7053
S
1784 formats_dict = {}
1785
dd82ffea 1786 # We check that all the formats have the format and format_id fields
db95dc13 1787 for i, format in enumerate(formats):
c9969434
S
1788 sanitize_string_field(format, 'format_id')
1789 sanitize_numeric_fields(format)
dcf77cf1 1790 format['url'] = sanitize_url(format['url'])
e74e3b63 1791 if not format.get('format_id'):
8016c922 1792 format['format_id'] = compat_str(i)
e2effb08
S
1793 else:
1794 # Sanitize format_id from characters used in format selector expression
ec85ded8 1795 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
1796 format_id = format['format_id']
1797 if format_id not in formats_dict:
1798 formats_dict[format_id] = []
1799 formats_dict[format_id].append(format)
1800
1801 # Make sure all formats have unique format_id
1802 for format_id, ambiguous_formats in formats_dict.items():
1803 if len(ambiguous_formats) > 1:
1804 for i, format in enumerate(ambiguous_formats):
1805 format['format_id'] = '%s-%d' % (format_id, i)
1806
1807 for i, format in enumerate(formats):
8c51aa65 1808 if format.get('format') is None:
6febd1c1 1809 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
1810 id=format['format_id'],
1811 res=self.format_resolution(format),
6febd1c1 1812 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
8c51aa65 1813 )
c1002e96 1814 # Automatically determine file extension if missing
5b1d8575 1815 if format.get('ext') is None:
cce929ea 1816 format['ext'] = determine_ext(format['url']).lower()
b5559424
S
1817 # Automatically determine protocol if missing (useful for format
1818 # selection purposes)
6f0be937 1819 if format.get('protocol') is None:
b5559424 1820 format['protocol'] = determine_protocol(format)
e5660ee6
JMF
1821 # Add HTTP headers, so that external programs can use them from the
1822 # json output
1823 full_format_info = info_dict.copy()
1824 full_format_info.update(format)
1825 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
1826 # Remove private housekeeping stuff
1827 if '__x_forwarded_for_ip' in info_dict:
1828 del info_dict['__x_forwarded_for_ip']
dd82ffea 1829
4bcc7bd1 1830 # TODO Central sorting goes here
99e206d5 1831
f89197d7 1832 if formats[0] is not info_dict:
b3d9ef88
JMF
1833 # only set the 'formats' field if the original info_dict lists them;
1834 # otherwise we end up with a circular reference: the first (and only)
f89197d7 1835 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 1836 # which can't be exported to json
b3d9ef88 1837 info_dict['formats'] = formats
cfb56d1a 1838 if self.params.get('listformats'):
bfaae0a7 1839 self.list_formats(info_dict)
1840 return
1841
de3ef3ed 1842 req_format = self.params.get('format')
a9c58ad9 1843 if req_format is None:
0017d9ad
S
1844 req_format = self._default_format_spec(info_dict, download=download)
1845 if self.params.get('verbose'):
29f7c58a 1846 self._write_string('[debug] Default format spec: %s\n' % req_format)
0017d9ad 1847
5acfa126 1848 format_selector = self.build_format_selector(req_format)
317f7ab6
S
1849
1850 # While in format selection we may need to have access to the original
1851 # format set in order to calculate some metrics or do some processing.
1852 # For now we need to be able to guess whether the original formats provided
1853 # by the extractor are incomplete or not (i.e. whether the extractor provides
1854 # only video-only or audio-only formats) for proper format selection for
1855 # extractors with such incomplete formats (see
067aa17e 1856 # https://github.com/ytdl-org/youtube-dl/pull/5556).
317f7ab6
S
1857 # Since formats may be filtered during format selection and may not match
1858 # the original formats, the results may be incorrect. Thus the original formats
1859 # or pre-calculated metrics should be passed to the format selection routines
1860 # as well.
1861 # We therefore pass a context object containing all the necessary additional
1862 # data instead of just the formats.
1863 # This fixes the incorrect format selection issue (see
067aa17e 1864 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2e221ca3 1865 incomplete_formats = (
317f7ab6 1866 # All formats are video-only or
3089bc74 1867 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
317f7ab6 1868 # all formats are audio-only
3089bc74 1869 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
317f7ab6
S
1870
1871 ctx = {
1872 'formats': formats,
1873 'incomplete_formats': incomplete_formats,
1874 }
1875
1876 formats_to_download = list(format_selector(ctx))
dd82ffea 1877 if not formats_to_download:
6febd1c1 1878 raise ExtractorError('requested format not available',
78a3a9f8 1879 expected=True)
dd82ffea
JMF
1880
1881 if download:
909d24dd 1882 self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
dd82ffea 1883 if len(formats_to_download) > 1:
6febd1c1 1884 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
dd82ffea
JMF
1885 for format in formats_to_download:
1886 new_info = dict(info_dict)
1887 new_info.update(format)
1888 self.process_info(new_info)
1889 # We update the info dict with the best quality format (backwards compatibility)
1890 info_dict.update(formats_to_download[-1])
1891 return info_dict
1892
98c70d6f 1893 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 1894 """Select the requested subtitles and their format"""
98c70d6f
JMF
1895 available_subs = {}
1896 if normal_subtitles and self.params.get('writesubtitles'):
1897 available_subs.update(normal_subtitles)
1898 if automatic_captions and self.params.get('writeautomaticsub'):
1899 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
1900 if lang not in available_subs:
1901 available_subs[lang] = cap_info
1902
4d171848
JMF
1903 if (not self.params.get('writesubtitles') and not
1904 self.params.get('writeautomaticsub') or not
1905 available_subs):
1906 return None
a504ced0
JMF
1907
1908 if self.params.get('allsubtitles', False):
1909 requested_langs = available_subs.keys()
1910 else:
1911 if self.params.get('subtitleslangs', False):
1912 requested_langs = self.params.get('subtitleslangs')
1913 elif 'en' in available_subs:
1914 requested_langs = ['en']
1915 else:
1916 requested_langs = [list(available_subs.keys())[0]]
1917
1918 formats_query = self.params.get('subtitlesformat', 'best')
1919 formats_preference = formats_query.split('/') if formats_query else []
1920 subs = {}
1921 for lang in requested_langs:
1922 formats = available_subs.get(lang)
1923 if formats is None:
1924 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1925 continue
a504ced0
JMF
1926 for ext in formats_preference:
1927 if ext == 'best':
1928 f = formats[-1]
1929 break
1930 matches = list(filter(lambda f: f['ext'] == ext, formats))
1931 if matches:
1932 f = matches[-1]
1933 break
1934 else:
1935 f = formats[-1]
1936 self.report_warning(
1937 'No subtitle format found matching "%s" for language %s, '
1938 'using %s' % (formats_query, lang, f['ext']))
1939 subs[lang] = f
1940 return subs
1941
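# Illustrative sketch: with hypothetical params {'writesubtitles': True,
# 'subtitleslangs': ['en', 'de'], 'subtitlesformat': 'srt/best'},
# process_subtitles picks, per requested language, the last listed 'srt'
# entry if one exists and otherwise the last listed format, and warns when a
# language has no subtitles at all:
#
#   subs = ydl.process_subtitles('some-id', normal_subtitles, automatic_captions)
#   # -> e.g. {'en': {'ext': 'srt', 'url': ...}, 'de': {'ext': 'vtt', 'url': ...}}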
d06daf23
S
1942 def __forced_printings(self, info_dict, filename, incomplete):
1943 def print_mandatory(field):
1944 if (self.params.get('force%s' % field, False)
1945 and (not incomplete or info_dict.get(field) is not None)):
1946 self.to_stdout(info_dict[field])
1947
1948 def print_optional(field):
1949 if (self.params.get('force%s' % field, False)
1950 and info_dict.get(field) is not None):
1951 self.to_stdout(info_dict[field])
1952
1953 print_mandatory('title')
1954 print_mandatory('id')
1955 if self.params.get('forceurl', False) and not incomplete:
1956 if info_dict.get('requested_formats') is not None:
1957 for f in info_dict['requested_formats']:
1958 self.to_stdout(f['url'] + f.get('play_path', ''))
1959 else:
1960 # For RTMP URLs, also include the playpath
1961 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1962 print_optional('thumbnail')
1963 print_optional('description')
1964 if self.params.get('forcefilename', False) and filename is not None:
1965 self.to_stdout(filename)
1966 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1967 self.to_stdout(formatSeconds(info_dict['duration']))
1968 print_mandatory('format')
1969 if self.params.get('forcejson', False):
1970 self.to_stdout(json.dumps(info_dict))
1971
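# For reference: the 'force*' params checked above roughly correspond to the
# --get-title, --get-id, --get-url, --get-thumbnail, --get-description,
# --get-filename, --get-duration, --get-format and --dump-json command line
# options.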
8222d8de
JMF
1972 def process_info(self, info_dict):
1973 """Process a single resolved IE result."""
1974
1975 assert info_dict.get('_type', 'video') == 'video'
fd288278 1976
0202b52a 1977 info_dict.setdefault('__postprocessors', [])
1978
fd288278
PH
1979 max_downloads = self.params.get('max_downloads')
1980 if max_downloads is not None:
1981 if self._num_downloads >= int(max_downloads):
1982 raise MaxDownloadsReached()
8222d8de 1983
d06daf23 1984 # TODO: backward compatibility, to be removed
8222d8de 1985 info_dict['fulltitle'] = info_dict['title']
8222d8de 1986
11b85ce6 1987 if 'format' not in info_dict:
8222d8de
JMF
1988 info_dict['format'] = info_dict['ext']
1989
8b0d7497 1990 if self._match_entry(info_dict, incomplete=False) is not None:
8222d8de
JMF
1991 return
1992
fd288278 1993 self._num_downloads += 1
8222d8de 1994
5bfa4862 1995 info_dict = self.pre_process(info_dict)
1996
0202b52a 1997 filename = self.prepare_filename(info_dict, warn=True)
1998 info_dict['_filename'] = full_filename = self.prepare_filepath(filename)
1999 temp_filename = self.prepare_filepath(filename, 'temp')
2000 files_to_move = {}
8222d8de
JMF
2001
2002 # Forced printings
0202b52a 2003 self.__forced_printings(info_dict, full_filename, incomplete=False)
8222d8de 2004
8222d8de 2005 if self.params.get('simulate', False):
2d30509f 2006 if self.params.get('force_write_download_archive', False):
2007 self.record_download_archive(info_dict)
2008
2009 # Do nothing else if in simulate mode
8222d8de
JMF
2010 return
2011
2012 if filename is None:
2013 return
2014
c5c9bf0c 2015 def ensure_dir_exists(path):
0202b52a 2016 return make_dir(path, self.report_error)
c5c9bf0c 2017
0202b52a 2018 if not ensure_dir_exists(encodeFilename(full_filename)):
2019 return
2020 if not ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
2021 return
2022
2023 if self.params.get('writedescription', False):
0202b52a 2024 descfn = replace_extension(
2025 self.prepare_filepath(filename, 'description'),
2026 'description', info_dict.get('ext'))
2027 if not ensure_dir_exists(encodeFilename(descfn)):
2028 return
0c3d0f51 2029 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
6febd1c1 2030 self.to_screen('[info] Video description is already present')
f00fd51d
JMF
2031 elif info_dict.get('description') is None:
2032 self.report_warning('There\'s no description to write.')
7b6fefc9
PH
2033 else:
2034 try:
6febd1c1 2035 self.to_screen('[info] Writing video description to: ' + descfn)
7b6fefc9
PH
2036 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2037 descfile.write(info_dict['description'])
7b6fefc9 2038 except (OSError, IOError):
6febd1c1 2039 self.report_error('Cannot write description file ' + descfn)
7b6fefc9 2040 return
8222d8de 2041
1fb07d10 2042 if self.params.get('writeannotations', False):
0202b52a 2043 annofn = replace_extension(
2044 self.prepare_filepath(filename, 'annotation'),
2045 'annotations.xml', info_dict.get('ext'))
2046 if not ensure_dir_exists(encodeFilename(annofn)):
2047 return
0c3d0f51 2048 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 2049 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
2050 elif not info_dict.get('annotations'):
2051 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
2052 else:
2053 try:
6febd1c1 2054 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
2055 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2056 annofile.write(info_dict['annotations'])
2057 except (KeyError, TypeError):
6febd1c1 2058 self.report_warning('There are no annotations to write.')
7b6fefc9 2059 except (OSError, IOError):
6febd1c1 2060 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 2061 return
1fb07d10 2062
9f448fcb 2063 def dl(name, info, subtitle=False):
98b69821 2064 fd = get_suitable_downloader(info, self.params)(self, self.params)
2065 for ph in self._progress_hooks:
2066 fd.add_progress_hook(ph)
2067 if self.params.get('verbose'):
29f7c58a 2068 self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
9f448fcb 2069 return fd.download(name, info, subtitle)
98b69821 2070
c4a91be7 2071 subtitles_are_requested = any([self.params.get('writesubtitles', False),
0b7f3118 2072 self.params.get('writeautomaticsub')])
c4a91be7 2073
c84dd8a9 2074 if subtitles_are_requested and info_dict.get('requested_subtitles'):
8222d8de
JMF
2075 # subtitle download errors are already handled as troubles in the relevant IE,
2076 # so it will silently go on when used with an IE that does not support subtitles
c84dd8a9 2077 subtitles = info_dict['requested_subtitles']
fa57af1e 2078 # ie = self.get_info_extractor(info_dict['extractor_key'])
a504ced0
JMF
2079 for sub_lang, sub_info in subtitles.items():
2080 sub_format = sub_info['ext']
0202b52a 2081 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2082 sub_filename_final = subtitles_filename(
2083 self.prepare_filepath(filename, 'subtitle'),
2084 sub_lang, sub_format, info_dict.get('ext'))
0c3d0f51 2085 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
5ff1bc0c 2086 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
0202b52a 2087 files_to_move[sub_filename] = sub_filename_final
a504ced0 2088 else:
0c9df79e 2089 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
5ff1bc0c
RA
2090 if sub_info.get('data') is not None:
2091 try:
2092 # Use newline='' to prevent conversion of newline characters
067aa17e 2093 # See https://github.com/ytdl-org/youtube-dl/issues/10268
5ff1bc0c
RA
2094 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2095 subfile.write(sub_info['data'])
0202b52a 2096 files_to_move[sub_filename] = sub_filename_final
5ff1bc0c
RA
2097 except (OSError, IOError):
2098 self.report_error('Cannot write subtitles file ' + sub_filename)
2099 return
7b6fefc9 2100 else:
5ff1bc0c 2101 try:
9f448fcb
U
2102 dl(sub_filename, sub_info, subtitle=True)
2103 '''
0c9df79e
U
2104 if self.params.get('sleep_interval_subtitles', False):
2105 dl(sub_filename, sub_info)
2106 else:
2107 sub_data = ie._request_webpage(
2108 sub_info['url'], info_dict['id'], note=False).read()
2109 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
2110 subfile.write(sub_data)
9f448fcb 2111 '''
0202b52a 2112 files_to_move[sub_filename] = sub_filename_final
0c9df79e 2113 except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
5ff1bc0c
RA
2114 self.report_warning('Unable to download subtitle for "%s": %s' %
2115 (sub_lang, error_to_compat_str(err)))
2116 continue
8222d8de 2117
57df9f53
U
2118 if self.params.get('skip_download', False):
2119 if self.params.get('convertsubtitles', False):
0202b52a 2120 # subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
57df9f53
U
2121 filename_real_ext = os.path.splitext(filename)[1][1:]
2122 filename_wo_ext = (
0202b52a 2123 os.path.splitext(full_filename)[0]
57df9f53 2124 if filename_real_ext == info_dict['ext']
0202b52a 2125 else full_filename)
57df9f53 2126 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
0202b52a 2127 # if subconv.available:
2128 # info_dict['__postprocessors'].append(subconv)
57df9f53 2129 if os.path.exists(encodeFilename(afilename)):
f791b419
U
2130 self.to_screen(
2131 '[download] %s has already been downloaded and '
2132 'converted' % afilename)
57df9f53
U
2133 else:
2134 try:
0202b52a 2135 self.post_process(full_filename, info_dict, files_to_move)
57df9f53
U
2136 except (PostProcessingError) as err:
2137 self.report_error('postprocessing: %s' % str(err))
2138 return
2139
8222d8de 2140 if self.params.get('writeinfojson', False):
0202b52a 2141 infofn = replace_extension(
2142 self.prepare_filepath(filename, 'infojson'),
2143 'info.json', info_dict.get('ext'))
2144 if not ensure_dir_exists(encodeFilename(infofn)):
2145 return
0c3d0f51 2146 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
66c935fb 2147 self.to_screen('[info] Video metadata is already present')
7b6fefc9 2148 else:
66c935fb 2149 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
7b6fefc9 2150 try:
cb202fd2 2151 write_json_file(self.filter_requested_info(info_dict), infofn)
7b6fefc9 2152 except (OSError, IOError):
66c935fb 2153 self.report_error('Cannot write video metadata to JSON file ' + infofn)
7b6fefc9 2154 return
06167fbb 2155 info_dict['__infojson_filepath'] = infofn
8222d8de 2156
0202b52a 2157 thumbdir = os.path.dirname(self.prepare_filepath(filename, 'thumbnail'))
2158 for thumbfn in self._write_thumbnails(info_dict, temp_filename):
2159 files_to_move[thumbfn] = os.path.join(thumbdir, os.path.basename(thumbfn))
8222d8de 2160
732044af 2161 # Write internet shortcut files
2162 url_link = webloc_link = desktop_link = False
2163 if self.params.get('writelink', False):
2164 if sys.platform == "darwin": # macOS.
2165 webloc_link = True
2166 elif sys.platform.startswith("linux"):
2167 desktop_link = True
2168 else: # if sys.platform in ['win32', 'cygwin']:
2169 url_link = True
2170 if self.params.get('writeurllink', False):
2171 url_link = True
2172 if self.params.get('writewebloclink', False):
2173 webloc_link = True
2174 if self.params.get('writedesktoplink', False):
2175 desktop_link = True
2176
2177 if url_link or webloc_link or desktop_link:
2178 if 'webpage_url' not in info_dict:
2179 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2180 return
2181 ascii_url = iri_to_uri(info_dict['webpage_url'])
2182
2183 def _write_link_file(extension, template, newline, embed_filename):
0202b52a 2184 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
10e3742e 2185 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
732044af 2186 self.to_screen('[info] Internet shortcut is already present')
2187 else:
2188 try:
2189 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2190 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2191 template_vars = {'url': ascii_url}
2192 if embed_filename:
2193 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2194 linkfile.write(template % template_vars)
2195 except (OSError, IOError):
2196 self.report_error('Cannot write internet shortcut ' + linkfn)
2197 return False
2198 return True
2199
2200 if url_link:
2201 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2202 return
2203 if webloc_link:
2204 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2205 return
2206 if desktop_link:
2207 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2208 return
2209
2210 # Download
2211 must_record_download_archive = False
8222d8de 2212 if not self.params.get('skip_download', False):
4340deca 2213 try:
0202b52a 2214
6b591b29 2215 def existing_file(*filepaths):
2216 ext = info_dict.get('ext')
2217 final_ext = self.params.get('final_ext', ext)
2218 existing_files = []
2219 for file in orderedSet(filepaths):
2220 if final_ext != ext:
2221 converted = replace_extension(file, final_ext, ext)
2222 if os.path.exists(encodeFilename(converted)):
2223 existing_files.append(converted)
2224 if os.path.exists(encodeFilename(file)):
2225 existing_files.append(file)
2226
2227 if not existing_files or self.params.get('overwrites', False):
2228 for file in orderedSet(existing_files):
2229 self.report_file_delete(file)
2230 os.remove(encodeFilename(file))
2231 return None
2232
2233 self.report_file_already_downloaded(existing_files[0])
2234 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2235 return existing_files[0]
0202b52a 2236
2237 success = True
4340deca
P
2238 if info_dict.get('requested_formats') is not None:
2239 downloaded = []
d47aeb22 2240 merger = FFmpegMergerPP(self)
f740fae2 2241 if not merger.available:
4340deca
P
2242 postprocessors = []
2243 self.report_warning('You have requested multiple '
e4172ac9 2244 'formats but ffmpeg is not installed.'
4a5a898a 2245 ' The formats won\'t be merged.')
6350728b 2246 else:
4340deca 2247 postprocessors = [merger]
81cd954a
S
2248
2249 def compatible_formats(formats):
d03cfdce 2250 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2251 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2252 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2253 if len(video_formats) > 2 or len(audio_formats) > 2:
2254 return False
2255
81cd954a 2256 # Check extension
d03cfdce 2257 exts = set(format.get('ext') for format in formats)
2258 COMPATIBLE_EXTS = (
2259 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2260 set(('webm',)),
2261 )
2262 for ext_sets in COMPATIBLE_EXTS:
2263 if ext_sets.issuperset(exts):
2264 return True
81cd954a
S
2265 # TODO: Check acodec/vcodec
2266 return False
2267
2268 requested_formats = info_dict['requested_formats']
0202b52a 2269 old_ext = info_dict['ext']
c0dea0a7 2270 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
38c6902b 2271 info_dict['ext'] = 'mkv'
4a5a898a
S
2272 self.report_warning(
2273 'Requested formats are incompatible for merge and will be merged into mkv.')
0202b52a 2274
2275 def correct_ext(filename):
2276 filename_real_ext = os.path.splitext(filename)[1][1:]
2277 filename_wo_ext = (
2278 os.path.splitext(filename)[0]
2279 if filename_real_ext == old_ext
2280 else filename)
2281 return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2282
38c6902b 2283 # Ensure filename always has a correct extension for successful merge
0202b52a 2284 full_filename = correct_ext(full_filename)
2285 temp_filename = correct_ext(temp_filename)
2286 dl_filename = existing_file(full_filename, temp_filename)
2287 if dl_filename is None:
81cd954a 2288 for f in requested_formats:
5b5fbc08
JMF
2289 new_info = dict(info_dict)
2290 new_info.update(f)
c5c9bf0c 2291 fname = prepend_extension(
0202b52a 2292 self.prepare_filepath(self.prepare_filename(new_info), 'temp'),
c5c9bf0c
S
2293 'f%s' % f['format_id'], new_info['ext'])
2294 if not ensure_dir_exists(fname):
2295 return
5b5fbc08 2296 downloaded.append(fname)
a9e7f546 2297 partial_success, real_download = dl(fname, new_info)
5b5fbc08
JMF
2298 success = success and partial_success
2299 info_dict['__postprocessors'] = postprocessors
2300 info_dict['__files_to_merge'] = downloaded
a9e7f546 2301 # Even if nothing was actually downloaded, the merge itself only happens now
2302 info_dict['__real_download'] = True
4340deca
P
2303 else:
2304 # Just a single file
0202b52a 2305 dl_filename = existing_file(full_filename, temp_filename)
2306 if dl_filename is None:
2307 success, real_download = dl(temp_filename, info_dict)
2308 info_dict['__real_download'] = real_download
2309
0202b52a 2310 dl_filename = dl_filename or temp_filename
c571435f 2311 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 2312
4340deca 2313 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
7960b056 2314 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca
P
2315 return
2316 except (OSError, IOError) as err:
2317 raise UnavailableVideoError(err)
2318 except (ContentTooShortError, ) as err:
2319 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2320 return
8222d8de 2321
e38cafe9 2322 if success and filename != '-':
6271f1ca 2323 # Fixup content
62cd676c
PH
2324 fixup_policy = self.params.get('fixup')
2325 if fixup_policy is None:
2326 fixup_policy = 'detect_or_warn'
2327
e4172ac9 2328 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
d1e4a464 2329
6271f1ca
PH
2330 stretched_ratio = info_dict.get('stretched_ratio')
2331 if stretched_ratio is not None and stretched_ratio != 1:
6271f1ca
PH
2332 if fixup_policy == 'warn':
2333 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2334 info_dict['id'], stretched_ratio))
2335 elif fixup_policy == 'detect_or_warn':
2336 stretched_pp = FFmpegFixupStretchedPP(self)
2337 if stretched_pp.available:
6271f1ca
PH
2338 info_dict['__postprocessors'].append(stretched_pp)
2339 else:
2340 self.report_warning(
d1e4a464
S
2341 '%s: Non-uniform pixel ratio (%s). %s'
2342 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
6271f1ca 2343 else:
62cd676c
PH
2344 assert fixup_policy in ('ignore', 'never')
2345
3089bc74 2346 if (info_dict.get('requested_formats') is None
6b591b29 2347 and info_dict.get('container') == 'm4a_dash'
2348 and info_dict.get('ext') == 'm4a'):
62cd676c 2349 if fixup_policy == 'warn':
d1e4a464
S
2350 self.report_warning(
2351 '%s: writing DASH m4a. '
2352 'Only some players support this container.'
2353 % info_dict['id'])
62cd676c
PH
2354 elif fixup_policy == 'detect_or_warn':
2355 fixup_pp = FFmpegFixupM4aPP(self)
2356 if fixup_pp.available:
62cd676c
PH
2357 info_dict['__postprocessors'].append(fixup_pp)
2358 else:
2359 self.report_warning(
d1e4a464
S
2360 '%s: writing DASH m4a. '
2361 'Only some players support this container. %s'
2362 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
62cd676c
PH
2363 else:
2364 assert fixup_policy in ('ignore', 'never')
6271f1ca 2365
3089bc74
S
2366 if (info_dict.get('protocol') == 'm3u8_native'
2367 or info_dict.get('protocol') == 'm3u8'
2368 and self.params.get('hls_prefer_native')):
f17f8651 2369 if fixup_policy == 'warn':
a02682fd 2370 self.report_warning('%s: malformed AAC bitstream detected.' % (
f17f8651 2371 info_dict['id']))
2372 elif fixup_policy == 'detect_or_warn':
2373 fixup_pp = FFmpegFixupM3u8PP(self)
2374 if fixup_pp.available:
f17f8651 2375 info_dict['__postprocessors'].append(fixup_pp)
2376 else:
2377 self.report_warning(
a02682fd 2378 '%s: malformed AAC bitstream detected. %s'
d1e4a464 2379 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
f17f8651 2380 else:
2381 assert fixup_policy in ('ignore', 'never')
2382
8222d8de 2383 try:
0202b52a 2384 self.post_process(dl_filename, info_dict, files_to_move)
8222d8de 2385 except (PostProcessingError) as err:
6febd1c1 2386 self.report_error('postprocessing: %s' % str(err))
8222d8de 2387 return
ab8e5e51
AM
2388 try:
2389 for ph in self._post_hooks:
0202b52a 2390 ph(full_filename)
ab8e5e51
AM
2391 except Exception as err:
2392 self.report_error('post hooks: %s' % str(err))
2393 return
2d30509f 2394 must_record_download_archive = True
2395
2396 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2397 self.record_download_archive(info_dict)
c3e6ffba 2398 max_downloads = self.params.get('max_downloads')
2399 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2400 raise MaxDownloadsReached()
8222d8de
JMF
2401
2402 def download(self, url_list):
2403 """Download a given list of URLs."""
acd69589 2404 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
3089bc74
S
2405 if (len(url_list) > 1
2406 and outtmpl != '-'
2407 and '%' not in outtmpl
2408 and self.params.get('max_downloads') != 1):
acd69589 2409 raise SameFileError(outtmpl)
8222d8de
JMF
2410
2411 for url in url_list:
2412 try:
5f6a1245 2413 # It also downloads the videos
61aa5ba3
S
2414 res = self.extract_info(
2415 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de 2416 except UnavailableVideoError:
6febd1c1 2417 self.report_error('unable to download video')
8222d8de 2418 except MaxDownloadsReached:
8b0d7497 2419 self.to_screen('[info] Maximum number of downloaded files reached')
2420 raise
2421 except ExistingVideoReached:
d83cb531 2422 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
8b0d7497 2423 raise
2424 except RejectedVideoReached:
d83cb531 2425 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
8222d8de 2426 raise
63e0be34
PH
2427 else:
2428 if self.params.get('dump_single_json', False):
2429 self.to_stdout(json.dumps(res))
8222d8de
JMF
2430
2431 return self._download_retcode
2432
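# Minimal usage sketch (the option values and URL are only examples):
#
#   with YoutubeDL({'format': 'bestvideo+bestaudio/best'}) as ydl:
#       ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])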
1dcc4c0c 2433 def download_with_info_file(self, info_filename):
31bd3925
JMF
2434 with contextlib.closing(fileinput.FileInput(
2435 [info_filename], mode='r',
2436 openhook=fileinput.hook_encoded('utf-8'))) as f:
2437 # FileInput doesn't have a read method, so we can't call json.load
cb202fd2 2438 info = self.filter_requested_info(json.loads('\n'.join(f)))
d4943898
JMF
2439 try:
2440 self.process_ie_result(info, download=True)
2441 except DownloadError:
2442 webpage_url = info.get('webpage_url')
2443 if webpage_url is not None:
6febd1c1 2444 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
d4943898
JMF
2445 return self.download([webpage_url])
2446 else:
2447 raise
2448 return self._download_retcode
1dcc4c0c 2449
cb202fd2
S
2450 @staticmethod
2451 def filter_requested_info(info_dict):
2452 return dict(
2453 (k, v) for k, v in info_dict.items()
2454 if k not in ['requested_formats', 'requested_subtitles'])
2455
5bfa4862 2456 def run_pp(self, pp, infodict, files_to_move={}):
2457 files_to_delete = []
2458 try:
2459 files_to_delete, infodict = pp.run(infodict)
2460 except PostProcessingError as e:
2461 self.report_error(e.msg)
2462 if not files_to_delete:
2463 return files_to_move, infodict
2464
2465 if self.params.get('keepvideo', False):
2466 for f in files_to_delete:
2467 files_to_move.setdefault(f, '')
2468 else:
2469 for old_filename in set(files_to_delete):
2470 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2471 try:
2472 os.remove(encodeFilename(old_filename))
2473 except (IOError, OSError):
2474 self.report_warning('Unable to remove downloaded original file')
2475 if old_filename in files_to_move:
2476 del files_to_move[old_filename]
2477 return files_to_move, infodict
2478
2479 def pre_process(self, ie_info):
2480 info = dict(ie_info)
2481 for pp in self._pps['beforedl']:
2482 info = self.run_pp(pp, info)[1]
2483 return info
2484
0202b52a 2485 def post_process(self, filename, ie_info, files_to_move={}):
8222d8de
JMF
2486 """Run all the postprocessors on the given file."""
2487 info = dict(ie_info)
2488 info['filepath'] = filename
0202b52a 2489
5bfa4862 2490 for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
2491 files_to_move, info = self.run_pp(pp, info, files_to_move)
2492 info = self.run_pp(MoveFilesAfterDownloadPP(self, files_to_move), info, files_to_move)[1]
2493 for pp in self._pps['aftermove']:
2494 files_to_move, info = self.run_pp(pp, info, {})
c1c9a79c 2495
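# Pipeline order, summarised: 'beforedl' PPs run in pre_process() before the
# download; post_process() then runs the per-video '__postprocessors' plus the
# 'normal' PPs, lets MoveFilesAfterDownloadPP relocate the temporary files,
# and finally runs the 'aftermove' PPs on the files in their final location.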
5db07df6 2496 def _make_archive_id(self, info_dict):
e9fef7ee
S
2497 video_id = info_dict.get('id')
2498 if not video_id:
2499 return
5db07df6
PH
2500 # Future-proof against any change in case
2501 # and backwards compatibility with prior versions
e9fef7ee 2502 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 2503 if extractor is None:
1211bb6d
S
2504 url = str_or_none(info_dict.get('url'))
2505 if not url:
2506 return
e9fef7ee
S
2507 # Try to find matching extractor for the URL and take its ie_key
2508 for ie in self._ies:
1211bb6d 2509 if ie.suitable(url):
e9fef7ee
S
2510 extractor = ie.ie_key()
2511 break
2512 else:
2513 return
d0757229 2514 return '%s %s' % (extractor.lower(), video_id)
5db07df6
PH
2515
2516 def in_download_archive(self, info_dict):
2517 fn = self.params.get('download_archive')
2518 if fn is None:
2519 return False
2520
2521 vid_id = self._make_archive_id(info_dict)
e9fef7ee 2522 if not vid_id:
7012b23c 2523 return False # Incomplete video information
5db07df6 2524
a45e8619 2525 return vid_id in self.archive
c1c9a79c
PH
2526
2527 def record_download_archive(self, info_dict):
2528 fn = self.params.get('download_archive')
2529 if fn is None:
2530 return
5db07df6
PH
2531 vid_id = self._make_archive_id(info_dict)
2532 assert vid_id
c1c9a79c 2533 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
6febd1c1 2534 archive_file.write(vid_id + '\n')
a45e8619 2535 self.archive.add(vid_id)
dd82ffea 2536
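# Example: archive ids are '<lowercased extractor key> <video id>', so a
# download_archive file is plain text with one entry per line, e.g.
# (hypothetical ids)
#
#   youtube dQw4w9WgXcQ
#   vimeo 123456789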
8c51aa65 2537 @staticmethod
8abeeb94 2538 def format_resolution(format, default='unknown'):
fb04e403
PH
2539 if format.get('vcodec') == 'none':
2540 return 'audio only'
f49d89ee
PH
2541 if format.get('resolution') is not None:
2542 return format['resolution']
8c51aa65
JMF
2543 if format.get('height') is not None:
2544 if format.get('width') is not None:
6febd1c1 2545 res = '%sx%s' % (format['width'], format['height'])
8c51aa65 2546 else:
6febd1c1 2547 res = '%sp' % format['height']
f49d89ee 2548 elif format.get('width') is not None:
388ae76b 2549 res = '%dx?' % format['width']
8c51aa65 2550 else:
8abeeb94 2551 res = default
8c51aa65
JMF
2552 return res
2553
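# Illustrative values for the helper above:
#   format_resolution({'vcodec': 'none'})              -> 'audio only'
#   format_resolution({'width': 1280, 'height': 720})  -> '1280x720'
#   format_resolution({'height': 720})                 -> '720p'
#   format_resolution({'width': 1280})                 -> '1280x?'
#   format_resolution({})                              -> 'unknown'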
c57f7757
PH
2554 def _format_note(self, fdict):
2555 res = ''
2556 if fdict.get('ext') in ['f4f', 'f4m']:
2557 res += '(unsupported) '
32f90364
PH
2558 if fdict.get('language'):
2559 if res:
2560 res += ' '
9016d76f 2561 res += '[%s] ' % fdict['language']
c57f7757
PH
2562 if fdict.get('format_note') is not None:
2563 res += fdict['format_note'] + ' '
2564 if fdict.get('tbr') is not None:
2565 res += '%4dk ' % fdict['tbr']
2566 if fdict.get('container') is not None:
2567 if res:
2568 res += ', '
2569 res += '%s container' % fdict['container']
3089bc74
S
2570 if (fdict.get('vcodec') is not None
2571 and fdict.get('vcodec') != 'none'):
c57f7757
PH
2572 if res:
2573 res += ', '
2574 res += fdict['vcodec']
91c7271a 2575 if fdict.get('vbr') is not None:
c57f7757
PH
2576 res += '@'
2577 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2578 res += 'video@'
2579 if fdict.get('vbr') is not None:
2580 res += '%4dk' % fdict['vbr']
fbb21cf5 2581 if fdict.get('fps') is not None:
5d583bdf
S
2582 if res:
2583 res += ', '
2584 res += '%sfps' % fdict['fps']
c57f7757
PH
2585 if fdict.get('acodec') is not None:
2586 if res:
2587 res += ', '
2588 if fdict['acodec'] == 'none':
2589 res += 'video only'
2590 else:
2591 res += '%-5s' % fdict['acodec']
2592 elif fdict.get('abr') is not None:
2593 if res:
2594 res += ', '
2595 res += 'audio'
2596 if fdict.get('abr') is not None:
2597 res += '@%3dk' % fdict['abr']
2598 if fdict.get('asr') is not None:
2599 res += ' (%5dHz)' % fdict['asr']
2600 if fdict.get('filesize') is not None:
2601 if res:
2602 res += ', '
2603 res += format_bytes(fdict['filesize'])
9732d77e
PH
2604 elif fdict.get('filesize_approx') is not None:
2605 if res:
2606 res += ', '
2607 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 2608 return res
91c7271a 2609
76d321f6 2610 def _format_note_table(self, f):
2611 def join_fields(*vargs):
2612 return ', '.join((val for val in vargs if val != ''))
2613
2614 return join_fields(
2615 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
2616 format_field(f, 'language', '[%s]'),
2617 format_field(f, 'format_note'),
2618 format_field(f, 'container', ignore=(None, f.get('ext'))),
2619 format_field(f, 'asr', '%5dHz'))
2620
c57f7757 2621 def list_formats(self, info_dict):
94badb25 2622 formats = info_dict.get('formats', [info_dict])
76d321f6 2623 new_format = self.params.get('listformats_table', False)
2624 if new_format:
2625 table = [
2626 [
2627 format_field(f, 'format_id'),
2628 format_field(f, 'ext'),
2629 self.format_resolution(f),
2630 format_field(f, 'fps', '%d'),
2631 '|',
2632 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
2633 format_field(f, 'tbr', '%4dk'),
2634 f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n"),
2635 '|',
2636 format_field(f, 'vcodec', default='unknown').replace('none', ''),
2637 format_field(f, 'vbr', '%4dk'),
2638 format_field(f, 'acodec', default='unknown').replace('none', ''),
2639 format_field(f, 'abr', '%3dk'),
2640 format_field(f, 'asr', '%5dHz'),
2641 self._format_note_table(f)]
2642 for f in formats
2643 if f.get('preference') is None or f['preference'] >= -1000]
2644 header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
2645 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
2646 else:
2647 table = [
2648 [
2649 format_field(f, 'format_id'),
2650 format_field(f, 'ext'),
2651 self.format_resolution(f),
2652 self._format_note(f)]
2653 for f in formats
2654 if f.get('preference') is None or f['preference'] >= -1000]
2655 header_line = ['format code', 'extension', 'resolution', 'note']
57dd9a8f 2656
76d321f6 2657 # if len(formats) > 1:
2658 # table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
cfb56d1a 2659 self.to_screen(
76d321f6 2660 '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
2661 header_line,
2662 table,
2663 delim=new_format,
2664 extraGap=(0 if new_format else 1),
2665 hideEmpty=new_format)))
cfb56d1a
PH
2666
2667 def list_thumbnails(self, info_dict):
2668 thumbnails = info_dict.get('thumbnails')
2669 if not thumbnails:
b7b72db9 2670 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2671 return
cfb56d1a
PH
2672
2673 self.to_screen(
2674 '[info] Thumbnails for %s:' % info_dict['id'])
2675 self.to_screen(render_table(
2676 ['ID', 'width', 'height', 'URL'],
2677 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
dca08720 2678
360e1ca5 2679 def list_subtitles(self, video_id, subtitles, name='subtitles'):
a504ced0 2680 if not subtitles:
360e1ca5 2681 self.to_screen('%s has no %s' % (video_id, name))
a504ced0 2682 return
a504ced0 2683 self.to_screen(
edab9dbf
JMF
2684 'Available %s for %s:' % (name, video_id))
2685 self.to_screen(render_table(
2686 ['Language', 'formats'],
2687 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2688 for lang, formats in subtitles.items()]))
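# Illustrative output (extension names are examples): an 'en' entry whose formats,
# reversed, have exts vtt/ttml/srv3 would render roughly as 'en    vtt, ttml, srv3';
# actual column widths come from render_table.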
a504ced0 2689
dca08720
PH
2690 def urlopen(self, req):
2691 """ Start an HTTP download """
82d8a8b6 2692 if isinstance(req, compat_basestring):
67dda517 2693 req = sanitized_Request(req)
19a41fc6 2694 return self._opener.open(req, timeout=self._socket_timeout)
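# Usage sketch (illustrative, assuming `ydl` is a YoutubeDL instance):
#   page = ydl.urlopen('https://example.com').read()
# Plain string URLs are wrapped in sanitized_Request before being opened.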
dca08720
PH
2695
2696 def print_debug_header(self):
2697 if not self.params.get('verbose'):
2698 return
62fec3b2 2699
4192b51c 2700 if type('') is not compat_str:
067aa17e 2701 # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
4192b51c
PH
2702 self.report_warning(
2703 'Your Python is broken! Update to a newer and supported version')
2704
c6afed48
PH
2705 stdout_encoding = getattr(
2706 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
b0472057 2707 encoding_str = (
734f90bb
PH
2708 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
2709 locale.getpreferredencoding(),
2710 sys.getfilesystemencoding(),
c6afed48 2711 stdout_encoding,
b0472057 2712 self.get_encoding()))
4192b51c 2713 write_string(encoding_str, encoding=None)
734f90bb 2714
f74980cb 2715 self._write_string('[debug] yt-dlp version %s\n' % __version__)
e0986e31 2716 if _LAZY_LOADER:
f74980cb 2717 self._write_string('[debug] Lazy loading extractors enabled\n')
2718 if _PLUGIN_CLASSES:
2719 self._write_string(
2720 '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
dca08720
PH
2721 try:
2722 sp = subprocess.Popen(
2723 ['git', 'rev-parse', '--short', 'HEAD'],
2724 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
2725 cwd=os.path.dirname(os.path.abspath(__file__)))
f5b1bca9 2726 out, err = process_communicate_or_kill(sp)
dca08720
PH
2727 out = out.decode().strip()
2728 if re.match('[0-9a-f]+', out):
f74980cb 2729 self._write_string('[debug] Git HEAD: %s\n' % out)
70a1165b 2730 except Exception:
dca08720
PH
2731 try:
2732 sys.exc_clear()
70a1165b 2733 except Exception:
dca08720 2734 pass
b300cda4
S
2735
2736 def python_implementation():
2737 impl_name = platform.python_implementation()
2738 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
2739 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
2740 return impl_name
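# Illustrative return values: 'CPython' on CPython, or e.g. 'PyPy version 7.3.1'
# when sys.pypy_version_info is available.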
2741
2742 self._write_string('[debug] Python version %s (%s) - %s\n' % (
2743 platform.python_version(), python_implementation(),
2744 platform_name()))
d28b5171 2745
73fac4e9 2746 exe_versions = FFmpegPostProcessor.get_versions(self)
4c83c967 2747 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 2748 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171
PH
2749 exe_str = ', '.join(
2750 '%s %s' % (exe, v)
2751 for exe, v in sorted(exe_versions.items())
2752 if v
2753 )
2754 if not exe_str:
2755 exe_str = 'none'
2756 self._write_string('[debug] exe versions: %s\n' % exe_str)
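# Example of the resulting debug line (version numbers are illustrative):
#   [debug] exe versions: ffmpeg 4.2.4, ffprobe 4.2.4, rtmpdump 2.4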
dca08720
PH
2757
2758 proxy_map = {}
2759 for handler in self._opener.handlers:
2760 if hasattr(handler, 'proxies'):
2761 proxy_map.update(handler.proxies)
734f90bb 2762 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
dca08720 2763
58b1f00d
PH
2764 if self.params.get('call_home', False):
2765 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
2766 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
f5546c0b 2767 return
58b1f00d
PH
2768 latest_version = self.urlopen(
2769 'https://yt-dl.org/latest/version').read().decode('utf-8')
2770 if version_tuple(latest_version) > version_tuple(__version__):
2771 self.report_warning(
2772 'You are using an outdated version (newest version: %s)! '
2773 'See https://yt-dl.org/update if you need help updating.' %
2774 latest_version)
2775
e344693b 2776 def _setup_opener(self):
6ad14cab 2777 timeout_val = self.params.get('socket_timeout')
19a41fc6 2778 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
6ad14cab 2779
dca08720
PH
2780 opts_cookiefile = self.params.get('cookiefile')
2781 opts_proxy = self.params.get('proxy')
2782
2783 if opts_cookiefile is None:
2784 self.cookiejar = compat_cookiejar.CookieJar()
2785 else:
590bc6f6 2786 opts_cookiefile = expand_path(opts_cookiefile)
1bab3437 2787 self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
dca08720 2788 if os.access(opts_cookiefile, os.R_OK):
1d88b3e6 2789 self.cookiejar.load(ignore_discard=True, ignore_expires=True)
dca08720 2790
6a3f4c3f 2791 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720
PH
2792 if opts_proxy is not None:
2793 if opts_proxy == '':
2794 proxies = {}
2795 else:
2796 proxies = {'http': opts_proxy, 'https': opts_proxy}
2797 else:
2798 proxies = compat_urllib_request.getproxies()
067aa17e 2799 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720
PH
2800 if 'http' in proxies and 'https' not in proxies:
2801 proxies['https'] = proxies['http']
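# Illustrative: if getproxies() only returned {'http': 'http://127.0.0.1:3128'},
# the line above copies that entry to 'https' so HTTPS requests use the same proxy.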
91410c9b 2802 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2
PH
2803
2804 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d
PH
2805 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
2806 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 2807 redirect_handler = YoutubeDLRedirectHandler()
8b172c2e 2808 data_handler = compat_urllib_request_DataHandler()
6240b0a2
JMF
2809
2810 # When passing our own FileHandler instance, build_opener won't add the
2811 # default FileHandler and allows us to disable the file protocol, which
2812 # can be used for malicious purposes (see
067aa17e 2813 # https://github.com/ytdl-org/youtube-dl/issues/8227)
6240b0a2
JMF
2814 file_handler = compat_urllib_request.FileHandler()
2815
2816 def file_open(*args, **kwargs):
cefecac1 2817 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
6240b0a2
JMF
2818 file_handler.file_open = file_open
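# With file_open overridden above, an attempt such as
# self.urlopen('file:///etc/passwd') is expected to raise URLError
# instead of reading local files (the path shown is illustrative).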
2819
2820 opener = compat_urllib_request.build_opener(
fca6dba8 2821 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 2822
dca08720
PH
2823 # Delete the default user-agent header, which would otherwise apply in
2824 # cases where our custom HTTP handler doesn't come into play
067aa17e 2825 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720
PH
2826 opener.addheaders = []
2827 self._opener = opener
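# Illustrative configuration exercising this opener setup (values are examples,
# not defaults):
#   ydl = YoutubeDL({'proxy': 'http://127.0.0.1:3128', 'socket_timeout': 10,
#                    'cookiefile': 'cookies.txt'})
#   ydl.urlopen('https://example.com')  # routed through PerRequestProxyHandler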
62fec3b2
PH
2828
2829 def encode(self, s):
2830 if isinstance(s, bytes):
2831 return s # Already encoded
2832
2833 try:
2834 return s.encode(self.get_encoding())
2835 except UnicodeEncodeError as err:
2836 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2837 raise
2838
2839 def get_encoding(self):
2840 encoding = self.params.get('encoding')
2841 if encoding is None:
2842 encoding = preferredencoding()
2843 return encoding
ec82d85a
PH
2844
2845 def _write_thumbnails(self, info_dict, filename):
2846 if self.params.get('writethumbnail', False):
2847 thumbnails = info_dict.get('thumbnails')
2848 if thumbnails:
2849 thumbnails = [thumbnails[-1]]
2850 elif self.params.get('write_all_thumbnails', False):
0202b52a 2851 thumbnails = info_dict.get('thumbnails') or []
ec82d85a 2852 else:
0202b52a 2853 thumbnails = []
ec82d85a 2854
0202b52a 2855 ret = []
ec82d85a
PH
2856 for t in thumbnails:
2857 thumb_ext = determine_ext(t['url'], 'jpg')
2858 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2859 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
29f7c58a 2860 t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
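# Illustrative: for filename 'video.mp4' and a single jpg thumbnail, suffix is
# empty and thumb_filename becomes 'video.jpg' via replace_extension.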
ec82d85a 2861
0c3d0f51 2862 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
0202b52a 2863 ret.append(thumb_filename)
ec82d85a
PH
2864 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2865 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2866 else:
2867 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2868 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2869 try:
2870 uf = self.urlopen(t['url'])
d3d89c32 2871 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 2872 shutil.copyfileobj(uf, thumbf)
0202b52a 2873 ret.append(thumb_filename)
ec82d85a
PH
2874 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2875 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2876 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2877 self.report_warning('Unable to download thumbnail "%s": %s' %
9b9c5355 2878 (t['url'], error_to_compat_str(err)))
0202b52a 2879 return ret