]> jfr.im git - yt-dlp.git/blame - youtube_dlc/YoutubeDL.py
Multiple output templates for different file types
[yt-dlp.git] / youtube_dlc / YoutubeDL.py
CommitLineData
8222d8de 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de
JMF
19import re
20import shutil
dca08720 21import subprocess
8222d8de
JMF
22import socket
23import sys
24import time
67134eab 25import tokenize
8222d8de 26import traceback
75822ca7 27import random
8222d8de 28
961ea474
S
29from string import ascii_letters
30
8c25f81b 31from .compat import (
82d8a8b6 32 compat_basestring,
dca08720 33 compat_cookiejar,
003c69a8 34 compat_get_terminal_size,
ce02ed60 35 compat_http_client,
4f026faf 36 compat_kwargs,
d0d9ade4 37 compat_numeric_types,
e9c0cdd3 38 compat_os_name,
ce02ed60 39 compat_str,
67134eab 40 compat_tokenize_tokenize,
ce02ed60
PH
41 compat_urllib_error,
42 compat_urllib_request,
8b172c2e 43 compat_urllib_request_DataHandler,
8c25f81b
PH
44)
45from .utils import (
eedb7ba5
S
46 age_restricted,
47 args_to_str,
ce02ed60
PH
48 ContentTooShortError,
49 date_from_str,
50 DateRange,
acd69589 51 DEFAULT_OUTTMPL,
de6000d9 52 OUTTMPL_TYPES,
ce02ed60 53 determine_ext,
b5559424 54 determine_protocol,
732044af 55 DOT_DESKTOP_LINK_TEMPLATE,
56 DOT_URL_LINK_TEMPLATE,
57 DOT_WEBLOC_LINK_TEMPLATE,
ce02ed60 58 DownloadError,
c0384f22 59 encode_compat_str,
ce02ed60 60 encodeFilename,
9b9c5355 61 error_to_compat_str,
8b0d7497 62 ExistingVideoReached,
590bc6f6 63 expand_path,
ce02ed60 64 ExtractorError,
e29663c6 65 float_or_none,
02dbf93f 66 format_bytes,
76d321f6 67 format_field,
525ef922 68 formatSeconds,
773f291d 69 GeoRestrictedError,
c9969434 70 int_or_none,
732044af 71 iri_to_uri,
773f291d 72 ISO3166Utils,
ce02ed60 73 locked_file,
0202b52a 74 make_dir,
dca08720 75 make_HTTPS_handler,
ce02ed60 76 MaxDownloadsReached,
cd6fc19e 77 orderedSet,
b7ab0590 78 PagedList,
083c9df9 79 parse_filesize,
91410c9b 80 PerRequestProxyHandler,
dca08720 81 platform_name,
eedb7ba5 82 PostProcessingError,
ce02ed60 83 preferredencoding,
eedb7ba5 84 prepend_extension,
51fb4995 85 register_socks_protocols,
cfb56d1a 86 render_table,
eedb7ba5 87 replace_extension,
8b0d7497 88 RejectedVideoReached,
ce02ed60
PH
89 SameFileError,
90 sanitize_filename,
1bb5c511 91 sanitize_path,
dcf77cf1 92 sanitize_url,
67dda517 93 sanitized_Request,
e5660ee6 94 std_headers,
1211bb6d 95 str_or_none,
e29663c6 96 strftime_or_none,
ce02ed60 97 subtitles_filename,
732044af 98 to_high_limit_path,
ce02ed60 99 UnavailableVideoError,
29eb5174 100 url_basename,
58b1f00d 101 version_tuple,
ce02ed60
PH
102 write_json_file,
103 write_string,
1bab3437 104 YoutubeDLCookieJar,
6a3f4c3f 105 YoutubeDLCookieProcessor,
dca08720 106 YoutubeDLHandler,
fca6dba8 107 YoutubeDLRedirectHandler,
f5b1bca9 108 process_communicate_or_kill,
ce02ed60 109)
a0e07d31 110from .cache import Cache
f74980cb 111from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER, _PLUGIN_CLASSES
4c54b89e 112from .extractor.openload import PhantomJSwrapper
3bc2ddcc 113from .downloader import get_suitable_downloader
4c83c967 114from .downloader.rtmp import rtmpdump_version
4f026faf 115from .postprocessor import (
f17f8651 116 FFmpegFixupM3u8PP,
62cd676c 117 FFmpegFixupM4aPP,
6271f1ca 118 FFmpegFixupStretchedPP,
4f026faf
PH
119 FFmpegMergerPP,
120 FFmpegPostProcessor,
0202b52a 121 # FFmpegSubtitlesConvertorPP,
4f026faf 122 get_postprocessor,
0202b52a 123 MoveFilesAfterDownloadPP,
4f026faf 124)
dca08720 125from .version import __version__
8222d8de 126
e9c0cdd3
YCH
127if compat_os_name == 'nt':
128 import ctypes
129
2459b6e1 130
8222d8de
JMF
131class YoutubeDL(object):
132 """YoutubeDL class.
133
134 YoutubeDL objects are the ones responsible of downloading the
135 actual video file and writing it to disk if the user has requested
136 it, among some other tasks. In most cases there should be one per
137 program. As, given a video URL, the downloader doesn't know how to
138 extract all the needed information, task that InfoExtractors do, it
139 has to pass the URL to one of them.
140
141 For this, YoutubeDL objects have a method that allows
142 InfoExtractors to be registered in a given order. When it is passed
143 a URL, the YoutubeDL object handles it to the first InfoExtractor it
144 finds that reports being able to handle it. The InfoExtractor extracts
145 all the information about the video or videos the URL refers to, and
146 YoutubeDL process the extracted information, possibly using a File
147 Downloader to download the video.
148
149 YoutubeDL objects accept a lot of parameters. In order not to saturate
150 the object constructor with arguments, it receives a dictionary of
151 options instead. These options are available through the params
152 attribute for the InfoExtractors to use. The YoutubeDL also
153 registers itself as the downloader in charge for the InfoExtractors
154 that are added to it, so this is a "mutual registration".
155
156 Available options:
157
158 username: Username for authentication purposes.
159 password: Password for authentication purposes.
180940e0 160 videopassword: Password for accessing a video.
1da50aa3
S
161 ap_mso: Adobe Pass multiple-system operator identifier.
162 ap_username: Multiple-system operator account username.
163 ap_password: Multiple-system operator account password.
8222d8de
JMF
164 usenetrc: Use netrc for authentication instead.
165 verbose: Print additional info to stdout.
166 quiet: Do not print messages to stdout.
ad8915b7 167 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
168 forceurl: Force printing final URL.
169 forcetitle: Force printing title.
170 forceid: Force printing ID.
171 forcethumbnail: Force printing thumbnail URL.
172 forcedescription: Force printing description.
173 forcefilename: Force printing final filename.
525ef922 174 forceduration: Force printing duration.
8694c600 175 forcejson: Force printing info_dict as JSON.
63e0be34
PH
176 dump_single_json: Force printing the info_dict of the whole playlist
177 (or video) as a single JSON line.
2d30509f 178 force_write_download_archive: Force writing download archive regardless of
179 'skip_download' or 'simulate'.
8222d8de 180 simulate: Do not download the video files.
eb8a4433 181 format: Video format code. see "FORMAT SELECTION" for more details.
182 format_sort: How to sort the video formats. see "Sorting Formats" for more details.
183 format_sort_force: Force the given format_sort. see "Sorting Formats" for more details.
909d24dd 184 allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
185 allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
de6000d9 186 outtmpl: Dictionary of templates for output names. Allowed keys
187 are 'default' and the keys of OUTTMPL_TYPES (in utils.py)
a820dc72
RA
188 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
189 restrictfilenames: Do not allow "&" and spaces in file names
190 trim_file_name: Limit length of filename (extension excluded)
191 ignoreerrors: Do not stop on download errors
192 (Default True when running youtube-dlc,
193 but False when directly accessing YoutubeDL class)
d22dec74 194 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 195 overwrites: Overwrite all video and metadata files if True,
196 overwrite only non-video files if None
197 and don't overwrite any file if False
8222d8de
JMF
198 playliststart: Playlist item to start at.
199 playlistend: Playlist item to end at.
c14e88f0 200 playlist_items: Specific indices of playlist to download.
ff815fe6 201 playlistreverse: Download playlist items in reverse order.
75822ca7 202 playlistrandom: Download playlist items in random order.
8222d8de
JMF
203 matchtitle: Download only matching titles.
204 rejecttitle: Reject downloads for matching titles.
8bf9319e 205 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
206 logtostderr: Log messages to stderr instead of stdout.
207 writedescription: Write the video description to a .description file
208 writeinfojson: Write the video description to a .info.json file
06167fbb 209 writecomments: Extract video comments. This will not be written to disk
210 unless writeinfojson is also given
1fb07d10 211 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 212 writethumbnail: Write the thumbnail image to a file
cac96421 213 allow_playlist_files: Also write playlists' description, infojson etc in a separate file
ec82d85a 214 write_all_thumbnails: Write all thumbnail formats to files
732044af 215 writelink: Write an internet shortcut file, depending on the
216 current platform (.url/.webloc/.desktop)
217 writeurllink: Write a Windows internet shortcut file (.url)
218 writewebloclink: Write a macOS internet shortcut file (.webloc)
219 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 220 writesubtitles: Write the video subtitles to a file
741dd8ea 221 writeautomaticsub: Write the automatically generated subtitles to a file
8222d8de 222 allsubtitles: Downloads all the subtitles of the video
0b7f3118 223 (requires writesubtitles or writeautomaticsub)
8222d8de 224 listsubtitles: Lists all available subtitles for the video
a504ced0 225 subtitlesformat: The format code for subtitles
aa6a10c4 226 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
227 keepvideo: Keep the video file after post-processing
228 daterange: A DateRange object, download only if the upload_date is in the range.
229 skip_download: Skip the actual download of the video file
c35f9e72 230 cachedir: Location of the cache files in the filesystem.
a0e07d31 231 False to disable filesystem cache.
47192f92 232 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
233 age_limit: An integer representing the user's age in years.
234 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
235 min_views: An integer representing the minimum view count the video
236 must have in order to not be skipped.
237 Videos without view count information are always
238 downloaded. None for no limit.
239 max_views: An integer representing the maximum view count.
240 Videos that are more popular than that are not
241 downloaded.
242 Videos without view count information are always
243 downloaded. None for no limit.
244 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
245 Videos already present in the file are not downloaded
246 again.
8a51f564 247 break_on_existing: Stop the download process after attempting to download a
248 file that is in the archive.
249 break_on_reject: Stop the download process when encountering a video that
250 has been filtered out.
251 cookiefile: File name where cookies should be read from and dumped to
a1ee09e8 252 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
253 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
254 At the moment, this is only supported by YouTube.
a1ee09e8 255 proxy: URL of the proxy server to use
38cce791 256 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 257 on geo-restricted sites.
e344693b 258 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
259 bidi_workaround: Work around buggy terminals without bidirectional text
260 support, using fribidi
a0ddb8a2 261 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 262 include_ads: Download ads as well
04b4d394
PH
263 default_search: Prepend this string if an input url is not valid.
264 'auto' for elaborate guessing
62fec3b2 265 encoding: Use this encoding instead of the system-specified.
e8ee972c 266 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
267 Pass in 'in_playlist' to only show this behavior for
268 playlist items.
4f026faf 269 postprocessors: A list of dictionaries, each with an entry
71b640cc 270 * key: The name of the postprocessor. See
cefecac1 271 youtube_dlc/postprocessor/__init__.py for a list.
0202b52a 272 * _after_move: Optional. If True, run this post_processor
273 after 'MoveFilesAfterDownload'
4f026faf
PH
274 as well as any further keyword arguments for the
275 postprocessor.
ab8e5e51
AM
276 post_hooks: A list of functions that get called as the final step
277 for each video file, after all postprocessors have been
278 called. The filename will be passed as the only argument.
71b640cc
PH
279 progress_hooks: A list of functions that get called on download
280 progress, with a dictionary with the entries
5cda4eda 281 * status: One of "downloading", "error", or "finished".
ee69b99a 282 Check this first and ignore unknown values.
71b640cc 283
5cda4eda 284 If status is one of "downloading", or "finished", the
ee69b99a
PH
285 following properties may also be present:
286 * filename: The final filename (always present)
5cda4eda 287 * tmpfilename: The filename we're currently writing to
71b640cc
PH
288 * downloaded_bytes: Bytes on disk
289 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
290 * total_bytes_estimate: Guess of the eventual file size,
291 None if unavailable.
292 * elapsed: The number of seconds since download started.
71b640cc
PH
293 * eta: The estimated time in seconds, None if unknown
294 * speed: The download speed in bytes/second, None if
295 unknown
5cda4eda
PH
296 * fragment_index: The counter of the currently
297 downloaded video fragment.
298 * fragment_count: The number of fragments (= individual
299 files that will be merged)
71b640cc
PH
300
301 Progress hooks are guaranteed to be called at least once
302 (with status "finished") if the download is successful.
45598f15 303 merge_output_format: Extension to use when merging formats.
6b591b29 304 final_ext: Expected final extension; used to detect when the file was
305 already downloaded and converted. "merge_output_format" is
306 replaced by this extension when given
6271f1ca
PH
307 fixup: Automatically correct known faults of the file.
308 One of:
309 - "never": do nothing
310 - "warn": only emit a warning
311 - "detect_or_warn": check whether we can do anything
62cd676c 312 about it, warn otherwise (default)
504f20dd 313 source_address: Client-side IP address to bind to.
6ec6cb4e 314 call_home: Boolean, true iff we are allowed to contact the
cefecac1 315 youtube-dlc servers for debugging.
7aa589a5
S
316 sleep_interval: Number of seconds to sleep before each download when
317 used alone or a lower bound of a range for randomized
318 sleep before each download (minimum possible number
319 of seconds to sleep) when used along with
320 max_sleep_interval.
321 max_sleep_interval:Upper bound of a range for randomized sleep before each
322 download (maximum possible number of seconds to sleep).
323 Must only be used along with sleep_interval.
324 Actual sleep time will be a random float from range
325 [sleep_interval; max_sleep_interval].
cfb56d1a
PH
326 listformats: Print an overview of available video formats and exit.
327 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
328 match_filter: A function that gets called with the info_dict of
329 every video.
330 If it returns a message, the video is ignored.
331 If it returns None, the video is downloaded.
332 match_filter_func in utils.py is one example for this.
7e5db8c9 333 no_color: Do not emit color codes in output.
0a840f58 334 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 335 HTTP header
0a840f58 336 geo_bypass_country:
773f291d
S
337 Two-letter ISO 3166-2 country code that will be used for
338 explicit geographic restriction bypassing via faking
504f20dd 339 X-Forwarded-For HTTP header
5f95927a
S
340 geo_bypass_ip_block:
341 IP range in CIDR notation that will be used similarly to
504f20dd 342 geo_bypass_country
71b640cc 343
85729c51
PH
344 The following options determine which downloader is picked:
345 external_downloader: Executable of the external downloader to call.
346 None or unset for standard (built-in) downloader.
bf09af3a
S
347 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
348 if True, otherwise use ffmpeg/avconv if False, otherwise
349 use downloader suggested by extractor if None.
fe7e0c98 350
8222d8de 351 The following parameters are not used by YoutubeDL itself, they are used by
cefecac1 352 the downloader (see youtube_dlc/downloader/common.py):
8222d8de 353 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
881e6a1f 354 noresizebuffer, retries, continuedl, noprogress, consoletitle,
b54d4a5c
S
355 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
356 http_chunk_size.
76b1bd67
JMF
357
358 The following options are used by the post processors:
d4a24f40 359 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 360 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117
S
361 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
362 to the binary or its containing directory.
43820c03 363 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
364 and a list of additional command-line arguments for the
365 postprocessor/executable. The dict can also have "PP+EXE" keys
366 which are used when the given exe is used by the given PP.
367 Use 'default' as the name for arguments to be passed to all PP
3600fd59
S
368 The following options are used by the Youtube extractor:
369 youtube_include_dash_manifest: If True (default), DASH manifests and related
370 data will be downloaded and processed by extractor.
371 You can reduce network I/O by disabling it if you don't
372 care about DASH.
8222d8de
JMF
373 """
374
c9969434
S
375 _NUMERIC_FIELDS = set((
376 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
377 'timestamp', 'upload_year', 'upload_month', 'upload_day',
378 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
379 'average_rating', 'comment_count', 'age_limit',
380 'start_time', 'end_time',
381 'chapter_number', 'season_number', 'episode_number',
382 'track_number', 'disc_number', 'release_year',
383 'playlist_index',
384 ))
385
8222d8de
JMF
386 params = None
387 _ies = []
5bfa4862 388 _pps = {'beforedl': [], 'aftermove': [], 'normal': []}
0202b52a 389 __prepare_filename_warned = False
8222d8de
JMF
390 _download_retcode = None
391 _num_downloads = None
30a074c2 392 _playlist_level = 0
393 _playlist_urls = set()
8222d8de
JMF
394 _screen_file = None
395
def __init__(self, params=None, auto_init=True):
    """Create a FileDownloader object with the given options.

    params is the options dictionary (see the class docstring); its
    entries are copied on top of the built-in defaults into self.params.
    When auto_init is true, the debug header is printed and the default
    info extractors are registered.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = {'beforedl': [], 'aftermove': [], 'normal': []}
    self.__prepare_filename_warned = False
    self._post_hooks = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # A conditional rather than list indexing, so that a None (or any
    # other non-integer) value of 'logtostderr' does not raise TypeError
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)
    self.archive = set()

    def preload_download_archive(self):
        """Preload the archive, if any is specified"""
        fn = self.params.get('download_archive')
        if fn is None:
            return False
        # Only announce the archive when one is actually configured
        if self.params.get('verbose'):
            self.to_stdout('[debug] Loading archive file %r' % fn)
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file is not an error; anything else is
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    def check_deprecated(param, option, suggestion):
        """Warn if the deprecated param is set; return whether it was."""
        if self.params.get(param) is not None:
            self.report_warning(
                '%s is deprecated. Use %s instead.' % (option, suggestion))
            return True
        return False

    preload_download_archive(self)

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    if self.params.get('final_ext'):
        if self.params.get('merge_output_format'):
            self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
        self.params['merge_output_format'] = self.params['final_ext']

    # An explicit None is treated the same as the option being absent
    if 'overwrites' in self.params and self.params['overwrites'] is None:
        del self.params['overwrites']

    check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Try bidiv first and fall back to fribidi
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        # Work on a copy so the caller's definition is left untouched
        pp_def = dict(pp_def_raw)
        del pp_def['key']
        when = pp_def.pop('when', 'normal')
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp, when=when)

    for ph in self.params.get('post_hooks', []):
        self.add_post_hook(ph)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)

    register_socks_protocols()
525
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn about arguments that look like a YouTube ID starting with a dash.

    An argument made of a dash followed by ten ID characters is reported
    through report_warning, together with a corrected command line in
    which those arguments are moved behind a '--' separator.
    """
    looks_like_id = re.compile(r'^-[0-9A-Za-z_-]{10}$').match
    suspicious = [arg for arg in argv if looks_like_id(arg)]
    if not suspicious:
        return
    regular = [arg for arg in argv if not looks_like_id(arg)]
    correct_argv = ['youtube-dlc'] + regular + ['--'] + suspicious
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
541
8222d8de
JMF
def add_info_extractor(self, ie):
    """Append an InfoExtractor (class or instance) to the extractor list.

    Instances are additionally indexed by their ie_key() and get this
    object set as their downloader; classes are only appended.
    """
    self._ies.append(ie)
    if isinstance(ie, type):
        return
    self._ies_instances[ie.ie_key()] = ie
    ie.set_downloader(self)
8222d8de 548
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key. When none has been
    registered yet, a new instance is created, added to the extractor
    list and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
560
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register every extractor class yielded by gen_extractor_classes,
    in the order they are produced.
    """
    for extractor_class in gen_extractor_classes():
        self.add_info_extractor(extractor_class)
567
def add_post_processor(self, pp, when='normal'):
    """Append a PostProcessor to the chain selected by `when`.

    `when` must be one of the keys of self._pps; the postprocessor also
    gets this object set as its downloader.
    """
    chain = self._pps[when]
    chain.append(pp)
    pp.set_downloader(self)
572
ab8e5e51
AM
def add_post_hook(self, ph):
    """Register ph at the end of the list of post hooks"""
    registered = self._post_hooks
    registered.append(ph)
576
933605d7
JMF
def add_progress_hook(self, ph):
    """Register ph at the end of the list of progress hooks (currently only for the file downloader)"""
    registered = self._progress_hooks
    registered.append(ph)
8ab470f1 580
1c088fa8 581 def _bidi_workaround(self, message):
5d681e96 582 if not hasattr(self, '_output_channel'):
1c088fa8
PH
583 return message
584
5d681e96 585 assert hasattr(self, '_output_process')
11b85ce6 586 assert isinstance(message, compat_str)
6febd1c1
PH
587 line_count = message.count('\n') + 1
588 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 589 self._output_process.stdin.flush()
6febd1c1 590 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 591 for _ in range(line_count))
6febd1c1 592 return res[:-len('\n')]
1c088fa8 593
def to_screen(self, message, skip_eol=False):
    """Print message through to_stdout, honouring the 'quiet' option."""
    outcome = self.to_stdout(message, skip_eol, check_quiet=True)
    return outcome
597
def _write_string(self, s, out=None):
    """Write s to out via write_string, using the 'encoding' option."""
    chosen_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=chosen_encoding)
734f90bb 600
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Send message to the logger if one is set, else to the screen file.

    With check_quiet set, nothing is written when the 'quiet' option is
    enabled. A newline is appended unless skip_eol is true.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return
    terminator = ('\n', '')[skip_eol]
    output = self._bidi_workaround(message) + terminator
    self._write_string(output, self._screen_file)
8222d8de
JMF
611
def to_stderr(self, message):
    """Send message to the logger's error channel, or print it to the error file."""
    assert isinstance(message, compat_str)
    if self.params.get('logger'):
        self.params['logger'].error(message)
        return
    line = self._bidi_workaround(message) + '\n'
    self._write_string(line, self._err_file)
8222d8de 621
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console title to message when the 'consoletitle' option is on."""
    if not self.params.get('consoletitle', False):
        return
    if compat_os_name != 'nt':
        if 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)
        return
    kernel32 = ctypes.windll.kernel32
    if kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
1e5b9a95 632
bdde425c
PH
def save_console_title(self):
    """Push the current console title on the terminal's title stack.

    Does nothing unless 'consoletitle' is enabled, 'simulate' is off,
    the OS is not 'nt' and TERM is set in the environment.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate', False):
        return
    on_terminal = compat_os_name != 'nt' and 'TERM' in os.environ
    if on_terminal:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
641
def restore_console_title(self):
    """Pop the console title back from the terminal's title stack.

    Does nothing unless 'consoletitle' is enabled, 'simulate' is off,
    the OS is not 'nt' and TERM is set in the environment.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate', False):
        return
    on_terminal = compat_os_name != 'nt' and 'TERM' in os.environ
    if on_terminal:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
650
651 def __enter__(self):
652 self.save_console_title()
653 return self
654
655 def __exit__(self, *args):
656 self.restore_console_title()
f89197d7 657
dca08720 658 if self.params.get('cookiefile') is not None:
1bab3437 659 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 660
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message, if any, is printed to stderr; in verbose mode a
    traceback follows. Unless the 'ignoreerrors' option is set, a
    DownloadError is raised; otherwise the return code is set to 1.

    tb, if given, is additional traceback information.
    """
    def wrapped_exc_info():
        # Exception info carried in the current exception's own
        # ``exc_info`` attribute, or None when there is none
        current = sys.exc_info()[1]
        if hasattr(current, 'exc_info') and current.exc_info[0]:
            return current.exc_info
        return None

    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                pieces = []
                inner = wrapped_exc_info()
                if inner:
                    pieces.append(''.join(traceback.format_exception(*inner)))
                pieces.append(encode_compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    exc_info = wrapped_exc_info() or sys.exc_info()
    raise DownloadError(message, exc_info)
690
def report_warning(self, message):
    '''
    Hand the message to the logger if one is configured. Otherwise print
    it to stderr prefixed with 'WARNING:' (coloured when stderr is a tty,
    colours are not disabled and the OS is not 'nt'), unless the
    'no_warnings' option is set.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    use_color = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    header = '\033[0;33mWARNING:\033[0m' if use_color else 'WARNING:'
    self.to_stderr('%s %s' % (header, message))
8222d8de
JMF
707
def report_error(self, message, tb=None):
    '''
    Prefix the message with 'ERROR:' (in red when stderr is a tty,
    colours are not disabled and the OS is not 'nt') and pass it,
    together with tb, on to trouble.
    '''
    use_color = (
        not self.params.get('no_color')
        and self._err_file.isatty()
        and compat_os_name != 'nt')
    header = '\033[0;31mERROR:\033[0m' if use_color else 'ERROR:'
    self.trouble('%s %s' % (header, message), tb)
719
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name was already fully downloaded.

    Falls back to a message without the file name if printing it
    raises UnicodeEncodeError.
    """
    try:
        announcement = '[download] %s has already been downloaded' % file_name
        self.to_screen(announcement)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
8222d8de 726
def report_file_delete(self, file_name):
    """Tell the user that the already existing file_name is being deleted."""
    try:
        detailed = 'Deleting already existent file %s' % file_name
        self.to_screen(detailed)
    except UnicodeEncodeError:
        # The file name cannot be encoded for the output; report without it.
        self.to_screen('Deleting already existent file')
733
def parse_outtmpl(self):
    """
    Return the 'outtmpl' param as a dict of output templates.

    A non-dict value is treated as the 'default' template. Every key of
    DEFAULT_OUTTMPL that is missing or empty is filled in with its default.
    A warning is reported for each template that is a bytes object.
    """
    templates = self.params.get('outtmpl', {})
    if not isinstance(templates, dict):
        templates = {'default': templates}

    for tmpl_key, default_tmpl in DEFAULT_OUTTMPL.items():
        if not templates.get(tmpl_key):
            templates[tmpl_key] = default_tmpl

    for tmpl in templates.values():
        if isinstance(tmpl, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
    return templates
747
def _prepare_filename(self, info_dict, tmpl_type='default'):
    """
    Expand the output template of the given type with the fields of info_dict.

    tmpl_type selects the template from self.outtmpl_dict (falling back to
    'default'); if OUTTMPL_TYPES defines an extension for that type, the
    resulting extension is replaced with it.

    Returns the sanitized filename, or None after reporting an error when
    the template substitution raises ValueError.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-')
            if info_dict.get('duration', None) is not None
            else None)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                template_dict['resolution'] = '%dx?' % template_dict['width']

        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id' or k.endswith('_id')))
        # Drop missing and container values; sanitize everything non-numeric
        template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                             for k, v in template_dict.items()
                             if v is not None and not isinstance(v, (list, tuple, dict)))
        na = self.params.get('outtmpl_na_placeholder', 'NA')
        template_dict = collections.defaultdict(lambda: na, template_dict)

        outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
        force_ext = OUTTMPL_TYPES.get(tmpl_type)

        # For fields playlist_index and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            # .get() is used so that the lookup does not insert the
            # placeholder into the defaultdict as a side effect
            'playlist_index': len(str(template_dict.get('n_entries', na))),
            'autonumber': autonumber_size,
        }
        FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
        # Each occurrence is padded with the width of its own field (a
        # template may use both fields at once)
        outtmpl = re.sub(
            FIELD_SIZE_COMPAT_RE,
            lambda mobj: '%%(%s)0%dd' % (
                mobj.group('field'), field_size_compat_map[mobj.group('field')]),
            outtmpl)

        # As of [1] format syntax is:
        #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
        # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
        FORMAT_RE = r'''(?x)
            (?<!%)
            %
            \({0}\)  # mapping key
            (?:[#0\-+ ]+)?  # conversion flags (optional)
            (?:\d+)?  # minimum field width (optional)
            (?:\.\d+)?  # precision (optional)
            [hlL]?  # length modifier (optional)
            (?P<type>[diouxXeEfFgGcrs%])  # conversion type
        '''

        numeric_fields = list(self._NUMERIC_FIELDS)

        # Format date
        FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
        for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
            conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
            if key in template_dict:
                continue
            value = strftime_or_none(template_dict.get(field), frmt, na)
            if conv_type in 'crs':  # string
                value = sanitize(field, value)
            else:  # number
                numeric_fields.append(key)
                value = float_or_none(value, default=None)
            if value is not None:
                template_dict[key] = value

        # Missing numeric fields used together with integer presentation types
        # in format specification will break the argument substitution since
        # string NA placeholder is returned for missing fields. We will patch
        # output template for missing fields to meet string presentation type.
        for numeric_field in numeric_fields:
            if numeric_field not in template_dict:
                outtmpl = re.sub(
                    FORMAT_RE.format(re.escape(numeric_field)),
                    r'%({0})s'.format(numeric_field), outtmpl)

        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        filename = expand_path(outtmpl).replace(sep, '') % template_dict

        if force_ext is not None:
            filename = replace_extension(filename, force_ext, template_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # Keep at most the last two dot-separated parts as extensions
            # and truncate the first part to trim_file_name characters
            fn_groups = filename.rsplit('.')
            ext = fn_groups[-1]
            sub_ext = ''
            if len(fn_groups) > 2:
                sub_ext = fn_groups[-2]
            filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))

        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            filename = encodeFilename(filename, True).decode(preferredencoding())
        filename = sanitize_path(filename)

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
875
def prepare_filename(self, info_dict, dir_type='', warn=False):
    """
    Generate the output filename.

    The name produced by _prepare_filename() for the template type
    dir_type (or 'default') is joined with the 'home' path and, when
    dir_type is given, the path configured for that type in the 'paths'
    param. '-' (stdout) and empty/None names are returned unchanged.

    With warn=True a warning is reported (only on the first call) when
    'paths' is set but cannot be applied to the generated name.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    filename = self._prepare_filename(info_dict, dir_type or 'default')

    if warn and not self.__prepare_filename_warned:
        if not paths:
            pass
        elif filename == '-':
            self.report_warning('--paths is ignored when outputting to stdout')
        # filename is None when the template could not be expanded;
        # os.path.isabs() must not be called with it
        elif filename and os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template')
        self.__prepare_filename_warned = True
    if filename == '-' or not filename:
        return filename

    homepath = expand_path(paths.get('home', '').strip())
    assert isinstance(homepath, compat_str)
    subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    assert isinstance(subdir, compat_str)
    return sanitize_path(os.path.join(homepath, subdir, filename))
898
def _match_entry(self, info_dict, incomplete):
    """
    Returns None if the file should be downloaded

    Otherwise the reason for skipping is printed and returned as a string.
    Raises ExistingVideoReached if the entry is in the download archive and
    'break_on_existing' is set, or RejectedVideoReached for any other
    rejection when 'break_on_reject' is set. The 'match_filter' param is
    only consulted when incomplete is false.
    """
    # Shared between the producer and the check below so that they
    # cannot drift apart
    ARCHIVE_REASON_SUFFIX = 'has already been recorded in archive'

    def check_filter():
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title
        if self.in_download_archive(info_dict):
            return '%s %s' % (video_title, ARCHIVE_REASON_SUFFIX)

        if not incomplete:
            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                ret = match_filter(info_dict)
                if ret is not None:
                    return ret
        return None

    reason = check_filter()
    if reason is not None:
        self.to_screen('[download] ' + reason)
        if reason.endswith(ARCHIVE_REASON_SUFFIX) and self.params.get('break_on_existing', False):
            raise ExistingVideoReached()
        elif self.params.get('break_on_reject', False):
            raise RejectedVideoReached()
    return reason
fe7e0c98 949
b6c45014
JMF
950 @staticmethod
951 def add_extra_info(info_dict, extra_info):
952 '''Set the keys from extra_info in info dict if they are missing'''
953 for key, value in extra_info.items():
954 info_dict.setdefault(key, value)
955
def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
                 process=True, force_generic_extractor=False):
    '''
    Extract the information for url with the first suitable extractor.

    ie_key forces a specific extractor; force_generic_extractor selects
    'Generic' when no ie_key is given. The remaining arguments are handed
    to __extract_info(): if 'download', also downloads the videos;
    extra_info is a dict containing the extra values to add to each result.

    Returns the result of __extract_info(), or None when the video id is
    already recorded in the download archive or no extractor is suitable
    (an error is reported in the latter case).
    '''
    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    candidates = [self.get_info_extractor(ie_key)] if ie_key else self._ies

    for candidate in candidates:
        if not candidate.suitable(url):
            continue

        ie_key = candidate.ie_key()
        ie = self.get_info_extractor(ie_key)
        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Try to learn the video id up front for the archive check below
        try:
            if callable(getattr(ie, 'extract_id', None)):
                raw_id = ie.extract_id(url)
            else:
                raw_id = ie._match_id(url)
            temp_id = str_or_none(raw_id)
        except (AssertionError, IndexError, AttributeError):
            temp_id = None

        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen("[%s] %s: has already been recorded in archive" % (
                           ie_key, temp_id))
            return None
        return self.__extract_info(url, ie, download, extra_info, process, info_dict)

    self.report_error('no suitable InfoExtractor for URL %s' % url)
995
def __handle_extraction_exceptions(func):
    # Decorator: run func and turn extraction failures into reported errors.
    # Geo restriction and extractor errors are always reported;
    # MaxDownloadsReached, ExistingVideoReached and RejectedVideoReached are
    # re-raised; any other exception is reported only when 'ignoreerrors'
    # is set and re-raised otherwise.
    def guarded(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except GeoRestrictedError as err:
            msg = err.msg
            if err.countries:
                available_in = ', '.join(map(ISO3166Utils.short2full, err.countries))
                msg += '\nThis video is available in %s.' % available_in
            msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
            self.report_error(msg)
        except ExtractorError as err:  # An error we somewhat expected
            self.report_error(compat_str(err), err.format_traceback())
        except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
            raise
        except Exception as err:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(error_to_compat_str(err), tb=encode_compat_str(traceback.format_exc()))
    return guarded
1017
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process, info_dict):
    """
    Run the extractor ie on url and optionally process its result.

    A truthy 'id' / 'title' in info_dict overrides the extracted one.
    Returns None if the extractor returned None, the raw result if process
    is false, and the result of process_ie_result() otherwise.
    """
    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}
    if info_dict:
        for field in ('id', 'title'):
            if info_dict.get(field):
                ie_result[field] = info_dict[field]
    self.add_default_extra_info(ie_result, ie, url)
    if not process:
        return ie_result
    return self.process_ie_result(ie_result, download, extra_info)
fe7e0c98 1039
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the extractor and webpage fields of ie_result that are missing."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
1047
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    None is returned for a playlist whose 'webpage_url' is already being
    processed, and when extraction of a 'url_transparent' target yielded
    nothing. Raises Exception for an unknown '_type'.
    """
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: do not resolve the reference
            self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download, info_dict=ie_result,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # The parentheses matter: '%' binds tighter than 'or'
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Forget the visited URLs once the outermost playlist is done
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1148
def __process_playlist(self, ie_result, download):
    """
    Process every selected entry of the playlist result ie_result.

    Optionally writes the playlist info JSON and description files, picks
    the entries according to the 'playlist_items' / 'playliststart' /
    'playlistend' params, processes each of them and stores the results in
    ie_result['entries'].

    Returns ie_result, or None if a directory for a playlist file could not
    be created or the description file could not be written.
    """
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if self.params.get('allow_playlist_files', True):
        # Template fields for the playlist-level files; values present in
        # ie_result take precedence over these defaults
        ie_copy = {
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'playlist_index': 0
        }
        ie_copy.update(dict(ie_result))

        def ensure_dir_exists(path):
            return make_dir(path, self.report_error)

        if self.params.get('writeinfojson', False):
            infofn = self.prepare_filename(ie_copy, 'pl_infojson')
            if not ensure_dir_exists(encodeFilename(infofn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Playlist metadata is already present')
            else:
                playlist_info = dict(ie_result)
                # 'entries' is left out of the metadata: it may be a
                # generator, which should not be resolved here
                del playlist_info['entries']
                self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(playlist_info), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write playlist metadata to JSON file ' + infofn)

        if self.params.get('writedescription', False):
            descfn = self.prepare_filename(ie_copy, 'pl_description')
            if not ensure_dir_exists(encodeFilename(descfn)):
                return
            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Playlist description is already present')
            elif ie_result.get('description') is None:
                self.report_warning('There\'s no playlist description to write.')
            else:
                try:
                    self.to_screen('[info] Writing playlist description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(ie_result['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write playlist description file ' + descfn)
                    return

    playlist_results = []

    # 'playliststart' is 1-based in the params, 0-based here
    playliststart = self.params.get('playliststart', 1) - 1
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(format):
            # Yield the 1-based indices of a spec such as '1,3-5'
            for string_segment in format.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    for item in range(int(start), int(end) + 1):
                        yield int(item)
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']

    def make_playlistitems_entries(list_ie_entries):
        # Pick the requested items, ignoring indices outside of the list
        num_entries = len(list_ie_entries)
        return [
            list_ie_entries[i - 1] for i in playlistitems
            if -num_entries <= i - 1 < num_entries]

    def report_download(num_entries):
        self.to_screen(
            '[%s] playlist %s: Downloading %d videos' %
            (ie_result['extractor'], playlist, num_entries))

    # The entries are selected differently depending on their container
    if isinstance(ie_entries, list):
        n_all_entries = len(ie_entries)
        if playlistitems:
            entries = make_playlistitems_entries(ie_entries)
        else:
            entries = ie_entries[playliststart:playlistend]
        n_entries = len(entries)
        self.to_screen(
            '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
            (ie_result['extractor'], playlist, n_all_entries, n_entries))
    elif isinstance(ie_entries, PagedList):
        if playlistitems:
            entries = []
            for item in playlistitems:
                entries.extend(ie_entries.getslice(
                    item - 1, item
                ))
        else:
            entries = ie_entries.getslice(
                playliststart, playlistend)
        n_entries = len(entries)
        report_download(n_entries)
    else:  # iterable
        if playlistitems:
            # Consume only as much of the iterable as the highest requested item needs
            entries = make_playlistitems_entries(list(itertools.islice(
                ie_entries, 0, max(playlistitems))))
        else:
            entries = list(itertools.islice(
                ie_entries, playliststart, playlistend))
        n_entries = len(entries)
        report_download(n_entries)

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]

    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    for i, entry in enumerate(entries, 1):
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        # Playlist level fields passed on to the entry as extra_info
        extra = {
            'n_entries': n_entries,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        # Skip entries rejected by the filters
        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        # TODO: skip failed (empty) entries?
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results
    self.to_screen('[download] Finished downloading playlist: %s' % playlist)
    return ie_result
1304
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process a single playlist entry with the shared extraction error handling."""
    processed = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return processed
1309
67134eab
JMF
def _build_format_filter(self, filter_spec):
    """
    Returns a function to filter the formats according to the filter_spec

    filter_spec is either a numeric comparison on one of the known numeric
    keys (e.g. 'height>=720') or a string comparison on any key (e.g.
    'ext!=webm'). A '?' after the operator makes formats lacking the key
    pass the filter. Raises ValueError for an invalid specification.
    """
    NUMERIC_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    numeric_rex = re.compile(r'''(?x)\s*
        (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
        $
        ''' % '|'.join(map(re.escape, NUMERIC_OPERATORS.keys())))
    match = numeric_rex.search(filter_spec)
    if match:
        raw_value = match.group('value')
        try:
            comparison_value = int(raw_value)
        except ValueError:
            # Not a plain integer: try it as a file size, with and
            # without an explicit trailing 'B'
            comparison_value = parse_filesize(raw_value)
            if comparison_value is None:
                comparison_value = parse_filesize(raw_value + 'B')
            if comparison_value is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        raw_value, filter_spec))
        op = NUMERIC_OPERATORS[match.group('op')]
    else:
        STRING_OPERATORS = {
            '=': operator.eq,
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
        }
        string_rex = re.compile(r'''(?x)
            \s*(?P<key>[a-zA-Z0-9._-]+)
            \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
            \s*(?P<value>[a-zA-Z0-9._-]+)
            \s*$
            ''' % '|'.join(map(re.escape, STRING_OPERATORS.keys())))
        match = string_rex.search(filter_spec)
        if match:
            comparison_value = match.group('value')
            str_op = STRING_OPERATORS[match.group('op')]
            if match.group('negation'):
                op = lambda attr, value: not str_op(attr, value)
            else:
                op = str_op

    if not match:
        raise ValueError('Invalid filter specification %r' % filter_spec)

    key = match.group('key')
    none_inclusive = match.group('none_inclusive')

    def _filter(f):
        actual_value = f.get(key)
        if actual_value is None:
            # Formats without the key only pass if '?' was given
            return none_inclusive
        return op(actual_value, comparison_value)
    return _filter
1372
def _default_format_spec(self, info_dict, download=True):
    """
    Return the format specification to use when none was requested.

    A single pre-merged file ('best') is preferred when actually
    downloading and either merging is not possible, the video is live or
    the output goes to stdout ('-').
    """

    def can_merge():
        merger = FFmpegMergerPP(self)
        return merger.available and merger.can_merge()

    will_download = not self.params.get('simulate', False) and download
    prefer_best = will_download and (
        not can_merge()
        or info_dict.get('is_live', False)
        or self.outtmpl_dict['default'] == '-')

    if prefer_best:
        return 'best/bestvideo+bestaudio'
    if self.params.get('allow_multiple_audio_streams', False):
        return 'bestvideo+bestaudio/best'
    return 'bestvideo*+bestaudio/best'
0017d9ad 1393
67134eab
JMF
1394 def build_format_selector(self, format_spec):
1395 def syntax_error(note, start):
1396 message = (
1397 'Invalid format specification: '
1398 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1399 return SyntaxError(message)
1400
1401 PICKFIRST = 'PICKFIRST'
1402 MERGE = 'MERGE'
1403 SINGLE = 'SINGLE'
0130afb7 1404 GROUP = 'GROUP'
67134eab
JMF
1405 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1406
91ebc640 1407 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1408 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 1409
67134eab
JMF
1410 def _parse_filter(tokens):
1411 filter_parts = []
1412 for type, string, start, _, _ in tokens:
1413 if type == tokenize.OP and string == ']':
1414 return ''.join(filter_parts)
1415 else:
1416 filter_parts.append(string)
1417
232541df 1418 def _remove_unused_ops(tokens):
17cc1534 1419 # Remove operators that we don't use and join them with the surrounding strings
232541df
JMF
1420 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1421 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1422 last_string, last_start, last_end, last_line = None, None, None, None
1423 for type, string, start, end, line in tokens:
1424 if type == tokenize.OP and string == '[':
1425 if last_string:
1426 yield tokenize.NAME, last_string, last_start, last_end, last_line
1427 last_string = None
1428 yield type, string, start, end, line
1429 # everything inside brackets will be handled by _parse_filter
1430 for type, string, start, end, line in tokens:
1431 yield type, string, start, end, line
1432 if type == tokenize.OP and string == ']':
1433 break
1434 elif type == tokenize.OP and string in ALLOWED_OPS:
1435 if last_string:
1436 yield tokenize.NAME, last_string, last_start, last_end, last_line
1437 last_string = None
1438 yield type, string, start, end, line
1439 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1440 if not last_string:
1441 last_string = string
1442 last_start = start
1443 last_end = end
1444 else:
1445 last_string += string
1446 if last_string:
1447 yield tokenize.NAME, last_string, last_start, last_end, last_line
1448
def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
    """Parse format selector tokens into a list of FormatSelector objects.

    `tokens` must be an iterator that also provides restore_last_token().
    The inside_* flags tell a recursive call which construct it is parsing
    the right-hand side of ('+', '/' or a parenthesised group), so that it
    stops at the delimiter belonging to the enclosing parse.

    Returns the list of comma separated selectors found at this level.
    """
    parsed = []
    pending = None  # selector that is currently being assembled
    # ENCODING is only defined in python 3.x
    encoding_token = getattr(tokenize, 'ENCODING', None)

    for tok_type, tok_text, tok_start, _, _ in tokens:
        if tok_type == encoding_token:
            continue
        if tok_type == tokenize.ENDMARKER:
            break
        if tok_type in (tokenize.NAME, tokenize.NUMBER):
            pending = FormatSelector(SINGLE, tok_text, [])
            continue
        if tok_type != tokenize.OP:
            # Any other kind of token is ignored
            continue

        if tok_text == ')':
            if not inside_group:
                # ')' will be handled by the parentheses group
                tokens.restore_last_token()
            break

        ends_merge = inside_merge and tok_text in ('/', ',')
        ends_choice = inside_choice and tok_text == ','
        if ends_merge or ends_choice:
            # The delimiter belongs to the enclosing parse: hand it back
            tokens.restore_last_token()
            break

        if tok_text == ',':
            if not pending:
                raise syntax_error('"," must follow a format selector', tok_start)
            parsed.append(pending)
            pending = None
        elif tok_text == '/':
            if not pending:
                raise syntax_error('"/" must follow a format selector', tok_start)
            preferred = pending
            alternative = _parse_format_selection(tokens, inside_choice=True)
            pending = FormatSelector(PICKFIRST, (preferred, alternative), [])
        elif tok_text == '[':
            if not pending:
                # A bare filter applies to the implicit 'best' selector
                pending = FormatSelector(SINGLE, 'best', [])
            pending.filters.append(_parse_filter(tokens))
        elif tok_text == '(':
            if pending:
                raise syntax_error('Unexpected "("', tok_start)
            grouped = _parse_format_selection(tokens, inside_group=True)
            pending = FormatSelector(GROUP, grouped, [])
        elif tok_text == '+':
            if not pending:
                raise syntax_error('Unexpected "+"', tok_start)
            left_side = pending
            right_side = _parse_format_selection(tokens, inside_merge=True)
            if not right_side:
                raise syntax_error('Expected a selector', tok_start)
            pending = FormatSelector(MERGE, (left_side, right_side), [])
        else:
            raise syntax_error('Operator not recognized: "{0}"'.format(tok_text), tok_start)

    if pending:
        parsed.append(pending)
    return parsed
1506
def _build_selector_function(selector):
    """Turn a parsed selector (or a list of them) into a callable.

    The returned callable takes a context dict (it reads ctx['formats'] and,
    for the best/worst fallback, ctx['incomplete_formats']) and returns an
    iterable of the selected format dicts.
    """
    if isinstance(selector, list):  # ,
        fs = [_build_selector_function(s) for s in selector]

        def selector_function(ctx):
            for f in fs:
                for format in f(ctx):
                    yield format
        return selector_function

    elif selector.type == GROUP:  # ()
        selector_function = _build_selector_function(selector.selector)

    elif selector.type == PICKFIRST:  # /
        fs = [_build_selector_function(s) for s in selector.selector]

        def selector_function(ctx):
            # The first alternative that yields anything wins
            for f in fs:
                picked_formats = list(f(ctx))
                if picked_formats:
                    return picked_formats
            return []

    elif selector.type == SINGLE:  # atom
        format_spec = selector.selector if selector.selector is not None else 'best'

        if format_spec == 'all':
            def selector_function(ctx):
                formats = list(ctx['formats'])
                if formats:
                    for f in formats:
                        yield f

        else:
            format_fallback = False
            format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
            if format_spec_obj is not None:
                # worst picks the first candidate, best the last one
                format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
                format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
                not_format_type = 'v' if format_type == 'a' else 'a'
                format_modified = format_spec_obj.group(3) is not None

                format_fallback = not format_type and not format_modified  # for b, w
                filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
                            if format_type and format_modified  # bv*, ba*, wv*, wa*
                            else (lambda f: f.get(not_format_type + 'codec') == 'none')
                            if format_type  # bv, ba, wv, wa
                            else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                            if not format_modified  # b, w
                            else None)  # b*, w*
            else:
                format_idx = -1
                filter_f = ((lambda f: f.get('ext') == format_spec)
                            if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                            else (lambda f: f.get('format_id') == format_spec))  # id

            def selector_function(ctx):
                formats = list(ctx['formats'])
                if not formats:
                    return
                matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                if matches:
                    yield matches[format_idx]
                elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
                    # for extractors with incomplete formats (audio only (soundcloud)
                    # or video only (imgur)) best/worst will fallback to
                    # best/worst {video,audio}-only format
                    yield formats[format_idx]

    elif selector.type == MERGE:  # +
        def _merge(formats_pair):
            """Combine the two selected formats into one merged format dict."""
            format_1, format_2 = formats_pair

            formats_info = []
            formats_info.extend(format_1.get('requested_formats', (format_1,)))
            formats_info.extend(format_2.get('requested_formats', (format_2,)))

            if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
                # Keep only the first format providing each stream kind that
                # may not be duplicated. A new list is built on purpose:
                # popping from formats_info while enumerating it skips the
                # element following every removal and could pop twice for a
                # single format that carries both audio and video.
                restricted_kinds = [
                    kind for kind in ('audio', 'video')
                    if not allow_multiple_streams[kind]]
                get_no_more = dict.fromkeys(restricted_kinds, False)
                kept_formats = []
                for fmt_info in formats_info:
                    carried_kinds = [
                        kind for kind in restricted_kinds
                        if fmt_info.get(kind[0] + 'codec') != 'none']
                    if any(get_no_more[kind] for kind in carried_kinds):
                        # Duplicates a stream kind that is already covered
                        continue
                    for kind in carried_kinds:
                        get_no_more[kind] = True
                    kept_formats.append(fmt_info)
                formats_info = kept_formats

            if len(formats_info) == 1:
                # Nothing left to merge with
                return formats_info[0]

            video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
            audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

            the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
            the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

            output_ext = self.params.get('merge_output_format')
            if not output_ext:
                if the_only_video:
                    output_ext = the_only_video['ext']
                elif the_only_audio and not video_fmts:
                    output_ext = the_only_audio['ext']
                else:
                    output_ext = 'mkv'

            new_dict = {
                'requested_formats': formats_info,
                'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
                'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
                'ext': output_ext,
            }

            if the_only_video:
                new_dict.update({
                    'width': the_only_video.get('width'),
                    'height': the_only_video.get('height'),
                    'resolution': the_only_video.get('resolution'),
                    'fps': the_only_video.get('fps'),
                    'vcodec': the_only_video.get('vcodec'),
                    'vbr': the_only_video.get('vbr'),
                    'stretched_ratio': the_only_video.get('stretched_ratio'),
                })

            if the_only_audio:
                new_dict.update({
                    'acodec': the_only_audio.get('acodec'),
                    'abr': the_only_audio.get('abr'),
                })

            return new_dict

        selector_1, selector_2 = map(_build_selector_function, selector.selector)

        def selector_function(ctx):
            # Each side works on its own copy of the context
            for pair in itertools.product(
                    selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
                yield _merge(pair)

    filters = [self._build_format_filter(f) for f in selector.filters]

    def final_selector(ctx):
        # Apply this selector's [...] filters on a private copy of the
        # context before delegating to the selector built above
        ctx_copy = copy.deepcopy(ctx)
        for _filter in filters:
            ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
        return selector_function(ctx_copy)
    return final_selector
083c9df9 1652
67134eab 1653 stream = io.BytesIO(format_spec.encode('utf-8'))
0130afb7 1654 try:
232541df 1655 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
0130afb7
JMF
1656 except tokenize.TokenError:
1657 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1658
class TokenIterator(object):
    """Iterator over a token sequence that can step back by one token."""

    def __init__(self, tokens):
        self.tokens = tokens
        # Index of the token the next call will return
        self.counter = 0

    def __iter__(self):
        return self

    def __next__(self):
        position = self.counter
        if position < len(self.tokens):
            token = self.tokens[position]
            self.counter = position + 1
            return token
        raise StopIteration()

    # Python 2 name of the iterator protocol method
    next = __next__

    def restore_last_token(self):
        """Move one token back so that it is returned again."""
        self.counter = self.counter - 1
1678
1679 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 1680 return _build_selector_function(parsed_selector)
a9c58ad9 1681
e5660ee6
JMF
def _calc_headers(self, info_dict):
    """Return the HTTP headers to use for info_dict.

    Starts from a copy of std_headers, applies info_dict['http_headers'],
    adds the Cookie header computed by _calc_cookies and, unless one is
    already set, an X-Forwarded-For header from '__x_forwarded_for_ip'.
    """
    headers = std_headers.copy()

    extra_headers = info_dict.get('http_headers')
    if extra_headers:
        headers.update(extra_headers)

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip and 'X-Forwarded-For' not in headers:
        headers['X-Forwarded-For'] = forwarded_ip

    return headers
1699
def _calc_cookies(self, info_dict):
    """Return the Cookie header the cookiejar adds to a request for info_dict['url']."""
    probe_request = sanitized_Request(info_dict['url'])
    # The request is never sent; it only lets the cookiejar fill in the header
    self.cookiejar.add_cookie_header(probe_request)
    cookie_header = probe_request.get_header('Cookie')
    return cookie_header
e5660ee6 1704
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Sanitize a single video result, select its formats and process them.

    info_dict is modified in place: fields are sanitized, missing fields
    (thumbnails, display_id, upload_date, format ids, ...) are filled in and
    'requested_subtitles' is computed. When `download` is true every selected
    format is passed to process_info().

    Returns info_dict updated with the last selected format, or None when
    only a listing was requested ('list_thumbnails', 'listsubtitles' or
    'listformats').

    Raises ExtractorError when 'id' or 'title' is missing, when no usable
    format is left, or when the requested format is not available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    def report_force_conversion(field, field_not, conversion):
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        field = info.get(string_field)
        if field is None or isinstance(field, compat_str):
            return
        report_force_conversion(string_field, 'a string', 'string')
        info[string_field] = compat_str(field)

    def sanitize_numeric_fields(info):
        for numeric_field in self._NUMERIC_FIELDS:
            field = info.get(numeric_field)
            if field is None or isinstance(field, compat_numeric_types):
                continue
            report_force_conversion(numeric_field, 'numeric', 'int')
            info[numeric_field] = int_or_none(field)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    if thumbnails:
        # Ascending sort, so that the last thumbnail is the preferred one
        thumbnails.sort(key=lambda t: (
            t.get('preference') if t.get('preference') is not None else -1,
            t.get('width') if t.get('width') is not None else -1,
            t.get('height') if t.get('height') is not None else -1,
            t.get('id') if t.get('id') is not None else '', t.get('url')))
        for i, t in enumerate(thumbnails):
            t['url'] = sanitize_url(t['url'])
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            if t.get('id') is None:
                t['id'] = '%d' % i

    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    thumbnail = info_dict.get('thumbnail')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        try:
            upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            pass

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

    for cc_kind in ('subtitles', 'automatic_captions'):
        cc = info_dict.get(cc_kind)
        if cc:
            for _, subtitle in cc.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        return

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    def is_wellformed(f):
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = list(filter(is_wellformed, formats))

    if not formats:
        # Every format was rejected above. Report it as an extractor error
        # instead of failing with an IndexError on formats[0] further down.
        raise ExtractorError('No video formats found!')

    formats_dict = {}

    # We check that all the formats have the format and format_id fields
    for i, format in enumerate(formats):
        sanitize_string_field(format, 'format_id')
        sanitize_numeric_fields(format)
        format['url'] = sanitize_url(format['url'])
        if not format.get('format_id'):
            format['format_id'] = compat_str(i)
        else:
            # Sanitize format_id from characters used in format selector expression
            format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
        format_id = format['format_id']
        if format_id not in formats_dict:
            formats_dict[format_id] = []
        formats_dict[format_id].append(format)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_dict.items():
        if len(ambiguous_formats) > 1:
            for i, format in enumerate(ambiguous_formats):
                format['format_id'] = '%s-%d' % (format_id, i)

    for i, format in enumerate(formats):
        if format.get('format') is None:
            format['format'] = '{id} - {res}{note}'.format(
                id=format['format_id'],
                res=self.format_resolution(format),
                note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if format.get('ext') is None:
            format['ext'] = determine_ext(format['url']).lower()
        # Automatically determine protocol if missing (useful for format
        # selection purposes)
        if format.get('protocol') is None:
            format['protocol'] = determine_protocol(format)
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(format)
        format['http_headers'] = self._calc_headers(full_format_info)
    # Remove private housekeeping stuff
    if '__x_forwarded_for_ip' in info_dict:
        del info_dict['__x_forwarded_for_ip']

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        self.list_formats(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = self._default_format_spec(info_dict, download=download)
        if self.params.get('verbose'):
            self.to_screen('[debug] Default format spec: %s' % req_format)

    format_selector = self.build_format_selector(req_format)

    # While in format selection we may need to have an access to the original
    # format set in order to calculate some metrics or do some processing.
    # For now we need to be able to guess whether original formats provided
    # by extractor are incomplete or not (i.e. whether extractor provides only
    # video-only or audio-only formats) for proper formats selection for
    # extractors with such incomplete formats (see
    # https://github.com/ytdl-org/youtube-dl/pull/5556).
    # Since formats may be filtered during format selection and may not match
    # the original formats the results may be incorrect. Thus original formats
    # or pre-calculated metrics should be passed to format selection routines
    # as well.
    # We will pass a context object containing all necessary additional data
    # instead of just formats.
    # This fixes incorrect format selection issue (see
    # https://github.com/ytdl-org/youtube-dl/issues/10083).
    incomplete_formats = (
        # All formats are video-only or
        all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
        # all formats are audio-only
        or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

    ctx = {
        'formats': formats,
        'incomplete_formats': incomplete_formats,
    }

    formats_to_download = list(format_selector(ctx))
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for format in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(format)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
1942
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    normal_subtitles and automatic_captions map a language to a list of
    subtitle format dicts; either may be None. Automatic captions are only
    used for languages that have no regular subtitles.

    Returns a dict mapping each selected language to the chosen format
    dict, or None when no subtitles were requested or none are available.
    Languages without any usable format are skipped with a warning.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            # Regular subtitles take precedence over automatic captions
            available_subs.setdefault(lang, cap_info)

    # available_subs is only filled in when one of the write options is
    # set, so this also covers the "nothing requested" case
    if not available_subs:
        return None

    if self.params.get('allsubtitles', False):
        requested_langs = list(available_subs)
    elif self.params.get('subtitleslangs', False):
        requested_langs = self.params.get('subtitleslangs')
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [next(iter(available_subs))]

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        if not formats:
            # Missing language, or a language listed without any format:
            # indexing an empty list below would raise IndexError
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            matches = [f for f in formats if f.get('ext') == ext]
            if matches:
                chosen = matches[-1]
                break
        else:
            # None of the preferred extensions matched: use the last format
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen.get('ext')))
        subs[lang] = chosen
    return subs
1991
d06daf23
S
def __forced_printings(self, info_dict, filename, incomplete):
    """Write to stdout the fields enabled through the 'force*' params.

    With `incomplete` set, the url is never printed and the title, id and
    format fields are only printed when info_dict has a value for them.
    """
    def is_forced(name):
        return self.params.get('force%s' % name, False)

    def emit_field(field, required):
        if not is_forced(field):
            return
        # Optional fields, and any field of an incomplete result, are
        # skipped when they have no value
        if info_dict.get(field) is None and (incomplete or not required):
            return
        self.to_stdout(info_dict[field])

    emit_field('title', required=True)
    emit_field('id', required=True)

    if is_forced('url') and not incomplete:
        requested_formats = info_dict.get('requested_formats')
        if requested_formats is None:
            url_sources = [info_dict]
        else:
            url_sources = requested_formats
        for source in url_sources:
            # For RTMP URLs, also include the playpath
            self.to_stdout(source['url'] + source.get('play_path', ''))

    emit_field('thumbnail', required=False)
    emit_field('description', required=False)

    if is_forced('filename') and filename is not None:
        self.to_stdout(filename)

    if is_forced('duration') and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))

    emit_field('format', required=True)

    if is_forced('json'):
        self.to_stdout(json.dumps(info_dict))
2021
8222d8de
JMF
2022 def process_info(self, info_dict):
2023 """Process a single resolved IE result."""
2024
2025 assert info_dict.get('_type', 'video') == 'video'
fd288278 2026
0202b52a 2027 info_dict.setdefault('__postprocessors', [])
2028
fd288278
PH
2029 max_downloads = self.params.get('max_downloads')
2030 if max_downloads is not None:
2031 if self._num_downloads >= int(max_downloads):
2032 raise MaxDownloadsReached()
8222d8de 2033
d06daf23 2034 # TODO: backward compatibility, to be removed
8222d8de 2035 info_dict['fulltitle'] = info_dict['title']
8222d8de 2036
11b85ce6 2037 if 'format' not in info_dict:
8222d8de
JMF
2038 info_dict['format'] = info_dict['ext']
2039
8b0d7497 2040 if self._match_entry(info_dict, incomplete=False) is not None:
8222d8de
JMF
2041 return
2042
fd288278 2043 self._num_downloads += 1
8222d8de 2044
5bfa4862 2045 info_dict = self.pre_process(info_dict)
2046
de6000d9 2047 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2048 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2049 files_to_move = {}
de6000d9 2050 skip_dl = self.params.get('skip_download', False)
8222d8de
JMF
2051
2052 # Forced printings
0202b52a 2053 self.__forced_printings(info_dict, full_filename, incomplete=False)
8222d8de 2054
8222d8de 2055 if self.params.get('simulate', False):
2d30509f 2056 if self.params.get('force_write_download_archive', False):
2057 self.record_download_archive(info_dict)
2058
2059 # Do nothing else if in simulate mode
8222d8de
JMF
2060 return
2061
de6000d9 2062 if full_filename is None:
8222d8de
JMF
2063 return
2064
def ensure_dir_exists(path):
    """Call make_dir for `path` with self.report_error as error callback and return its result.

    The callers treat a false result as a failure and abort.
    """
    outcome = make_dir(path, self.report_error)
    return outcome
c5c9bf0c 2067
0202b52a 2068 if not ensure_dir_exists(encodeFilename(full_filename)):
2069 return
2070 if not ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
2071 return
2072
2073 if self.params.get('writedescription', False):
de6000d9 2074 descfn = self.prepare_filename(info_dict, 'description')
0202b52a 2075 if not ensure_dir_exists(encodeFilename(descfn)):
2076 return
0c3d0f51 2077 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
6febd1c1 2078 self.to_screen('[info] Video description is already present')
f00fd51d
JMF
2079 elif info_dict.get('description') is None:
2080 self.report_warning('There\'s no description to write.')
7b6fefc9
PH
2081 else:
2082 try:
6febd1c1 2083 self.to_screen('[info] Writing video description to: ' + descfn)
7b6fefc9
PH
2084 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2085 descfile.write(info_dict['description'])
7b6fefc9 2086 except (OSError, IOError):
6febd1c1 2087 self.report_error('Cannot write description file ' + descfn)
7b6fefc9 2088 return
8222d8de 2089
1fb07d10 2090 if self.params.get('writeannotations', False):
de6000d9 2091 annofn = self.prepare_filename(info_dict, 'annotation')
0202b52a 2092 if not ensure_dir_exists(encodeFilename(annofn)):
2093 return
0c3d0f51 2094 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 2095 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
2096 elif not info_dict.get('annotations'):
2097 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
2098 else:
2099 try:
6febd1c1 2100 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
2101 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2102 annofile.write(info_dict['annotations'])
2103 except (KeyError, TypeError):
6febd1c1 2104 self.report_warning('There are no annotations to write.')
7b6fefc9 2105 except (OSError, IOError):
6febd1c1 2106 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 2107 return
1fb07d10 2108
def dl(name, info, subtitle=False):
    """Download `info` to `name` with the downloader suitable for it and return the download() result."""
    downloader_class = get_suitable_downloader(info, self.params)
    downloader = downloader_class(self, self.params)
    for hook in self._progress_hooks:
        downloader.add_progress_hook(hook)
    if self.params.get('verbose'):
        self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
    return downloader.download(name, info, subtitle)
98b69821 2116
c4a91be7 2117 subtitles_are_requested = any([self.params.get('writesubtitles', False),
0b7f3118 2118 self.params.get('writeautomaticsub')])
c4a91be7 2119
c84dd8a9 2120 if subtitles_are_requested and info_dict.get('requested_subtitles'):
8222d8de
JMF
2121 # subtitles download errors are already managed as troubles in relevant IE
2122 # that way it will silently go on when used with unsupporting IE
c84dd8a9 2123 subtitles = info_dict['requested_subtitles']
fa57af1e 2124 # ie = self.get_info_extractor(info_dict['extractor_key'])
a504ced0
JMF
2125 for sub_lang, sub_info in subtitles.items():
2126 sub_format = sub_info['ext']
de6000d9 2127 sub_fn = self.prepare_filename(info_dict, 'subtitle')
2128 sub_filename = subtitles_filename(
2129 temp_filename if not skip_dl else sub_fn,
0202b52a 2130 sub_lang, sub_format, info_dict.get('ext'))
de6000d9 2131 sub_filename_final = subtitles_filename(sub_fn, sub_lang, sub_format, info_dict.get('ext'))
0c3d0f51 2132 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
5ff1bc0c 2133 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
0202b52a 2134 files_to_move[sub_filename] = sub_filename_final
a504ced0 2135 else:
0c9df79e 2136 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
5ff1bc0c
RA
2137 if sub_info.get('data') is not None:
2138 try:
2139 # Use newline='' to prevent conversion of newline characters
067aa17e 2140 # See https://github.com/ytdl-org/youtube-dl/issues/10268
5ff1bc0c
RA
2141 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2142 subfile.write(sub_info['data'])
0202b52a 2143 files_to_move[sub_filename] = sub_filename_final
5ff1bc0c
RA
2144 except (OSError, IOError):
2145 self.report_error('Cannot write subtitles file ' + sub_filename)
2146 return
7b6fefc9 2147 else:
5ff1bc0c 2148 try:
9f448fcb
U
2149 dl(sub_filename, sub_info, subtitle=True)
2150 '''
0c9df79e
U
2151 if self.params.get('sleep_interval_subtitles', False):
2152 dl(sub_filename, sub_info)
2153 else:
2154 sub_data = ie._request_webpage(
2155 sub_info['url'], info_dict['id'], note=False).read()
2156 with io.open(encodeFilename(sub_filename), 'wb') as subfile:
2157 subfile.write(sub_data)
9f448fcb 2158 '''
0202b52a 2159 files_to_move[sub_filename] = sub_filename_final
0c9df79e 2160 except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
5ff1bc0c
RA
2161 self.report_warning('Unable to download subtitle for "%s": %s' %
2162 (sub_lang, error_to_compat_str(err)))
2163 continue
8222d8de 2164
de6000d9 2165 if skip_dl:
57df9f53 2166 if self.params.get('convertsubtitles', False):
0202b52a 2167 # subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
de6000d9 2168 filename_real_ext = os.path.splitext(full_filename)[1][1:]
57df9f53 2169 filename_wo_ext = (
0202b52a 2170 os.path.splitext(full_filename)[0]
57df9f53 2171 if filename_real_ext == info_dict['ext']
0202b52a 2172 else full_filename)
57df9f53 2173 afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
0202b52a 2174 # if subconv.available:
2175 # info_dict['__postprocessors'].append(subconv)
57df9f53 2176 if os.path.exists(encodeFilename(afilename)):
f791b419
U
2177 self.to_screen(
2178 '[download] %s has already been downloaded and '
2179 'converted' % afilename)
57df9f53
U
2180 else:
2181 try:
0202b52a 2182 self.post_process(full_filename, info_dict, files_to_move)
af819c21 2183 except PostProcessingError as err:
2184 self.report_error('Postprocessing: %s' % str(err))
57df9f53
U
2185 return
2186
8222d8de 2187 if self.params.get('writeinfojson', False):
de6000d9 2188 infofn = self.prepare_filename(info_dict, 'infojson')
0202b52a 2189 if not ensure_dir_exists(encodeFilename(infofn)):
2190 return
0c3d0f51 2191 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
66c935fb 2192 self.to_screen('[info] Video metadata is already present')
7b6fefc9 2193 else:
66c935fb 2194 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
7b6fefc9 2195 try:
cb202fd2 2196 write_json_file(self.filter_requested_info(info_dict), infofn)
7b6fefc9 2197 except (OSError, IOError):
66c935fb 2198 self.report_error('Cannot write video metadata to JSON file ' + infofn)
7b6fefc9 2199 return
de6000d9 2200 info_dict['__infojson_filename'] = infofn
8222d8de 2201
de6000d9 2202 thumbfn = self.prepare_filename(info_dict, 'thumbnail')
2203 thumb_fn_temp = temp_filename if not skip_dl else thumbfn
2204 for thumb_ext in self._write_thumbnails(info_dict, thumb_fn_temp):
2205 thumb_filename_temp = replace_extension(thumb_fn_temp, thumb_ext, info_dict.get('ext'))
2206 thumb_filename = replace_extension(thumbfn, thumb_ext, info_dict.get('ext'))
2207 files_to_move[thumb_filename_temp] = info_dict['__thumbnail_filename'] = thumb_filename
8222d8de 2208
732044af 2209 # Write internet shortcut files
2210 url_link = webloc_link = desktop_link = False
2211 if self.params.get('writelink', False):
2212 if sys.platform == "darwin": # macOS.
2213 webloc_link = True
2214 elif sys.platform.startswith("linux"):
2215 desktop_link = True
2216 else: # if sys.platform in ['win32', 'cygwin']:
2217 url_link = True
2218 if self.params.get('writeurllink', False):
2219 url_link = True
2220 if self.params.get('writewebloclink', False):
2221 webloc_link = True
2222 if self.params.get('writedesktoplink', False):
2223 desktop_link = True
2224
2225 if url_link or webloc_link or desktop_link:
2226 if 'webpage_url' not in info_dict:
2227 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2228 return
2229 ascii_url = iri_to_uri(info_dict['webpage_url'])
2230
2231 def _write_link_file(extension, template, newline, embed_filename):
0202b52a 2232 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
10e3742e 2233 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
732044af 2234 self.to_screen('[info] Internet shortcut is already present')
2235 else:
2236 try:
2237 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2238 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2239 template_vars = {'url': ascii_url}
2240 if embed_filename:
2241 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2242 linkfile.write(template % template_vars)
2243 except (OSError, IOError):
2244 self.report_error('Cannot write internet shortcut ' + linkfn)
2245 return False
2246 return True
2247
2248 if url_link:
2249 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2250 return
2251 if webloc_link:
2252 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2253 return
2254 if desktop_link:
2255 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2256 return
2257
2258 # Download
2259 must_record_download_archive = False
de6000d9 2260 if not skip_dl:
4340deca 2261 try:
0202b52a 2262
6b591b29 2263 def existing_file(*filepaths):
2264 ext = info_dict.get('ext')
2265 final_ext = self.params.get('final_ext', ext)
2266 existing_files = []
2267 for file in orderedSet(filepaths):
2268 if final_ext != ext:
2269 converted = replace_extension(file, final_ext, ext)
2270 if os.path.exists(encodeFilename(converted)):
2271 existing_files.append(converted)
2272 if os.path.exists(encodeFilename(file)):
2273 existing_files.append(file)
2274
2275 if not existing_files or self.params.get('overwrites', False):
2276 for file in orderedSet(existing_files):
2277 self.report_file_delete(file)
2278 os.remove(encodeFilename(file))
2279 return None
2280
2281 self.report_file_already_downloaded(existing_files[0])
2282 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2283 return existing_files[0]
0202b52a 2284
2285 success = True
4340deca
P
2286 if info_dict.get('requested_formats') is not None:
2287 downloaded = []
d47aeb22 2288 merger = FFmpegMergerPP(self)
f740fae2 2289 if not merger.available:
4340deca
P
2290 postprocessors = []
2291 self.report_warning('You have requested multiple '
e4172ac9 2292 'formats but ffmpeg is not installed.'
4a5a898a 2293 ' The formats won\'t be merged.')
6350728b 2294 else:
4340deca 2295 postprocessors = [merger]
81cd954a
S
2296
2297 def compatible_formats(formats):
d03cfdce 2298 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2299 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2300 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2301 if len(video_formats) > 2 or len(audio_formats) > 2:
2302 return False
2303
81cd954a 2304 # Check extension
d03cfdce 2305 exts = set(format.get('ext') for format in formats)
2306 COMPATIBLE_EXTS = (
2307 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2308 set(('webm',)),
2309 )
2310 for ext_sets in COMPATIBLE_EXTS:
2311 if ext_sets.issuperset(exts):
2312 return True
81cd954a
S
2313 # TODO: Check acodec/vcodec
2314 return False
2315
2316 requested_formats = info_dict['requested_formats']
0202b52a 2317 old_ext = info_dict['ext']
c0dea0a7 2318 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
38c6902b 2319 info_dict['ext'] = 'mkv'
4a5a898a
S
2320 self.report_warning(
2321 'Requested formats are incompatible for merge and will be merged into mkv.')
0202b52a 2322
2323 def correct_ext(filename):
2324 filename_real_ext = os.path.splitext(filename)[1][1:]
2325 filename_wo_ext = (
2326 os.path.splitext(filename)[0]
2327 if filename_real_ext == old_ext
2328 else filename)
2329 return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2330
38c6902b 2331 # Ensure filename always has a correct extension for successful merge
0202b52a 2332 full_filename = correct_ext(full_filename)
2333 temp_filename = correct_ext(temp_filename)
2334 dl_filename = existing_file(full_filename, temp_filename)
2335 if dl_filename is None:
81cd954a 2336 for f in requested_formats:
5b5fbc08
JMF
2337 new_info = dict(info_dict)
2338 new_info.update(f)
c5c9bf0c 2339 fname = prepend_extension(
de6000d9 2340 self.prepare_filename(new_info, 'temp'),
c5c9bf0c
S
2341 'f%s' % f['format_id'], new_info['ext'])
2342 if not ensure_dir_exists(fname):
2343 return
5b5fbc08 2344 downloaded.append(fname)
a9e7f546 2345 partial_success, real_download = dl(fname, new_info)
5b5fbc08
JMF
2346 success = success and partial_success
2347 info_dict['__postprocessors'] = postprocessors
2348 info_dict['__files_to_merge'] = downloaded
a9e7f546 2349 # Even if there were no downloads, it is being merged only now
2350 info_dict['__real_download'] = True
4340deca
P
2351 else:
2352 # Just a single file
0202b52a 2353 dl_filename = existing_file(full_filename, temp_filename)
2354 if dl_filename is None:
2355 success, real_download = dl(temp_filename, info_dict)
2356 info_dict['__real_download'] = real_download
2357
0202b52a 2358 dl_filename = dl_filename or temp_filename
c571435f 2359 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 2360
4340deca 2361 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
7960b056 2362 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca
P
2363 return
2364 except (OSError, IOError) as err:
2365 raise UnavailableVideoError(err)
2366 except (ContentTooShortError, ) as err:
2367 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2368 return
8222d8de 2369
de6000d9 2370 if success and full_filename != '-':
6271f1ca 2371 # Fixup content
62cd676c
PH
2372 fixup_policy = self.params.get('fixup')
2373 if fixup_policy is None:
2374 fixup_policy = 'detect_or_warn'
2375
e4172ac9 2376 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
d1e4a464 2377
6271f1ca
PH
2378 stretched_ratio = info_dict.get('stretched_ratio')
2379 if stretched_ratio is not None and stretched_ratio != 1:
6271f1ca
PH
2380 if fixup_policy == 'warn':
2381 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
2382 info_dict['id'], stretched_ratio))
2383 elif fixup_policy == 'detect_or_warn':
2384 stretched_pp = FFmpegFixupStretchedPP(self)
2385 if stretched_pp.available:
6271f1ca
PH
2386 info_dict['__postprocessors'].append(stretched_pp)
2387 else:
2388 self.report_warning(
d1e4a464
S
2389 '%s: Non-uniform pixel ratio (%s). %s'
2390 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
6271f1ca 2391 else:
62cd676c
PH
2392 assert fixup_policy in ('ignore', 'never')
2393
3089bc74 2394 if (info_dict.get('requested_formats') is None
6b591b29 2395 and info_dict.get('container') == 'm4a_dash'
2396 and info_dict.get('ext') == 'm4a'):
62cd676c 2397 if fixup_policy == 'warn':
d1e4a464
S
2398 self.report_warning(
2399 '%s: writing DASH m4a. '
2400 'Only some players support this container.'
2401 % info_dict['id'])
62cd676c
PH
2402 elif fixup_policy == 'detect_or_warn':
2403 fixup_pp = FFmpegFixupM4aPP(self)
2404 if fixup_pp.available:
62cd676c
PH
2405 info_dict['__postprocessors'].append(fixup_pp)
2406 else:
2407 self.report_warning(
d1e4a464
S
2408 '%s: writing DASH m4a. '
2409 'Only some players support this container. %s'
2410 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
62cd676c
PH
2411 else:
2412 assert fixup_policy in ('ignore', 'never')
6271f1ca 2413
3089bc74
S
2414 if (info_dict.get('protocol') == 'm3u8_native'
2415 or info_dict.get('protocol') == 'm3u8'
2416 and self.params.get('hls_prefer_native')):
f17f8651 2417 if fixup_policy == 'warn':
a02682fd 2418 self.report_warning('%s: malformed AAC bitstream detected.' % (
f17f8651 2419 info_dict['id']))
2420 elif fixup_policy == 'detect_or_warn':
2421 fixup_pp = FFmpegFixupM3u8PP(self)
2422 if fixup_pp.available:
f17f8651 2423 info_dict['__postprocessors'].append(fixup_pp)
2424 else:
2425 self.report_warning(
a02682fd 2426 '%s: malformed AAC bitstream detected. %s'
d1e4a464 2427 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
f17f8651 2428 else:
2429 assert fixup_policy in ('ignore', 'never')
2430
8222d8de 2431 try:
0202b52a 2432 self.post_process(dl_filename, info_dict, files_to_move)
af819c21 2433 except PostProcessingError as err:
2434 self.report_error('Postprocessing: %s' % str(err))
8222d8de 2435 return
ab8e5e51
AM
2436 try:
2437 for ph in self._post_hooks:
0202b52a 2438 ph(full_filename)
ab8e5e51
AM
2439 except Exception as err:
2440 self.report_error('post hooks: %s' % str(err))
2441 return
2d30509f 2442 must_record_download_archive = True
2443
2444 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2445 self.record_download_archive(info_dict)
c3e6ffba 2446 max_downloads = self.params.get('max_downloads')
2447 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2448 raise MaxDownloadsReached()
8222d8de
JMF
2449
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when more than one URL is given, the default output
    template is a fixed name (neither '-' nor containing '%') and the
    'max_downloads' param is not 1. Returns self._download_retcode.
    """
    default_tmpl = self.outtmpl_dict['default']
    if len(url_list) > 1 and default_tmpl != '-' and '%' not in default_tmpl:
        if self.params.get('max_downloads') != 1:
            raise SameFileError(default_tmpl)

    for url in url_list:
        try:
            # extract_info also downloads the videos
            use_generic = self.params.get('force_generic_extractor', False)
            res = self.extract_info(url, force_generic_extractor=use_generic)
        except UnavailableVideoError:
            # Reported, then the loop carries on with the next URL
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached')
            raise
        except ExistingVideoReached:
            self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
            raise
        except RejectedVideoReached:
            self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
            raise
        else:
            wants_json = self.params.get('dump_single_json', False)
            if wants_json:
                self.to_stdout(json.dumps(res))

    return self._download_retcode
2480
def download_with_info_file(self, info_filename):
    """Process a previously written info JSON file as a download.

    The file is read as UTF-8, passed through filter_requested_info and
    handed to process_ie_result. On DownloadError the entry's 'webpage_url'
    (if any) is downloaded instead; without one the error is re-raised.
    Returns self._download_retcode, or the result of download() on fallback.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        raw_json = '\n'.join(lines)
    info = self.filter_requested_info(json.loads(raw_json))

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
        return self.download([webpage_url])
    return self._download_retcode
1dcc4c0c 2497
cb202fd2
S
@staticmethod
def filter_requested_info(info_dict):
    """Return a new dict without private and per-request fields.

    Keys starting with '_' are dropped (except '_type'), and so are
    'requested_formats' and 'requested_subtitles'.
    """
    dropped = ('requested_formats', 'requested_subtitles')

    def keep(key):
        if key in dropped:
            return False
        return key == '_type' or key[0] != '_'

    return dict((key, value) for key, value in info_dict.items() if keep(key))
cb202fd2 2504
def run_pp(self, pp, infodict, files_to_move=None):
    """Run one postprocessor and deal with the files it wants deleted.

    pp.run(infodict) returns (files_to_delete, infodict). When the
    'keepvideo' param is set, each of those files is registered in
    files_to_move (mapped to '' unless already present); otherwise the files
    are removed from disk and dropped from files_to_move.

    files_to_move: dict updated in place when given. When omitted, a fresh
    dict is used for this call only.
    Returns the tuple (files_to_move, infodict).
    """
    if files_to_move is None:
        # A shared `{}` default would carry entries over between unrelated calls
        files_to_move = {}

    files_to_delete, infodict = pp.run(infodict)
    if not files_to_delete:
        return files_to_move, infodict

    if self.params.get('keepvideo', False):
        for f in files_to_delete:
            files_to_move.setdefault(f, '')
    else:
        for old_filename in set(files_to_delete):
            self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
            try:
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded original file')
            # The file is gone (or could not be removed); never try to move it
            files_to_move.pop(old_filename, None)
    return files_to_move, infodict
2524
def pre_process(self, ie_info):
    """Run every 'beforedl' postprocessor over a copy of ie_info.

    Each postprocessor receives the info dict returned by the previous one;
    the final dict is returned.
    """
    info = dict(ie_info)
    for before_dl_pp in self._pps['beforedl']:
        _, info = self.run_pp(before_dl_pp, info)
    return info
2530
def post_process(self, filename, ie_info, files_to_move=None):
    """Run all the postprocessors on the given file.

    filename: stored as info['filepath'] for the postprocessors.
    ie_info: info dict; its '__postprocessors' entries run before the
        'normal' ones registered on this object.
    files_to_move: dict handed to (and updated by) run_pp for every
        postprocessor, then passed to MoveFilesAfterDownloadPP. When omitted,
        a fresh dict is used for this call only.
    """
    if files_to_move is None:
        # A shared `{}` default would be mutated by run_pp across calls
        files_to_move = {}

    info = dict(ie_info)
    info['filepath'] = filename
    info['__files_to_move'] = {}

    for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
        files_to_move, info = self.run_pp(pp, info, files_to_move)
    # Pass an explicit dict so this step never depends on run_pp's default
    info = self.run_pp(MoveFilesAfterDownloadPP(self, files_to_move), info, {})[1]
    for pp in self._pps['aftermove']:
        info = self.run_pp(pp, info, {})[1]
c1c9a79c 2542
5db07df6 2543 def _make_archive_id(self, info_dict):
e9fef7ee
S
2544 video_id = info_dict.get('id')
2545 if not video_id:
2546 return
5db07df6
PH
2547 # Future-proof against any change in case
2548 # and backwards compatibility with prior versions
e9fef7ee 2549 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 2550 if extractor is None:
1211bb6d
S
2551 url = str_or_none(info_dict.get('url'))
2552 if not url:
2553 return
e9fef7ee
S
2554 # Try to find matching extractor for the URL and take its ie_key
2555 for ie in self._ies:
1211bb6d 2556 if ie.suitable(url):
e9fef7ee
S
2557 extractor = ie.ie_key()
2558 break
2559 else:
2560 return
d0757229 2561 return '%s %s' % (extractor.lower(), video_id)
5db07df6
PH
2562
def in_download_archive(self, info_dict):
    """Return True if info_dict's archive id is in self.archive.

    Returns False when the 'download_archive' param is unset or no archive
    id can be built for info_dict.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    # A falsy id means incomplete video information
    return bool(archive_id) and archive_id in self.archive
c1c9a79c
PH
2573
def record_download_archive(self, info_dict):
    """Append info_dict's archive id to the archive file and self.archive.

    Does nothing when the 'download_archive' param is unset.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
dd82ffea 2583
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short resolution string for a format dict.

    'audio only' when vcodec is 'none'; otherwise the explicit 'resolution'
    field, or a string built from width/height, or `default` when neither
    dimension is known.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width = format.get('width')
    height = format.get('height')
    if height is not None:
        if width is not None:
            return '%sx%s' % (width, height)
        return '%sp' % height
    if width is not None:
        return '%dx?' % width
    return default
2600
c57f7757
PH
2601 def _format_note(self, fdict):
2602 res = ''
2603 if fdict.get('ext') in ['f4f', 'f4m']:
2604 res += '(unsupported) '
32f90364
PH
2605 if fdict.get('language'):
2606 if res:
2607 res += ' '
9016d76f 2608 res += '[%s] ' % fdict['language']
c57f7757
PH
2609 if fdict.get('format_note') is not None:
2610 res += fdict['format_note'] + ' '
2611 if fdict.get('tbr') is not None:
2612 res += '%4dk ' % fdict['tbr']
2613 if fdict.get('container') is not None:
2614 if res:
2615 res += ', '
2616 res += '%s container' % fdict['container']
3089bc74
S
2617 if (fdict.get('vcodec') is not None
2618 and fdict.get('vcodec') != 'none'):
c57f7757
PH
2619 if res:
2620 res += ', '
2621 res += fdict['vcodec']
91c7271a 2622 if fdict.get('vbr') is not None:
c57f7757
PH
2623 res += '@'
2624 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2625 res += 'video@'
2626 if fdict.get('vbr') is not None:
2627 res += '%4dk' % fdict['vbr']
fbb21cf5 2628 if fdict.get('fps') is not None:
5d583bdf
S
2629 if res:
2630 res += ', '
2631 res += '%sfps' % fdict['fps']
c57f7757
PH
2632 if fdict.get('acodec') is not None:
2633 if res:
2634 res += ', '
2635 if fdict['acodec'] == 'none':
2636 res += 'video only'
2637 else:
2638 res += '%-5s' % fdict['acodec']
2639 elif fdict.get('abr') is not None:
2640 if res:
2641 res += ', '
2642 res += 'audio'
2643 if fdict.get('abr') is not None:
2644 res += '@%3dk' % fdict['abr']
2645 if fdict.get('asr') is not None:
2646 res += ' (%5dHz)' % fdict['asr']
2647 if fdict.get('filesize') is not None:
2648 if res:
2649 res += ', '
2650 res += format_bytes(fdict['filesize'])
9732d77e
PH
2651 elif fdict.get('filesize_approx') is not None:
2652 if res:
2653 res += ', '
2654 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 2655 return res
91c7271a 2656
def _format_note_table(self, f):
    """Return the comma-separated note column for format dict f.

    Empty pieces are left out of the result.
    """
    pieces = (
        'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
        format_field(f, 'language', '[%s]'),
        format_field(f, 'format_note'),
        format_field(f, 'container', ignore=(None, f.get('ext'))),
        format_field(f, 'asr', '%5dHz'),
    )
    return ', '.join(piece for piece in pieces if piece != '')
2667
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict.

    Uses info_dict['formats'], or info_dict itself when that key is absent.
    Formats whose 'preference' is below -1000 are not listed. The
    'listformats_table' param selects the wide table layout; otherwise the
    four-column layout is used.
    """
    formats = info_dict.get('formats', [info_dict])
    new_format = self.params.get('listformats_table', False)
    listed = [
        f for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]

    if new_format:
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                format_field(f, 'fps', '%d'),
                '|',
                format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
                format_field(f, 'tbr', '%4dk'),
                # A format without a protocol must not crash the listing
                (f.get('protocol') or '').replace('http_dash_segments', 'dash').replace("native", "n"),
                '|',
                format_field(f, 'vcodec', default='unknown').replace('none', ''),
                format_field(f, 'vbr', '%4dk'),
                format_field(f, 'acodec', default='unknown').replace('none', ''),
                format_field(f, 'abr', '%3dk'),
                format_field(f, 'asr', '%5dHz'),
                self._format_note_table(f)]
            for f in listed]
        header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
                       '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
    else:
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)]
            for f in listed]
        header_line = ['format code', 'extension', 'resolution', 'note']

    self.to_screen(
        '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
            header_line,
            table,
            delim=new_format,
            extraGap=(0 if new_format else 1),
            hideEmpty=new_format)))
cfb56d1a
PH
2713
def list_thumbnails(self, info_dict):
    """Print a table (ID, width, height, URL) of info_dict's thumbnails."""
    thumbnails = info_dict.get('thumbnails')
    if not thumbnails:
        self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
        return

    self.to_screen('[info] Thumbnails for %s:' % info_dict['id'])
    rows = []
    for thumb in thumbnails:
        rows.append([
            thumb['id'],
            thumb.get('width', 'unknown'),
            thumb.get('height', 'unknown'),
            thumb['url']])
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
dca08720 2725
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the languages and formats available in `subtitles`.

    subtitles maps a language to a list of dicts with an 'ext' key; the
    extensions of each language are listed in reverse order.
    """
    if not subtitles:
        self.to_screen('%s has no %s' % (video_id, name))
        return

    self.to_screen('Available %s for %s:' % (name, video_id))
    rows = [
        [lang, ', '.join(sub['ext'] for sub in reversed(formats))]
        for lang, formats in subtitles.items()]
    self.to_screen(render_table(['Language', 'formats'], rows))
a504ced0 2736
dca08720
PH
def urlopen(self, req):
    """ Start an HTTP download """
    # Plain URL strings are wrapped in a request object first
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
dca08720
PH
2742
def print_debug_header(self):
    """Write the '[debug]' header lines; does nothing unless 'verbose' is set.

    Reports encodings, the program version, the git HEAD (when available),
    the Python version, external executable versions and the proxy map.
    With the 'call_home' param it also fetches and reports the public IP.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    out_stream = sys.stdout
    stdout_encoding = getattr(
        out_stream, 'encoding', 'missing (%s)' % type(out_stream).__name__)
    encodings = (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        stdout_encoding,
        self.get_encoding())
    write_string(
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % encodings,
        encoding=None)

    self._write_string('[debug] yt-dlp version %s\n' % __version__)
    if _LAZY_LOADER:
        self._write_string('[debug] Lazy loading extractors enabled\n')
    if _PLUGIN_CLASSES:
        plugin_keys = [ie.ie_key() for ie in _PLUGIN_CLASSES]
        self._write_string('[debug] Plugin Extractors: %s\n' % plugin_keys)

    # Best effort: any failure while asking git is ignored
    try:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        git_proc = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=module_dir)
        git_out, _ = process_communicate_or_kill(git_proc)
        head = git_out.decode().strip()
        if re.match('[0-9a-f]+', head):
            self._write_string('[debug] Git HEAD: %s\n' % head)
    except Exception:
        try:
            sys.exc_clear()
        except Exception:
            pass

    implementation = platform.python_implementation()
    if implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
        implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]
    self._write_string('[debug] Python version %s (%s) - %s\n' % (
        platform.python_version(), implementation, platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    found = [
        '%s %s' % (exe, version)
        for exe, version in sorted(exe_versions.items())
        if version]
    self._write_string('[debug] exe versions: %s\n' % (', '.join(found) or 'none'))

    proxy_map = {}
    for handler in self._opener.handlers:
        proxy_map.update(getattr(handler, 'proxies', {}))
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        return
        # NOTE: the update check below is unreachable because of the return above
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
2822
def _setup_opener(self):
    """Create self.cookiejar and self._opener from the current params.

    Reads the 'socket_timeout', 'cookiefile', 'proxy' and
    'debug_printtraffic' params. Also sets self._socket_timeout
    (600 when 'socket_timeout' is unset).
    """
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = float(timeout_val) if timeout_val is not None else 600

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is not None:
        opts_cookiefile = expand_path(opts_cookiefile)
        self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load(ignore_discard=True, ignore_expires=True)
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if opts_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        # An empty proxy string means: use no proxies at all
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
    file_handler.file_open = file_open

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
2875
def encode(self, s):
    """Encode s with self.get_encoding(); bytes are returned unchanged.

    A UnicodeEncodeError is re-raised with a configuration hint appended to
    its reason.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s  # Already encoded
2885
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a 2891
de6000d9 2892 def _write_thumbnails(self, info_dict, filename): # return the extensions
ec82d85a
PH
2893 if self.params.get('writethumbnail', False):
2894 thumbnails = info_dict.get('thumbnails')
2895 if thumbnails:
2896 thumbnails = [thumbnails[-1]]
2897 elif self.params.get('write_all_thumbnails', False):
0202b52a 2898 thumbnails = info_dict.get('thumbnails') or []
ec82d85a 2899 else:
0202b52a 2900 thumbnails = []
ec82d85a 2901
0202b52a 2902 ret = []
ec82d85a
PH
2903 for t in thumbnails:
2904 thumb_ext = determine_ext(t['url'], 'jpg')
de6000d9 2905 suffix = '%s.' % t['id'] if len(thumbnails) > 1 else ''
ec82d85a 2906 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
de6000d9 2907 t['filename'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
ec82d85a 2908
0c3d0f51 2909 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
de6000d9 2910 ret.append(suffix + thumb_ext)
ec82d85a
PH
2911 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2912 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2913 else:
2914 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2915 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2916 try:
2917 uf = self.urlopen(t['url'])
d3d89c32 2918 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 2919 shutil.copyfileobj(uf, thumbf)
de6000d9 2920 ret.append(suffix + thumb_ext)
ec82d85a
PH
2921 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2922 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2923 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2924 self.report_warning('Unable to download thumbnail "%s": %s' %
9b9c5355 2925 (t['url'], error_to_compat_str(err)))
0202b52a 2926 return ret