Release 2021.07.21
[yt-dlp.git] / yt_dlp / YoutubeDL.py
cc52de43 1#!/usr/bin/env python3
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
317f7ab6 8import copy
9d2ecdbc 9import datetime
c1c9a79c 10import errno
31bd3925 11import fileinput
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de
JMF
19import re
20import shutil
dca08720 21import subprocess
8222d8de 22import sys
21cd8fae 23import tempfile
8222d8de 24import time
67134eab 25import tokenize
8222d8de 26import traceback
75822ca7 27import random
8222d8de 28
961ea474 29from string import ascii_letters
e5813e53 30from zipimport import zipimporter
961ea474 31
8c25f81b 32from .compat import (
82d8a8b6 33 compat_basestring,
003c69a8 34 compat_get_terminal_size,
4f026faf 35 compat_kwargs,
d0d9ade4 36 compat_numeric_types,
e9c0cdd3 37 compat_os_name,
ce02ed60 38 compat_str,
67134eab 39 compat_tokenize_tokenize,
ce02ed60
PH
40 compat_urllib_error,
41 compat_urllib_request,
8b172c2e 42 compat_urllib_request_DataHandler,
8c25f81b 43)
982ee69a 44from .cookies import load_cookies
8c25f81b 45from .utils import (
eedb7ba5
S
46 age_restricted,
47 args_to_str,
ce02ed60
PH
48 ContentTooShortError,
49 date_from_str,
50 DateRange,
acd69589 51 DEFAULT_OUTTMPL,
ce02ed60 52 determine_ext,
b5559424 53 determine_protocol,
732044af 54 DOT_DESKTOP_LINK_TEMPLATE,
55 DOT_URL_LINK_TEMPLATE,
56 DOT_WEBLOC_LINK_TEMPLATE,
ce02ed60 57 DownloadError,
c0384f22 58 encode_compat_str,
ce02ed60 59 encodeFilename,
498f5606 60 EntryNotInPlaylist,
a06916d9 61 error_to_compat_str,
8b0d7497 62 ExistingVideoReached,
590bc6f6 63 expand_path,
ce02ed60 64 ExtractorError,
e29663c6 65 float_or_none,
02dbf93f 66 format_bytes,
76d321f6 67 format_field,
752cda38 68 STR_FORMAT_RE,
525ef922 69 formatSeconds,
773f291d 70 GeoRestrictedError,
b0249bca 71 HEADRequest,
c9969434 72 int_or_none,
732044af 73 iri_to_uri,
773f291d 74 ISO3166Utils,
56a8fb4f 75 LazyList,
ce02ed60 76 locked_file,
0202b52a 77 make_dir,
dca08720 78 make_HTTPS_handler,
ce02ed60 79 MaxDownloadsReached,
3158150c 80 network_exceptions,
cd6fc19e 81 orderedSet,
a06916d9 82 OUTTMPL_TYPES,
b7ab0590 83 PagedList,
083c9df9 84 parse_filesize,
91410c9b 85 PerRequestProxyHandler,
dca08720 86 platform_name,
eedb7ba5 87 PostProcessingError,
ce02ed60 88 preferredencoding,
eedb7ba5 89 prepend_extension,
a06916d9 90 process_communicate_or_kill,
51fb4995 91 register_socks_protocols,
a06916d9 92 RejectedVideoReached,
cfb56d1a 93 render_table,
eedb7ba5 94 replace_extension,
ce02ed60
PH
95 SameFileError,
96 sanitize_filename,
1bb5c511 97 sanitize_path,
dcf77cf1 98 sanitize_url,
67dda517 99 sanitized_Request,
e5660ee6 100 std_headers,
1211bb6d 101 str_or_none,
e29663c6 102 strftime_or_none,
ce02ed60 103 subtitles_filename,
51d9739f 104 ThrottledDownload,
732044af 105 to_high_limit_path,
324ad820 106 traverse_obj,
6033d980 107 try_get,
ce02ed60 108 UnavailableVideoError,
29eb5174 109 url_basename,
58b1f00d 110 version_tuple,
ce02ed60
PH
111 write_json_file,
112 write_string,
6a3f4c3f 113 YoutubeDLCookieProcessor,
dca08720 114 YoutubeDLHandler,
fca6dba8 115 YoutubeDLRedirectHandler,
ce02ed60 116)
a0e07d31 117from .cache import Cache
52a8a1e1 118from .extractor import (
119 gen_extractor_classes,
120 get_info_extractor,
121 _LAZY_LOADER,
122 _PLUGIN_CLASSES
123)
4c54b89e 124from .extractor.openload import PhantomJSwrapper
52a8a1e1 125from .downloader import (
126 get_suitable_downloader,
127 shorten_protocol_name
128)
4c83c967 129from .downloader.rtmp import rtmpdump_version
4f026faf 130from .postprocessor import (
e36d50c5 131 get_postprocessor,
132 FFmpegFixupDurationPP,
f17f8651 133 FFmpegFixupM3u8PP,
62cd676c 134 FFmpegFixupM4aPP,
6271f1ca 135 FFmpegFixupStretchedPP,
e36d50c5 136 FFmpegFixupTimestampPP,
4f026faf
PH
137 FFmpegMergerPP,
138 FFmpegPostProcessor,
0202b52a 139 MoveFilesAfterDownloadPP,
4f026faf 140)
dca08720 141from .version import __version__
8222d8de 142
e9c0cdd3
YCH
143if compat_os_name == 'nt':
144 import ctypes
145
2459b6e1 146
8222d8de
JMF
147class YoutubeDL(object):
148 """YoutubeDL class.
149
 150 YoutubeDL objects are the ones responsible for downloading the
151 actual video file and writing it to disk if the user has requested
152 it, among some other tasks. In most cases there should be one per
 153 program. Given a video URL, the downloader doesn't know how to
 154 extract all the needed information (that is the InfoExtractors' task), so it
 155 has to pass the URL to one of them.
156
157 For this, YoutubeDL objects have a method that allows
158 InfoExtractors to be registered in a given order. When it is passed
 159 a URL, the YoutubeDL object hands it to the first InfoExtractor it
160 finds that reports being able to handle it. The InfoExtractor extracts
161 all the information about the video or videos the URL refers to, and
 162 YoutubeDL processes the extracted information, possibly using a File
163 Downloader to download the video.
164
165 YoutubeDL objects accept a lot of parameters. In order not to saturate
166 the object constructor with arguments, it receives a dictionary of
167 options instead. These options are available through the params
168 attribute for the InfoExtractors to use. The YoutubeDL also
 169 registers itself as the downloader in charge of the InfoExtractors
170 that are added to it, so this is a "mutual registration".
171
172 Available options:
173
174 username: Username for authentication purposes.
175 password: Password for authentication purposes.
180940e0 176 videopassword: Password for accessing a video.
1da50aa3
S
177 ap_mso: Adobe Pass multiple-system operator identifier.
178 ap_username: Multiple-system operator account username.
179 ap_password: Multiple-system operator account password.
8222d8de
JMF
180 usenetrc: Use netrc for authentication instead.
181 verbose: Print additional info to stdout.
182 quiet: Do not print messages to stdout.
ad8915b7 183 no_warnings: Do not print out anything for warnings.
53c18592 184 forceprint: A list of templates to force print
185 forceurl: Force printing final URL. (Deprecated)
186 forcetitle: Force printing title. (Deprecated)
187 forceid: Force printing ID. (Deprecated)
188 forcethumbnail: Force printing thumbnail URL. (Deprecated)
189 forcedescription: Force printing description. (Deprecated)
190 forcefilename: Force printing final filename. (Deprecated)
191 forceduration: Force printing duration. (Deprecated)
8694c600 192 forcejson: Force printing info_dict as JSON.
63e0be34
PH
193 dump_single_json: Force printing the info_dict of the whole playlist
194 (or video) as a single JSON line.
c25228e5 195 force_write_download_archive: Force writing download archive regardless
196 of 'skip_download' or 'simulate'.
8222d8de 197 simulate: Do not download the video files.
eb8a4433 198 format: Video format code. see "FORMAT SELECTION" for more details.
63ad4d43 199 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 200 ignore_no_formats_error: Ignore "No video formats" error. Useful for
201 extracting metadata even if the video is not actually
202 available for download (experimental)
c25228e5 203 format_sort: How to sort the video formats. see "Sorting Formats"
204 for more details.
205 format_sort_force: Force the given format_sort. see "Sorting Formats"
206 for more details.
207 allow_multiple_video_streams: Allow multiple video streams to be merged
208 into a single file
209 allow_multiple_audio_streams: Allow multiple audio streams to be merged
210 into a single file
0ba692ac 211 check_formats: Whether to test if the formats are downloadable.
212 Can be True (check all), False (check none)
213 or None (check only if requested by extractor)
4524baf0 214 paths: Dictionary of output paths. The allowed keys are 'home',
215 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 216 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 217 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
 218 A string is also accepted for backward compatibility
a820dc72
RA
219 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
220 restrictfilenames: Do not allow "&" and spaces in file names
221 trim_file_name: Limit length of filename (extension excluded)
4524baf0 222 windowsfilenames: Force the filenames to be windows compatible
a820dc72 223 ignoreerrors: Do not stop on download errors
7a5c1cfe 224 (Default True when running yt-dlp,
a820dc72 225 but False when directly accessing YoutubeDL class)
26e2805c 226 skip_playlist_after_errors: Number of allowed failures until the rest of
227 the playlist is skipped
d22dec74 228 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 229 overwrites: Overwrite all video and metadata files if True,
230 overwrite only non-video files if None
231 and don't overwrite any file if False
8222d8de
JMF
232 playliststart: Playlist item to start at.
233 playlistend: Playlist item to end at.
c14e88f0 234 playlist_items: Specific indices of playlist to download.
ff815fe6 235 playlistreverse: Download playlist items in reverse order.
75822ca7 236 playlistrandom: Download playlist items in random order.
8222d8de
JMF
237 matchtitle: Download only matching titles.
238 rejecttitle: Reject downloads for matching titles.
8bf9319e 239 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
240 logtostderr: Log messages to stderr instead of stdout.
241 writedescription: Write the video description to a .description file
242 writeinfojson: Write the video description to a .info.json file
75d43ca0 243 clean_infojson: Remove private fields from the infojson
06167fbb 244 writecomments: Extract video comments. This will not be written to disk
245 unless writeinfojson is also given
1fb07d10 246 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 247 writethumbnail: Write the thumbnail image to a file
c25228e5 248 allow_playlist_files: Whether to write playlists' description, infojson etc
249 also to disk when using the 'write*' options
ec82d85a 250 write_all_thumbnails: Write all thumbnail formats to files
732044af 251 writelink: Write an internet shortcut file, depending on the
252 current platform (.url/.webloc/.desktop)
253 writeurllink: Write a Windows internet shortcut file (.url)
254 writewebloclink: Write a macOS internet shortcut file (.webloc)
255 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 256 writesubtitles: Write the video subtitles to a file
741dd8ea 257 writeautomaticsub: Write the automatically generated subtitles to a file
245524e6 258 allsubtitles: Deprecated - Use subtitleslangs = ['all']
c32b0aab 259 Downloads all the subtitles of the video
0b7f3118 260 (requires writesubtitles or writeautomaticsub)
8222d8de 261 listsubtitles: Lists all available subtitles for the video
a504ced0 262 subtitlesformat: The format code for subtitles
c32b0aab 263 subtitleslangs: List of languages of the subtitles to download (can be regex).
264 The list may contain "all" to refer to all the available
265 subtitles. The language can be prefixed with a "-" to
266 exclude it from the requested languages. Eg: ['all', '-live_chat']
8222d8de
JMF
267 keepvideo: Keep the video file after post-processing
268 daterange: A DateRange object, download only if the upload_date is in the range.
269 skip_download: Skip the actual download of the video file
c35f9e72 270 cachedir: Location of the cache files in the filesystem.
a0e07d31 271 False to disable filesystem cache.
47192f92 272 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
273 age_limit: An integer representing the user's age in years.
274 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
275 min_views: An integer representing the minimum view count the video
276 must have in order to not be skipped.
277 Videos without view count information are always
278 downloaded. None for no limit.
279 max_views: An integer representing the maximum view count.
280 Videos that are more popular than that are not
281 downloaded.
282 Videos without view count information are always
283 downloaded. None for no limit.
284 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
285 Videos already present in the file are not downloaded
286 again.
8a51f564 287 break_on_existing: Stop the download process after attempting to download a
288 file that is in the archive.
289 break_on_reject: Stop the download process when encountering a video that
290 has been filtered out.
291 cookiefile: File name where cookies should be read from and dumped to
982ee69a
MB
292 cookiesfrombrowser: A tuple containing the name of the browser and the profile
293 name/path from where cookies are loaded.
 294 Eg: ('chrome', ) or ('vivaldi', 'default')
a1ee09e8 295 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
296 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
297 At the moment, this is only supported by YouTube.
a1ee09e8 298 proxy: URL of the proxy server to use
38cce791 299 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 300 on geo-restricted sites.
e344693b 301 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
302 bidi_workaround: Work around buggy terminals without bidirectional text
 303 support, using fribidi
a0ddb8a2 304 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 305 include_ads: Download ads as well
04b4d394
PH
306 default_search: Prepend this string if an input url is not valid.
307 'auto' for elaborate guessing
62fec3b2 308 encoding: Use this encoding instead of the system-specified.
e8ee972c 309 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
310 Pass in 'in_playlist' to only show this behavior for
311 playlist items.
4f026faf 312 postprocessors: A list of dictionaries, each with an entry
71b640cc 313 * key: The name of the postprocessor. See
7a5c1cfe 314 yt_dlp/postprocessor/__init__.py for a list.
56d868db 315 * when: When to run the postprocessor. Can be one of
316 pre_process|before_dl|post_process|after_move.
317 Assumed to be 'post_process' if not given
ab8e5e51
AM
318 post_hooks: A list of functions that get called as the final step
319 for each video file, after all postprocessors have been
320 called. The filename will be passed as the only argument.
71b640cc
PH
321 progress_hooks: A list of functions that get called on download
322 progress, with a dictionary with the entries
5cda4eda 323 * status: One of "downloading", "error", or "finished".
ee69b99a 324 Check this first and ignore unknown values.
71b640cc 325
5cda4eda 326 If status is one of "downloading", or "finished", the
ee69b99a
PH
327 following properties may also be present:
328 * filename: The final filename (always present)
5cda4eda 329 * tmpfilename: The filename we're currently writing to
71b640cc
PH
330 * downloaded_bytes: Bytes on disk
331 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
332 * total_bytes_estimate: Guess of the eventual file size,
333 None if unavailable.
334 * elapsed: The number of seconds since download started.
71b640cc
PH
335 * eta: The estimated time in seconds, None if unknown
336 * speed: The download speed in bytes/second, None if
337 unknown
5cda4eda
PH
338 * fragment_index: The counter of the currently
339 downloaded video fragment.
340 * fragment_count: The number of fragments (= individual
341 files that will be merged)
71b640cc
PH
342
343 Progress hooks are guaranteed to be called at least once
344 (with status "finished") if the download is successful.
45598f15 345 merge_output_format: Extension to use when merging formats.
6b591b29 346 final_ext: Expected final extension; used to detect when the file was
347 already downloaded and converted. "merge_output_format" is
348 replaced by this extension when given
6271f1ca
PH
349 fixup: Automatically correct known faults of the file.
350 One of:
351 - "never": do nothing
352 - "warn": only emit a warning
353 - "detect_or_warn": check whether we can do anything
62cd676c 354 about it, warn otherwise (default)
504f20dd 355 source_address: Client-side IP address to bind to.
6ec6cb4e 356 call_home: Boolean, true iff we are allowed to contact the
7a5c1cfe 357 yt-dlp servers for debugging. (BROKEN)
1cf376f5 358 sleep_interval_requests: Number of seconds to sleep between requests
359 during extraction
7aa589a5
S
360 sleep_interval: Number of seconds to sleep before each download when
361 used alone or a lower bound of a range for randomized
362 sleep before each download (minimum possible number
363 of seconds to sleep) when used along with
364 max_sleep_interval.
365 max_sleep_interval:Upper bound of a range for randomized sleep before each
366 download (maximum possible number of seconds to sleep).
367 Must only be used along with sleep_interval.
368 Actual sleep time will be a random float from range
369 [sleep_interval; max_sleep_interval].
1cf376f5 370 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a
PH
371 listformats: Print an overview of available video formats and exit.
372 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
373 match_filter: A function that gets called with the info_dict of
374 every video.
375 If it returns a message, the video is ignored.
376 If it returns None, the video is downloaded.
377 match_filter_func in utils.py is one example for this.
7e5db8c9 378 no_color: Do not emit color codes in output.
0a840f58 379 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 380 HTTP header
0a840f58 381 geo_bypass_country:
773f291d
S
 382 Two-letter ISO 3166-1 alpha-2 country code that will be used for
383 explicit geographic restriction bypassing via faking
504f20dd 384 X-Forwarded-For HTTP header
5f95927a
S
385 geo_bypass_ip_block:
386 IP range in CIDR notation that will be used similarly to
504f20dd 387 geo_bypass_country
71b640cc 388
85729c51 389 The following options determine which downloader is picked:
52a8a1e1 390 external_downloader: A dictionary of protocol keys and the executable of the
391 external downloader to use for it. The allowed protocols
392 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
393 Set the value to 'native' to use the native downloader
394 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
395 or {'m3u8': 'ffmpeg'} instead.
396 Use the native HLS downloader instead of ffmpeg/avconv
bf09af3a
S
 397 if True, use ffmpeg/avconv if False, or use the
 398 downloader suggested by the extractor if None.
53ed7066 399 compat_opts: Compatibility options. See "Differences in default behavior".
3acf6d38 400 The following options do not work when used through the API:
401 filename, abort-on-error, multistreams, no-live-chat,
 402 no-playlist-metafiles. Refer to __init__.py for their implementation
fe7e0c98 403
8222d8de 404 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 405 the downloader (see yt_dlp/downloader/common.py):
51d9739f 406 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
407 max_filesize, test, noresizebuffer, retries, continuedl, noprogress, consoletitle,
408 xattr_set_filesize, external_downloader_args, hls_use_mpegts, http_chunk_size.
76b1bd67
JMF
409
410 The following options are used by the post processors:
d4a24f40 411 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 412 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117
S
413 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
414 to the binary or its containing directory.
43820c03 415 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
416 and a list of additional command-line arguments for the
417 postprocessor/executable. The dict can also have "PP+EXE" keys
418 which are used when the given exe is used by the given PP.
 419 Use 'default' as the name for arguments to be passed to all PP
e409895f 420
421 The following options are used by the extractors:
62bff2c1 422 extractor_retries: Number of times to retry for known errors
423 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 424 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 425 discontinuities such as ad breaks (default: False)
5d3a0e79 426 extractor_args: A dictionary of arguments to be passed to the extractors.
427 See "EXTRACTOR ARGUMENTS" for details.
428 Eg: {'youtube': {'skip': ['dash', 'hls']}}
429 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
430 If True (default), DASH manifests and related
62bff2c1 431 data will be downloaded and processed by extractor.
432 You can reduce network I/O by disabling it if you don't
433 care about DASH. (only for youtube)
5d3a0e79 434 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
435 If True (default), HLS manifests and related
62bff2c1 436 data will be downloaded and processed by extractor.
437 You can reduce network I/O by disabling it if you don't
438 care about HLS. (only for youtube)
8222d8de
JMF
439 """
440
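    # Illustrative usage sketch (comment only, not executed): one plausible way
    # to drive this class with the options documented above. The concrete
    # parameter values and the example URL are assumptions for demonstration.
    #
    #     params = {
    #         'format': 'bestvideo+bestaudio/best',
    #         'paths': {'home': '~/Videos', 'temp': 'tmp'},
    #         'outtmpl': {'default': '%(title)s [%(id)s].%(ext)s'},
    #         'ignoreerrors': True,
    #         'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3'}],
    #         'progress_hooks': [lambda d: print(d['status'], d.get('filename'))],
    #     }
    #     with YoutubeDL(params) as ydl:
    #         ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])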
c9969434
S
441 _NUMERIC_FIELDS = set((
442 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
443 'timestamp', 'upload_year', 'upload_month', 'upload_day',
444 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
445 'average_rating', 'comment_count', 'age_limit',
446 'start_time', 'end_time',
447 'chapter_number', 'season_number', 'episode_number',
448 'track_number', 'disc_number', 'release_year',
449 'playlist_index',
450 ))
451
8222d8de
JMF
452 params = None
453 _ies = []
56d868db 454 _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
b35496d8 455 _printed_messages = set()
1cf376f5 456 _first_webpage_request = True
8222d8de
JMF
457 _download_retcode = None
458 _num_downloads = None
30a074c2 459 _playlist_level = 0
460 _playlist_urls = set()
8222d8de
JMF
461 _screen_file = None
462
3511266b 463 def __init__(self, params=None, auto_init=True):
8222d8de 464 """Create a FileDownloader object with the given options."""
e9f9a10f
JMF
465 if params is None:
466 params = {}
8222d8de 467 self._ies = []
56c73665 468 self._ies_instances = {}
56d868db 469 self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
b35496d8 470 self._printed_messages = set()
1cf376f5 471 self._first_webpage_request = True
ab8e5e51 472 self._post_hooks = []
933605d7 473 self._progress_hooks = []
8222d8de
JMF
474 self._download_retcode = 0
475 self._num_downloads = 0
476 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
0783b09b 477 self._err_file = sys.stderr
4abf617b
S
478 self.params = {
479 # Default parameters
480 'nocheckcertificate': False,
481 }
482 self.params.update(params)
a0e07d31 483 self.cache = Cache(self)
34308b30 484
a61f4b28 485 if sys.version_info < (3, 6):
486 self.report_warning(
0181adef 487 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
a61f4b28 488
be5df5ee
S
489 def check_deprecated(param, option, suggestion):
490 if self.params.get(param) is not None:
53ed7066 491 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
be5df5ee
S
492 return True
493 return False
494
495 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
38cce791
YCH
496 if self.params.get('geo_verification_proxy') is None:
497 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
498
0d1bb027 499 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
500 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
53ed7066 501 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
0d1bb027 502
503 for msg in self.params.get('warnings', []):
504 self.report_warning(msg)
505
6b591b29 506 if self.params.get('final_ext'):
507 if self.params.get('merge_output_format'):
508 self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
509 self.params['merge_output_format'] = self.params['final_ext']
510
b9d973be 511 if 'overwrites' in self.params and self.params['overwrites'] is None:
512 del self.params['overwrites']
513
0783b09b 514 if params.get('bidi_workaround', False):
1c088fa8
PH
515 try:
516 import pty
517 master, slave = pty.openpty()
003c69a8 518 width = compat_get_terminal_size().columns
1c088fa8
PH
519 if width is None:
520 width_args = []
521 else:
522 width_args = ['-w', str(width)]
5d681e96 523 sp_kwargs = dict(
1c088fa8
PH
524 stdin=subprocess.PIPE,
525 stdout=slave,
526 stderr=self._err_file)
5d681e96
PH
527 try:
528 self._output_process = subprocess.Popen(
529 ['bidiv'] + width_args, **sp_kwargs
530 )
531 except OSError:
5d681e96
PH
532 self._output_process = subprocess.Popen(
533 ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
534 self._output_channel = os.fdopen(master, 'rb')
1c088fa8 535 except OSError as ose:
66e7ace1 536 if ose.errno == errno.ENOENT:
6febd1c1 537 self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
1c088fa8
PH
538 else:
539 raise
0783b09b 540
3089bc74
S
541 if (sys.platform != 'win32'
542 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
543 and not params.get('restrictfilenames', False)):
e9137224 544 # Unicode filesystem API will throw errors (#1474, #13027)
34308b30 545 self.report_warning(
6febd1c1 546 'Assuming --restrict-filenames since file system encoding '
1b725173 547 'cannot encode all characters. '
6febd1c1 548 'Set the LC_ALL environment variable to fix this.')
4a98cdbf 549 self.params['restrictfilenames'] = True
34308b30 550
de6000d9 551 self.outtmpl_dict = self.parse_outtmpl()
486dd09e 552
187986a8 553 # Creating format selector here allows us to catch syntax errors before the extraction
554 self.format_selector = (
555 None if self.params.get('format') is None
556 else self.build_format_selector(self.params['format']))
557
dca08720
PH
558 self._setup_opener()
559
4cd0a709 560 """Preload the archive, if any is specified"""
561 def preload_download_archive(fn):
562 if fn is None:
563 return False
0760b0a7 564 self.write_debug('Loading archive file %r\n' % fn)
4cd0a709 565 try:
566 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
567 for line in archive_file:
568 self.archive.add(line.strip())
569 except IOError as ioe:
570 if ioe.errno != errno.ENOENT:
571 raise
572 return False
573 return True
574
575 self.archive = set()
576 preload_download_archive(self.params.get('download_archive'))
577
3511266b
PH
578 if auto_init:
579 self.print_debug_header()
580 self.add_default_info_extractors()
581
4f026faf 582 for pp_def_raw in self.params.get('postprocessors', []):
4f026faf 583 pp_def = dict(pp_def_raw)
fd7cfb64 584 when = pp_def.pop('when', 'post_process')
585 pp_class = get_postprocessor(pp_def.pop('key'))
4f026faf 586 pp = pp_class(self, **compat_kwargs(pp_def))
5bfa4862 587 self.add_post_processor(pp, when=when)
4f026faf 588
ab8e5e51
AM
589 for ph in self.params.get('post_hooks', []):
590 self.add_post_hook(ph)
591
71b640cc
PH
592 for ph in self.params.get('progress_hooks', []):
593 self.add_progress_hook(ph)
594
51fb4995
YCH
595 register_socks_protocols()
596
7d4111ed
PH
597 def warn_if_short_id(self, argv):
598 # short YouTube ID starting with dash?
599 idxs = [
600 i for i, a in enumerate(argv)
601 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
602 if idxs:
603 correct_argv = (
7a5c1cfe 604 ['yt-dlp']
3089bc74
S
605 + [a for i, a in enumerate(argv) if i not in idxs]
606 + ['--'] + [argv[i] for i in idxs]
7d4111ed
PH
607 )
608 self.report_warning(
609 'Long argument string detected. '
610 'Use -- to separate parameters and URLs, like this:\n%s\n' %
611 args_to_str(correct_argv))
612
8222d8de
JMF
613 def add_info_extractor(self, ie):
614 """Add an InfoExtractor object to the end of the list."""
615 self._ies.append(ie)
e52d7f85
JMF
616 if not isinstance(ie, type):
617 self._ies_instances[ie.ie_key()] = ie
618 ie.set_downloader(self)
8222d8de 619
56c73665
JMF
620 def get_info_extractor(self, ie_key):
621 """
622 Get an instance of an IE with name ie_key, it will try to get one from
623 the _ies list, if there's no instance it will create a new one and add
624 it to the extractor list.
625 """
626 ie = self._ies_instances.get(ie_key)
627 if ie is None:
628 ie = get_info_extractor(ie_key)()
629 self.add_info_extractor(ie)
630 return ie
631
023fa8c4
JMF
632 def add_default_info_extractors(self):
633 """
634 Add the InfoExtractors returned by gen_extractors to the end of the list
635 """
e52d7f85 636 for ie in gen_extractor_classes():
023fa8c4
JMF
637 self.add_info_extractor(ie)
638
56d868db 639 def add_post_processor(self, pp, when='post_process'):
8222d8de 640 """Add a PostProcessor object to the end of the chain."""
5bfa4862 641 self._pps[when].append(pp)
8222d8de
JMF
642 pp.set_downloader(self)
643
ab8e5e51
AM
644 def add_post_hook(self, ph):
645 """Add the post hook"""
646 self._post_hooks.append(ph)
647
933605d7
JMF
648 def add_progress_hook(self, ph):
649 """Add the progress hook (currently only for the file downloader)"""
650 self._progress_hooks.append(ph)
8ab470f1 651
1c088fa8 652 def _bidi_workaround(self, message):
5d681e96 653 if not hasattr(self, '_output_channel'):
1c088fa8
PH
654 return message
655
5d681e96 656 assert hasattr(self, '_output_process')
11b85ce6 657 assert isinstance(message, compat_str)
6febd1c1
PH
658 line_count = message.count('\n') + 1
659 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 660 self._output_process.stdin.flush()
6febd1c1 661 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 662 for _ in range(line_count))
6febd1c1 663 return res[:-len('\n')]
1c088fa8 664
b35496d8 665 def _write_string(self, message, out=None, only_once=False):
666 if only_once:
667 if message in self._printed_messages:
668 return
669 self._printed_messages.add(message)
670 write_string(message, out=out, encoding=self.params.get('encoding'))
734f90bb 671
848887eb 672 def to_stdout(self, message, skip_eol=False, quiet=False):
0760b0a7 673 """Print message to stdout"""
8bf9319e 674 if self.params.get('logger'):
43afe285 675 self.params['logger'].debug(message)
835a1478 676 elif not quiet or self.params.get('verbose'):
677 self._write_string(
678 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
679 self._err_file if quiet else self._screen_file)
8222d8de 680
b35496d8 681 def to_stderr(self, message, only_once=False):
0760b0a7 682 """Print message to stderr"""
11b85ce6 683 assert isinstance(message, compat_str)
8bf9319e 684 if self.params.get('logger'):
43afe285
IB
685 self.params['logger'].error(message)
686 else:
b35496d8 687 self._write_string('%s\n' % self._bidi_workaround(message), self._err_file, only_once=only_once)
8222d8de 688
1e5b9a95
PH
689 def to_console_title(self, message):
690 if not self.params.get('consoletitle', False):
691 return
4bede0d8
C
692 if compat_os_name == 'nt':
693 if ctypes.windll.kernel32.GetConsoleWindow():
694 # c_wchar_p() might not be necessary if `message` is
695 # already of type unicode()
696 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
1e5b9a95 697 elif 'TERM' in os.environ:
b46696bd 698 self._write_string('\033]0;%s\007' % message, self._screen_file)
1e5b9a95 699
bdde425c
PH
700 def save_console_title(self):
701 if not self.params.get('consoletitle', False):
702 return
94c3442e
S
703 if self.params.get('simulate', False):
704 return
4bede0d8 705 if compat_os_name != 'nt' and 'TERM' in os.environ:
efd6c574 706 # Save the title on stack
734f90bb 707 self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
708
709 def restore_console_title(self):
710 if not self.params.get('consoletitle', False):
711 return
94c3442e
S
712 if self.params.get('simulate', False):
713 return
4bede0d8 714 if compat_os_name != 'nt' and 'TERM' in os.environ:
efd6c574 715 # Restore the title from stack
734f90bb 716 self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
717
718 def __enter__(self):
719 self.save_console_title()
720 return self
721
722 def __exit__(self, *args):
723 self.restore_console_title()
f89197d7 724
dca08720 725 if self.params.get('cookiefile') is not None:
1bab3437 726 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 727
8222d8de
JMF
728 def trouble(self, message=None, tb=None):
729 """Determine action to take when a download problem appears.
730
731 Depending on if the downloader has been configured to ignore
732 download errors or not, this method may throw an exception or
733 not when errors are found, after printing the message.
734
735 tb, if given, is additional traceback information.
736 """
737 if message is not None:
738 self.to_stderr(message)
739 if self.params.get('verbose'):
740 if tb is None:
741 if sys.exc_info()[0]: # if .trouble has been called from an except block
6febd1c1 742 tb = ''
8222d8de 743 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
6febd1c1 744 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
c0384f22 745 tb += encode_compat_str(traceback.format_exc())
8222d8de
JMF
746 else:
747 tb_data = traceback.format_list(traceback.extract_stack())
6febd1c1 748 tb = ''.join(tb_data)
c19bc311 749 if tb:
750 self.to_stderr(tb)
8222d8de
JMF
751 if not self.params.get('ignoreerrors', False):
752 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
753 exc_info = sys.exc_info()[1].exc_info
754 else:
755 exc_info = sys.exc_info()
756 raise DownloadError(message, exc_info)
757 self._download_retcode = 1
758
0760b0a7 759 def to_screen(self, message, skip_eol=False):
760 """Print message to stdout if not in quiet mode"""
761 self.to_stdout(
762 message, skip_eol, quiet=self.params.get('quiet', False))
763
c84aeac6 764 def report_warning(self, message, only_once=False):
8222d8de
JMF
765 '''
 766 Print the message to stderr; it will be prefixed with 'WARNING:'.
 767 If stderr is a tty file, the 'WARNING:' will be colored
768 '''
6d07ce01
JMF
769 if self.params.get('logger') is not None:
770 self.params['logger'].warning(message)
8222d8de 771 else:
ad8915b7
PH
772 if self.params.get('no_warnings'):
773 return
e9c0cdd3 774 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
6d07ce01
JMF
775 _msg_header = '\033[0;33mWARNING:\033[0m'
776 else:
777 _msg_header = 'WARNING:'
778 warning_message = '%s %s' % (_msg_header, message)
b35496d8 779 self.to_stderr(warning_message, only_once)
8222d8de
JMF
780
781 def report_error(self, message, tb=None):
782 '''
783 Do the same as trouble, but prefixes the message with 'ERROR:', colored
784 in red if stderr is a tty file.
785 '''
e9c0cdd3 786 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
6febd1c1 787 _msg_header = '\033[0;31mERROR:\033[0m'
8222d8de 788 else:
6febd1c1
PH
789 _msg_header = 'ERROR:'
790 error_message = '%s %s' % (_msg_header, message)
8222d8de
JMF
791 self.trouble(error_message, tb)
792
b35496d8 793 def write_debug(self, message, only_once=False):
0760b0a7 794 '''Log debug message or print message to stderr'''
795 if not self.params.get('verbose', False):
796 return
797 message = '[debug] %s' % message
798 if self.params.get('logger'):
799 self.params['logger'].debug(message)
800 else:
b35496d8 801 self.to_stderr(message, only_once)
0760b0a7 802
8222d8de
JMF
803 def report_file_already_downloaded(self, file_name):
804 """Report file has already been fully downloaded."""
805 try:
6febd1c1 806 self.to_screen('[download] %s has already been downloaded' % file_name)
ce02ed60 807 except UnicodeEncodeError:
6febd1c1 808 self.to_screen('[download] The file has already been downloaded')
8222d8de 809
0c3d0f51 810 def report_file_delete(self, file_name):
811 """Report that existing file will be deleted."""
812 try:
c25228e5 813 self.to_screen('Deleting existing file %s' % file_name)
0c3d0f51 814 except UnicodeEncodeError:
c25228e5 815 self.to_screen('Deleting existing file')
0c3d0f51 816
de6000d9 817 def parse_outtmpl(self):
818 outtmpl_dict = self.params.get('outtmpl', {})
819 if not isinstance(outtmpl_dict, dict):
820 outtmpl_dict = {'default': outtmpl_dict}
821 outtmpl_dict.update({
822 k: v for k, v in DEFAULT_OUTTMPL.items()
823 if not outtmpl_dict.get(k)})
824 for key, val in outtmpl_dict.items():
825 if isinstance(val, bytes):
826 self.report_warning(
827 'Parameter outtmpl is bytes, but should be a unicode string. '
828 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
829 return outtmpl_dict
830
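    # Illustrative sketch of inputs parse_outtmpl() normalizes (example
    # templates, not defaults): a plain string becomes the 'default' entry,
    # while a dict may carry per-type templates keyed by the keys of
    # OUTTMPL_TYPES (assuming 'infojson' is one such key).
    #
    #     {'outtmpl': '%(title)s.%(ext)s'}
    #     {'outtmpl': {'default': '%(title)s.%(ext)s',
    #                  'infojson': '%(title)s.%(ext)s.info.json'}}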
21cd8fae 831 def get_output_path(self, dir_type='', filename=None):
832 paths = self.params.get('paths', {})
833 assert isinstance(paths, dict)
834 path = os.path.join(
835 expand_path(paths.get('home', '').strip()),
836 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
837 filename or '')
838
839 # Temporary fix for #4787
840 # 'Treat' all problem characters by passing filename through preferredencoding
841 # to workaround encoding issues with subprocess on python2 @ Windows
842 if sys.version_info < (3, 0) and sys.platform == 'win32':
843 path = encodeFilename(path, True).decode(preferredencoding())
844 return sanitize_path(path, force=self.params.get('windowsfilenames'))
845
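    # Illustration (assumed values): with params['paths'] == {'home': '/srv/media', 'temp': 'tmp'},
    # get_output_path('temp', 'clip.mp4') joins them to '/srv/media/tmp/clip.mp4'
    # before sanitize_path() is applied.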
76a264ac 846 @staticmethod
847 def validate_outtmpl(tmpl):
848 ''' @return None or Exception object '''
849 try:
850 re.sub(
851 STR_FORMAT_RE.format(''),
852 lambda mobj: ('%' if not mobj.group('has_key') else '') + mobj.group(0),
853 tmpl
854 ) % collections.defaultdict(int)
855 return None
856 except ValueError as err:
857 return err
858
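    # Illustrative usage sketch: per the docstring above, a well-formed
    # template yields None and a malformed one yields the exception object.
    #
    #     err = YoutubeDL.validate_outtmpl('%(title)s-%(id)s.%(ext)s')
    #     if err is not None:
    #         raise err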
143db31d 859 def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
860 """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
752cda38 861 info_dict = dict(info_dict)
a439a3a4 862 na = self.params.get('outtmpl_na_placeholder', 'NA')
143db31d 863
752cda38 864 info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
53c18592 865 formatSeconds(info_dict['duration'], '-' if sanitize else ':')
143db31d 866 if info_dict.get('duration', None) is not None
867 else None)
752cda38 868 info_dict['epoch'] = int(time.time())
869 info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
870 if info_dict.get('resolution') is None:
871 info_dict['resolution'] = self.format_resolution(info_dict, default=None)
143db31d 872
143db31d 873 # For fields playlist_index and autonumber convert all occurrences
874 # of %(field)s to %(field)0Nd for backward compatibility
875 field_size_compat_map = {
752cda38 876 'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
877 'autonumber': self.params.get('autonumber_size') or 5,
143db31d 878 }
752cda38 879
385a27fa 880 TMPL_DICT = {}
881 EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE.format('[^)]*'))
882 MATH_FUNCTIONS = {
883 '+': float.__add__,
884 '-': float.__sub__,
885 }
e625be0d 886 # Field is of the form key1.key2...
887 # where keys (except first) can be string, int or slice
385a27fa 888 FIELD_RE = r'\w+(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
889 MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
890 MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
e625be0d 891 INTERNAL_FORMAT_RE = re.compile(r'''(?x)
892 (?P<negate>-)?
385a27fa 893 (?P<fields>{field})
894 (?P<maths>(?:{math_op}{math_field})*)
e625be0d 895 (?:>(?P<strf_format>.+?))?
896 (?:\|(?P<default>.*?))?
385a27fa 897 $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
752cda38 898
76a264ac 899 get_key = lambda k: traverse_obj(
900 info_dict, k.split('.'), is_user_input=True, traverse_string=True)
901
752cda38 902 def get_value(mdict):
903 # Object traversal
76a264ac 904 value = get_key(mdict['fields'])
752cda38 905 # Negative
906 if mdict['negate']:
907 value = float_or_none(value)
908 if value is not None:
909 value *= -1
910 # Do maths
385a27fa 911 offset_key = mdict['maths']
912 if offset_key:
752cda38 913 value = float_or_none(value)
914 operator = None
385a27fa 915 while offset_key:
916 item = re.match(
917 MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
918 offset_key).group(0)
919 offset_key = offset_key[len(item):]
920 if operator is None:
752cda38 921 operator = MATH_FUNCTIONS[item]
385a27fa 922 continue
923 item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
924 offset = float_or_none(item)
925 if offset is None:
926 offset = float_or_none(get_key(item))
927 try:
928 value = operator(value, multiplier * offset)
929 except (TypeError, ZeroDivisionError):
930 return None
931 operator = None
752cda38 932 # Datetime formatting
933 if mdict['strf_format']:
934 value = strftime_or_none(value, mdict['strf_format'])
935
936 return value
937
938 def create_key(outer_mobj):
939 if not outer_mobj.group('has_key'):
940 return '%{}'.format(outer_mobj.group(0))
941
942 key = outer_mobj.group('key')
943 fmt = outer_mobj.group('format')
944 mobj = re.match(INTERNAL_FORMAT_RE, key)
945 if mobj is None:
9fea350f 946 value, default, mobj = None, na, {'fields': ''}
752cda38 947 else:
e625be0d 948 mobj = mobj.groupdict()
752cda38 949 default = mobj['default'] if mobj['default'] is not None else na
950 value = get_value(mobj)
951
952 if fmt == 's' and value is not None and key in field_size_compat_map.keys():
953 fmt = '0{:d}d'.format(field_size_compat_map[key])
954
955 value = default if value is None else value
752cda38 956
76a264ac 957 if fmt == 'c':
958 value = compat_str(value)
959 if value is None:
960 value, fmt = default, 's'
961 else:
962 value = value[0]
963 elif fmt[-1] not in 'rs': # numeric
a439a3a4 964 value = float_or_none(value)
752cda38 965 if value is None:
966 value, fmt = default, 's'
967 if sanitize:
968 if fmt[-1] == 'r':
969 # If value is an object, sanitize might convert it to a string
970 # So we convert it to repr first
971 value, fmt = repr(value), '%ss' % fmt[:-1]
639f1cea 972 if fmt[-1] in 'csr':
9fea350f 973 value = sanitize(mobj['fields'].split('.')[-1], value)
974 key += '\0%s' % fmt
385a27fa 975 TMPL_DICT[key] = value
752cda38 976 return '%({key}){fmt}'.format(key=key, fmt=fmt)
977
385a27fa 978 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 979
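    # Illustrative template fields handled by prepare_outtmpl() above
    # (example keys assumed to exist in info_dict):
    #
    #     %(title)s                  plain field
    #     %(formats.0.height)s       object traversal via "." (key1.key2...)
    #     %(playlist_index+10)03d    maths on numeric fields (+/-)
    #     %(duration>%H-%M-%S)s      datetime formatting after ">"
    #     %(uploader|unknown)s       literal default after "|"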
de6000d9 980 def _prepare_filename(self, info_dict, tmpl_type='default'):
8222d8de 981 try:
586a91b6 982 sanitize = lambda k, v: sanitize_filename(
45598aab 983 compat_str(v),
1bb5c511 984 restricted=self.params.get('restrictfilenames'),
40df485f 985 is_id=(k == 'id' or k.endswith('_id')))
de6000d9 986 outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
143db31d 987 outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
d0d9ade4 988
15da37c7
S
 989 # expand_path translates '%%' into '%' and '$$' into '$',
 990 # which is not what we want, since we need to keep
 991 # '%%' intact for the template dict substitution step. Work around
 992 # this with a boundary-like separator hack.
961ea474 993 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
15da37c7
S
994 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
995
996 # outtmpl should be expand_path'ed before template dict substitution
997 # because meta fields may contain env variables we don't want to
998 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
999 # title "Hello $PATH", we don't want `$PATH` to be expanded.
1000 filename = expand_path(outtmpl).replace(sep, '') % template_dict
1001
143db31d 1002 force_ext = OUTTMPL_TYPES.get(tmpl_type)
de6000d9 1003 if force_ext is not None:
752cda38 1004 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
de6000d9 1005
bdc3fd2f
U
1006 # https://github.com/blackjack4494/youtube-dlc/issues/85
1007 trim_file_name = self.params.get('trim_file_name', False)
1008 if trim_file_name:
1009 fn_groups = filename.rsplit('.')
1010 ext = fn_groups[-1]
1011 sub_ext = ''
1012 if len(fn_groups) > 2:
1013 sub_ext = fn_groups[-2]
1014 filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
1015
0202b52a 1016 return filename
8222d8de 1017 except ValueError as err:
6febd1c1 1018 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
1019 return None
1020
de6000d9 1021 def prepare_filename(self, info_dict, dir_type='', warn=False):
1022 """Generate the output filename."""
21cd8fae 1023
de6000d9 1024 filename = self._prepare_filename(info_dict, dir_type or 'default')
1025
c84aeac6 1026 if warn:
21cd8fae 1027 if not self.params.get('paths'):
de6000d9 1028 pass
1029 elif filename == '-':
c84aeac6 1030 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
de6000d9 1031 elif os.path.isabs(filename):
c84aeac6 1032 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
de6000d9 1033 self.__prepare_filename_warned = True
1034 if filename == '-' or not filename:
1035 return filename
1036
21cd8fae 1037 return self.get_output_path(dir_type, filename)
0202b52a 1038
120fe513 1039 def _match_entry(self, info_dict, incomplete=False, silent=False):
ecdec191 1040 """ Returns None if the file should be downloaded """
8222d8de 1041
c77495e3 1042 video_title = info_dict.get('title', info_dict.get('id', 'video'))
1043
8b0d7497 1044 def check_filter():
8b0d7497 1045 if 'title' in info_dict:
1046 # This can happen when we're just evaluating the playlist
1047 title = info_dict['title']
1048 matchtitle = self.params.get('matchtitle', False)
1049 if matchtitle:
1050 if not re.search(matchtitle, title, re.IGNORECASE):
1051 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1052 rejecttitle = self.params.get('rejecttitle', False)
1053 if rejecttitle:
1054 if re.search(rejecttitle, title, re.IGNORECASE):
1055 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1056 date = info_dict.get('upload_date')
1057 if date is not None:
1058 dateRange = self.params.get('daterange', DateRange())
1059 if date not in dateRange:
1060 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1061 view_count = info_dict.get('view_count')
1062 if view_count is not None:
1063 min_views = self.params.get('min_views')
1064 if min_views is not None and view_count < min_views:
1065 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1066 max_views = self.params.get('max_views')
1067 if max_views is not None and view_count > max_views:
1068 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1069 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1070 return 'Skipping "%s" because it is age restricted' % video_title
8b0d7497 1071
1072 if not incomplete:
1073 match_filter = self.params.get('match_filter')
1074 if match_filter is not None:
1075 ret = match_filter(info_dict)
1076 if ret is not None:
1077 return ret
1078 return None
1079
c77495e3 1080 if self.in_download_archive(info_dict):
1081 reason = '%s has already been recorded in the archive' % video_title
1082 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1083 else:
1084 reason = check_filter()
1085 break_opt, break_err = 'break_on_reject', RejectedVideoReached
8b0d7497 1086 if reason is not None:
120fe513 1087 if not silent:
1088 self.to_screen('[download] ' + reason)
c77495e3 1089 if self.params.get(break_opt, False):
1090 raise break_err()
8b0d7497 1091 return reason
fe7e0c98 1092
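    # Illustrative sketch of a 'match_filter' callable as consulted above
    # (hypothetical helper; returning a string skips the video, None keeps it):
    #
    #     def skip_long_videos(info_dict):
    #         if (info_dict.get('duration') or 0) > 600:
    #             return 'Skipping %s: longer than 10 minutes' % info_dict['id']
    #         return None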
b6c45014
JMF
1093 @staticmethod
1094 def add_extra_info(info_dict, extra_info):
1095 '''Set the keys from extra_info in info dict if they are missing'''
1096 for key, value in extra_info.items():
1097 info_dict.setdefault(key, value)
1098
58f197b7 1099 def extract_info(self, url, download=True, ie_key=None, extra_info={},
61aa5ba3 1100 process=True, force_generic_extractor=False):
41d1cca3 1101 """
1102 Return a list with a dictionary for each video extracted.
1103
1104 Arguments:
1105 url -- URL to extract
1106
1107 Keyword arguments:
1108 download -- whether to download videos during extraction
1109 ie_key -- extractor key hint
1110 extra_info -- dictionary containing the extra values to add to each result
1111 process -- whether to resolve all unresolved references (URLs, playlist items),
1112 must be True for download to work.
1113 force_generic_extractor -- force using the generic extractor
1114 """
fe7e0c98 1115
61aa5ba3 1116 if not ie_key and force_generic_extractor:
d22dec74
S
1117 ie_key = 'Generic'
1118
8222d8de 1119 if ie_key:
56c73665 1120 ies = [self.get_info_extractor(ie_key)]
8222d8de
JMF
1121 else:
1122 ies = self._ies
1123
1124 for ie in ies:
1125 if not ie.suitable(url):
1126 continue
1127
9a68de12 1128 ie_key = ie.ie_key()
1129 ie = self.get_info_extractor(ie_key)
8222d8de 1130 if not ie.working():
6febd1c1
PH
1131 self.report_warning('The program functionality for this site has been marked as broken, '
1132 'and will probably not work.')
8222d8de
JMF
1133
1134 try:
d0757229 1135 temp_id = str_or_none(
63be1aab 1136 ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
1137 else ie._match_id(url))
a0566bbf 1138 except (AssertionError, IndexError, AttributeError):
1139 temp_id = None
1140 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1141 self.to_screen("[%s] %s: has already been recorded in archive" % (
1142 ie_key, temp_id))
1143 break
58f197b7 1144 return self.__extract_info(url, ie, download, extra_info, process)
a0566bbf 1145 else:
1146 self.report_error('no suitable InfoExtractor for URL %s' % url)
1147
cc9d1493 1148 def __handle_extraction_exceptions(func, handle_all_errors=True):
a0566bbf 1149 def wrapper(self, *args, **kwargs):
1150 try:
1151 return func(self, *args, **kwargs)
773f291d
S
1152 except GeoRestrictedError as e:
1153 msg = e.msg
1154 if e.countries:
1155 msg += '\nThis video is available in %s.' % ', '.join(
1156 map(ISO3166Utils.short2full, e.countries))
 1157 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1158 self.report_error(msg)
fb043a6e 1159 except ExtractorError as e: # An error we somewhat expected
2c74e6fa 1160 self.report_error(compat_str(e), e.format_traceback())
51d9739f 1161 except ThrottledDownload:
1162 self.to_stderr('\r')
1163 self.report_warning('The download speed is below throttle limit. Re-extracting data')
1164 return wrapper(self, *args, **kwargs)
8b0d7497 1165 except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
d3e5bbf4 1166 raise
8222d8de 1167 except Exception as e:
cc9d1493 1168 if handle_all_errors and self.params.get('ignoreerrors', False):
9b9c5355 1169 self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
8222d8de
JMF
1170 else:
1171 raise
a0566bbf 1172 return wrapper
1173
1174 @__handle_extraction_exceptions
58f197b7 1175 def __extract_info(self, url, ie, download, extra_info, process):
a0566bbf 1176 ie_result = ie.extract(url)
1177 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1178 return
1179 if isinstance(ie_result, list):
1180 # Backwards compatibility: old IE result format
1181 ie_result = {
1182 '_type': 'compat_list',
1183 'entries': ie_result,
1184 }
e37d0efb 1185 if extra_info.get('original_url'):
1186 ie_result.setdefault('original_url', extra_info['original_url'])
a0566bbf 1187 self.add_default_extra_info(ie_result, ie, url)
1188 if process:
1189 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1190 else:
a0566bbf 1191 return ie_result
fe7e0c98 1192
ea38e55f 1193 def add_default_extra_info(self, ie_result, ie, url):
6033d980 1194 if url is not None:
1195 self.add_extra_info(ie_result, {
1196 'webpage_url': url,
1197 'original_url': url,
1198 'webpage_url_basename': url_basename(url),
1199 })
1200 if ie is not None:
1201 self.add_extra_info(ie_result, {
1202 'extractor': ie.IE_NAME,
1203 'extractor_key': ie.ie_key(),
1204 })
ea38e55f 1205
8222d8de
JMF
1206 def process_ie_result(self, ie_result, download=True, extra_info={}):
1207 """
 1208 Take the result of the ie (may be modified) and resolve all unresolved
1209 references (URLs, playlist items).
1210
 1211 It will also download the videos if 'download' is True.
1212 Returns the resolved ie_result.
1213 """
e8ee972c
PH
1214 result_type = ie_result.get('_type', 'video')
1215
057a5206 1216 if result_type in ('url', 'url_transparent'):
134c6ea8 1217 ie_result['url'] = sanitize_url(ie_result['url'])
e37d0efb 1218 if ie_result.get('original_url'):
1219 extra_info.setdefault('original_url', ie_result['original_url'])
1220
057a5206 1221 extract_flat = self.params.get('extract_flat', False)
3089bc74
S
1222 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1223 or extract_flat is True):
ecb54191 1224 info_copy = ie_result.copy()
1225 self.add_extra_info(info_copy, extra_info)
6033d980 1226 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1227 self.add_default_extra_info(info_copy, ie, ie_result['url'])
ecb54191 1228 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
e8ee972c
PH
1229 return ie_result
1230
8222d8de 1231 if result_type == 'video':
b6c45014 1232 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1233 ie_result = self.process_video_result(ie_result, download=download)
28b0eb0f 1234 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1235 if additional_urls:
1236 # TODO: Improve MetadataFromFieldPP to allow setting a list
1237 if isinstance(additional_urls, compat_str):
1238 additional_urls = [additional_urls]
1239 self.to_screen(
1240 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1241 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1242 ie_result['additional_entries'] = [
1243 self.extract_info(
1244 url, download, extra_info,
1245 force_generic_extractor=self.params.get('force_generic_extractor'))
1246 for url in additional_urls
1247 ]
1248 return ie_result
8222d8de
JMF
1249 elif result_type == 'url':
1250 # We have to add extra_info to the results because it may be
1251 # contained in a playlist
07cce701 1252 return self.extract_info(
1253 ie_result['url'], download,
1254 ie_key=ie_result.get('ie_key'),
1255 extra_info=extra_info)
7fc3fa05
PH
1256 elif result_type == 'url_transparent':
1257 # Use the information from the embedding page
1258 info = self.extract_info(
1259 ie_result['url'], ie_key=ie_result.get('ie_key'),
1260 extra_info=extra_info, download=False, process=False)
1261
1640eb09
S
1262 # extract_info may return None when ignoreerrors is enabled and
1263 # extraction failed with an error, don't crash and return early
1264 # in this case
1265 if not info:
1266 return info
1267
412c617d
PH
1268 force_properties = dict(
1269 (k, v) for k, v in ie_result.items() if v is not None)
0396806f 1270 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
412c617d
PH
1271 if f in force_properties:
1272 del force_properties[f]
1273 new_result = info.copy()
1274 new_result.update(force_properties)
7fc3fa05 1275
0563f7ac
S
1276 # Extracted info may not be a video result (i.e.
1277 # info.get('_type', 'video') != video) but rather an url or
1278 # url_transparent. In such cases outer metadata (from ie_result)
1279 # should be propagated to inner one (info). For this to happen
1280 # _type of info should be overridden with url_transparent. This
067aa17e 1281 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac
S
1282 if new_result.get('_type') == 'url':
1283 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
1284
1285 return self.process_ie_result(
1286 new_result, download=download, extra_info=extra_info)
40fcba5e 1287 elif result_type in ('playlist', 'multi_video'):
30a074c2 1288 # Protect from infinite recursion due to recursively nested playlists
1289 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1290 webpage_url = ie_result['webpage_url']
1291 if webpage_url in self._playlist_urls:
7e85e872 1292 self.to_screen(
30a074c2 1293 '[download] Skipping already downloaded playlist: %s'
1294 % (ie_result.get('title') or ie_result.get('id')))
1295 return
7e85e872 1296
30a074c2 1297 self._playlist_level += 1
1298 self._playlist_urls.add(webpage_url)
bc516a3f 1299 self._sanitize_thumbnails(ie_result)
30a074c2 1300 try:
1301 return self.__process_playlist(ie_result, download)
1302 finally:
1303 self._playlist_level -= 1
1304 if not self._playlist_level:
1305 self._playlist_urls.clear()
8222d8de 1306 elif result_type == 'compat_list':
c9bf4114
PH
1307 self.report_warning(
1308 'Extractor %s returned a compat_list result. '
1309 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1310
8222d8de 1311 def _fixup(r):
9e1a5b84
JW
1312 self.add_extra_info(
1313 r,
9103bbc5
JMF
1314 {
1315 'extractor': ie_result['extractor'],
1316 'webpage_url': ie_result['webpage_url'],
29eb5174 1317 'webpage_url_basename': url_basename(ie_result['webpage_url']),
be97abc2 1318 'extractor_key': ie_result['extractor_key'],
9e1a5b84
JW
1319 }
1320 )
8222d8de
JMF
1321 return r
1322 ie_result['entries'] = [
b6c45014 1323 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1324 for r in ie_result['entries']
1325 ]
1326 return ie_result
1327 else:
1328 raise Exception('Invalid result type: %s' % result_type)
1329
e92caff5 1330 def _ensure_dir_exists(self, path):
1331 return make_dir(path, self.report_error)
1332
30a074c2 1333 def __process_playlist(self, ie_result, download):
1334 # We process each entry in the playlist
1335 playlist = ie_result.get('title') or ie_result.get('id')
1336 self.to_screen('[download] Downloading playlist: %s' % playlist)
1337
498f5606 1338 if 'entries' not in ie_result:
1339 raise EntryNotInPlaylist()
1340 incomplete_entries = bool(ie_result.get('requested_entries'))
1341 if incomplete_entries:
1342 def fill_missing_entries(entries, indexes):
1343 ret = [None] * max(indexes)
1344 for i, entry in zip(indexes, entries):
1345 ret[i - 1] = entry
1346 return ret
1347 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
02fd60d3 1348
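# Minimal standalone sketch (not part of YoutubeDL.py) of the padding done by
# fill_missing_entries above: with requested_entries [1, 3] and two extracted
# entries, the missing position 2 is kept as None so playlist indices stay aligned.
def _fill(entries, indexes):
    ret = [None] * max(indexes)
    for i, entry in zip(indexes, entries):
        ret[i - 1] = entry
    return ret

assert _fill(['first', 'third'], [1, 3]) == ['first', None, 'third']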
30a074c2 1349 playlist_results = []
1350
56a8fb4f 1351 playliststart = self.params.get('playliststart', 1)
30a074c2 1352 playlistend = self.params.get('playlistend')
1353 # For backwards compatibility, interpret -1 as whole list
1354 if playlistend == -1:
1355 playlistend = None
1356
1357 playlistitems_str = self.params.get('playlist_items')
1358 playlistitems = None
1359 if playlistitems_str is not None:
1360 def iter_playlistitems(format):
1361 for string_segment in format.split(','):
1362 if '-' in string_segment:
1363 start, end = string_segment.split('-')
1364 for item in range(int(start), int(end) + 1):
1365 yield int(item)
1366 else:
1367 yield int(string_segment)
1368 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1369
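# Illustrative standalone sketch of how a '--playlist-items' value such as '1-3,7'
# is expanded by iter_playlistitems above (duplicates are then dropped by orderedSet).
def _expand_playlist_items(spec):
    for segment in spec.split(','):
        if '-' in segment:
            start, end = segment.split('-')
            yield from range(int(start), int(end) + 1)
        else:
            yield int(segment)

assert list(_expand_playlist_items('1-3,7')) == [1, 2, 3, 7]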
1370 ie_entries = ie_result['entries']
56a8fb4f 1371 msg = (
1372 'Downloading %d videos' if not isinstance(ie_entries, list)
1373 else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
1374 if not isinstance(ie_entries, (list, PagedList)):
1375 ie_entries = LazyList(ie_entries)
1376
50fed816 1377 def get_entry(i):
1378 return YoutubeDL.__handle_extraction_exceptions(
cc9d1493 1379 lambda self, i: ie_entries[i - 1],
1380 False
50fed816 1381 )(self, i)
1382
56a8fb4f 1383 entries = []
1384 for i in playlistitems or itertools.count(playliststart):
1385 if playlistitems is None and playlistend is not None and playlistend < i:
1386 break
1387 entry = None
1388 try:
50fed816 1389 entry = get_entry(i)
56a8fb4f 1390 if entry is None:
498f5606 1391 raise EntryNotInPlaylist()
56a8fb4f 1392 except (IndexError, EntryNotInPlaylist):
1393 if incomplete_entries:
1394 raise EntryNotInPlaylist()
1395 elif not playlistitems:
1396 break
1397 entries.append(entry)
120fe513 1398 try:
1399 if entry is not None:
1400 self._match_entry(entry, incomplete=True, silent=True)
1401 except (ExistingVideoReached, RejectedVideoReached):
1402 break
56a8fb4f 1403 ie_result['entries'] = entries
30a074c2 1404
56a8fb4f 1405 # Save playlist_index before re-ordering
1406 entries = [
1407 ((playlistitems[i - 1] if playlistitems else i), entry)
1408 for i, entry in enumerate(entries, 1)
1409 if entry is not None]
1410 n_entries = len(entries)
498f5606 1411
498f5606 1412 if not playlistitems and (playliststart or playlistend):
56a8fb4f 1413 playlistitems = list(range(playliststart, playliststart + n_entries))
498f5606 1414 ie_result['requested_entries'] = playlistitems
1415
1416 if self.params.get('allow_playlist_files', True):
1417 ie_copy = {
1418 'playlist': playlist,
1419 'playlist_id': ie_result.get('id'),
1420 'playlist_title': ie_result.get('title'),
1421 'playlist_uploader': ie_result.get('uploader'),
1422 'playlist_uploader_id': ie_result.get('uploader_id'),
71729754 1423 'playlist_index': 0,
498f5606 1424 }
1425 ie_copy.update(dict(ie_result))
1426
1427 if self.params.get('writeinfojson', False):
1428 infofn = self.prepare_filename(ie_copy, 'pl_infojson')
1429 if not self._ensure_dir_exists(encodeFilename(infofn)):
1430 return
1431 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
1432 self.to_screen('[info] Playlist metadata is already present')
1433 else:
1434 self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
1435 try:
1436 write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
1437 except (OSError, IOError):
1438 self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
1439
681de68e 1440 # TODO: This should be passed to ThumbnailsConvertor if necessary
1441 self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1442
498f5606 1443 if self.params.get('writedescription', False):
1444 descfn = self.prepare_filename(ie_copy, 'pl_description')
1445 if not self._ensure_dir_exists(encodeFilename(descfn)):
1446 return
1447 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
1448 self.to_screen('[info] Playlist description is already present')
1449 elif ie_result.get('description') is None:
1450 self.report_warning('There\'s no playlist description to write.')
1451 else:
1452 try:
1453 self.to_screen('[info] Writing playlist description to: ' + descfn)
1454 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1455 descfile.write(ie_result['description'])
1456 except (OSError, IOError):
1457 self.report_error('Cannot write playlist description file ' + descfn)
1458 return
30a074c2 1459
1460 if self.params.get('playlistreverse', False):
1461 entries = entries[::-1]
30a074c2 1462 if self.params.get('playlistrandom', False):
1463 random.shuffle(entries)
1464
1465 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1466
56a8fb4f 1467 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
26e2805c 1468 failures = 0
1469 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
71729754 1470 for i, entry_tuple in enumerate(entries, 1):
1471 playlist_index, entry = entry_tuple
53ed7066 1472 if 'playlist_index' in self.params.get('compat_options', []):
1473 playlist_index = playlistitems[i - 1] if playlistitems else i
30a074c2 1474 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1475 # This __x_forwarded_for_ip thing is a bit ugly but requires
1476 # minimal changes
1477 if x_forwarded_for:
1478 entry['__x_forwarded_for_ip'] = x_forwarded_for
1479 extra = {
1480 'n_entries': n_entries,
f59ae581 1481 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
71729754 1482 'playlist_index': playlist_index,
1483 'playlist_autonumber': i,
30a074c2 1484 'playlist': playlist,
1485 'playlist_id': ie_result.get('id'),
1486 'playlist_title': ie_result.get('title'),
1487 'playlist_uploader': ie_result.get('uploader'),
1488 'playlist_uploader_id': ie_result.get('uploader_id'),
30a074c2 1489 'extractor': ie_result['extractor'],
1490 'webpage_url': ie_result['webpage_url'],
1491 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1492 'extractor_key': ie_result['extractor_key'],
1493 }
1494
1495 if self._match_entry(entry, incomplete=True) is not None:
1496 continue
1497
1498 entry_result = self.__process_iterable_entry(entry, download, extra)
26e2805c 1499 if not entry_result:
1500 failures += 1
1501 if failures >= max_failures:
1502 self.report_error(
1503 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1504 break
30a074c2 1505 # TODO: skip failed (empty) entries?
1506 playlist_results.append(entry_result)
1507 ie_result['entries'] = playlist_results
1508 self.to_screen('[download] Finished downloading playlist: %s' % playlist)
1509 return ie_result
1510
a0566bbf 1511 @__handle_extraction_exceptions
1512 def __process_iterable_entry(self, entry, download, extra_info):
1513 return self.process_ie_result(
1514 entry, download=download, extra_info=extra_info)
1515
67134eab
JMF
1516 def _build_format_filter(self, filter_spec):
1517 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1518
1519 OPERATORS = {
1520 '<': operator.lt,
1521 '<=': operator.le,
1522 '>': operator.gt,
1523 '>=': operator.ge,
1524 '=': operator.eq,
1525 '!=': operator.ne,
1526 }
67134eab 1527 operator_rex = re.compile(r'''(?x)\s*
187986a8 1528 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1529 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1530 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1531 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1532 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1533 if m:
1534 try:
1535 comparison_value = int(m.group('value'))
1536 except ValueError:
1537 comparison_value = parse_filesize(m.group('value'))
1538 if comparison_value is None:
1539 comparison_value = parse_filesize(m.group('value') + 'B')
1540 if comparison_value is None:
1541 raise ValueError(
1542 'Invalid value %r in format specification %r' % (
67134eab 1543 m.group('value'), filter_spec))
9ddb6925
S
1544 op = OPERATORS[m.group('op')]
1545
083c9df9 1546 if not m:
9ddb6925
S
1547 STR_OPERATORS = {
1548 '=': operator.eq,
10d33b34
YCH
1549 '^=': lambda attr, value: attr.startswith(value),
1550 '$=': lambda attr, value: attr.endswith(value),
1551 '*=': lambda attr, value: value in attr,
9ddb6925 1552 }
187986a8 1553 str_operator_rex = re.compile(r'''(?x)\s*
1554 (?P<key>[a-zA-Z0-9._-]+)\s*
1555 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1556 (?P<value>[a-zA-Z0-9._-]+)\s*
9ddb6925 1557 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1558 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925
S
1559 if m:
1560 comparison_value = m.group('value')
2cc779f4
S
1561 str_op = STR_OPERATORS[m.group('op')]
1562 if m.group('negation'):
e118a879 1563 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1564 else:
1565 op = str_op
083c9df9 1566
9ddb6925 1567 if not m:
187986a8 1568 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1569
1570 def _filter(f):
1571 actual_value = f.get(m.group('key'))
1572 if actual_value is None:
1573 return m.group('none_inclusive')
1574 return op(actual_value, comparison_value)
67134eab
JMF
1575 return _filter
1576
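# Illustrative standalone sketch (simplified, hypothetical helper) of how a numeric
# filter such as '[height<=?720]' behaves once compiled by _build_format_filter above.
# The trailing '?' (none_inclusive) lets formats that lack the field pass the filter.
import operator
import re

def _mini_filter(spec):
    m = re.fullmatch(
        r'(?P<key>\w+)\s*(?P<op><=|>=|!=|<|>|=)\s*(?P<none_inclusive>\?)?\s*(?P<value>\d+)', spec)
    op = {'<': operator.lt, '<=': operator.le, '>': operator.gt,
          '>=': operator.ge, '=': operator.eq, '!=': operator.ne}[m.group('op')]

    def _filter(f):
        actual_value = f.get(m.group('key'))
        if actual_value is None:
            # missing field passes only when the spec carries '?'
            return bool(m.group('none_inclusive'))
        return op(actual_value, int(m.group('value')))
    return _filter

assert _mini_filter('height<=720')({'height': 480}) is True
assert _mini_filter('height<=720')({'height': 1080}) is False
assert _mini_filter('height<=?720')({}) is True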
0017d9ad 1577 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1578
af0f7428
S
1579 def can_merge():
1580 merger = FFmpegMergerPP(self)
1581 return merger.available and merger.can_merge()
1582
91ebc640 1583 prefer_best = (
1584 not self.params.get('simulate', False)
1585 and download
1586 and (
1587 not can_merge()
19807826 1588 or info_dict.get('is_live', False)
de6000d9 1589 or self.outtmpl_dict['default'] == '-'))
53ed7066 1590 compat = (
1591 prefer_best
1592 or self.params.get('allow_multiple_audio_streams', False)
1593 or 'format-spec' in self.params.get('compat_opts', []))
91ebc640 1594
1595 return (
53ed7066 1596 'best/bestvideo+bestaudio' if prefer_best
1597 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1598 else 'bestvideo+bestaudio/best')
0017d9ad 1599
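# Illustrative standalone sketch of the decision in _default_format_spec above
# (simulate/download handling omitted, names are hypothetical). When ffmpeg cannot
# merge, the stream is live, or output goes to stdout ('-'), a pre-merged 'best' is
# preferred; otherwise separate video and audio streams are selected and merged.
def _pick_spec(can_merge, is_live=False, to_stdout=False, compat_format_spec=False):
    prefer_best = not can_merge or is_live or to_stdout
    compat = prefer_best or compat_format_spec
    return ('best/bestvideo+bestaudio' if prefer_best
            else 'bestvideo*+bestaudio/best' if not compat
            else 'bestvideo+bestaudio/best')

assert _pick_spec(can_merge=False) == 'best/bestvideo+bestaudio'
assert _pick_spec(can_merge=True) == 'bestvideo*+bestaudio/best'
assert _pick_spec(can_merge=True, compat_format_spec=True) == 'bestvideo+bestaudio/best'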
67134eab
JMF
1600 def build_format_selector(self, format_spec):
1601 def syntax_error(note, start):
1602 message = (
1603 'Invalid format specification: '
1604 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1605 return SyntaxError(message)
1606
1607 PICKFIRST = 'PICKFIRST'
1608 MERGE = 'MERGE'
1609 SINGLE = 'SINGLE'
0130afb7 1610 GROUP = 'GROUP'
67134eab
JMF
1611 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1612
91ebc640 1613 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1614 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 1615
e8e73840 1616 check_formats = self.params.get('check_formats')
1617
67134eab
JMF
1618 def _parse_filter(tokens):
1619 filter_parts = []
1620 for type, string, start, _, _ in tokens:
1621 if type == tokenize.OP and string == ']':
1622 return ''.join(filter_parts)
1623 else:
1624 filter_parts.append(string)
1625
232541df 1626 def _remove_unused_ops(tokens):
17cc1534 1627 # Remove operators that we don't use and join them with the surrounding strings
232541df
JMF
1628 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1629 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1630 last_string, last_start, last_end, last_line = None, None, None, None
1631 for type, string, start, end, line in tokens:
1632 if type == tokenize.OP and string == '[':
1633 if last_string:
1634 yield tokenize.NAME, last_string, last_start, last_end, last_line
1635 last_string = None
1636 yield type, string, start, end, line
1637 # everything inside brackets will be handled by _parse_filter
1638 for type, string, start, end, line in tokens:
1639 yield type, string, start, end, line
1640 if type == tokenize.OP and string == ']':
1641 break
1642 elif type == tokenize.OP and string in ALLOWED_OPS:
1643 if last_string:
1644 yield tokenize.NAME, last_string, last_start, last_end, last_line
1645 last_string = None
1646 yield type, string, start, end, line
1647 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1648 if not last_string:
1649 last_string = string
1650 last_start = start
1651 last_end = end
1652 else:
1653 last_string += string
1654 if last_string:
1655 yield tokenize.NAME, last_string, last_start, last_end, last_line
1656
cf2ac6df 1657 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
1658 selectors = []
1659 current_selector = None
1660 for type, string, start, _, _ in tokens:
1661 # ENCODING is only defined in python 3.x
1662 if type == getattr(tokenize, 'ENCODING', None):
1663 continue
1664 elif type in [tokenize.NAME, tokenize.NUMBER]:
1665 current_selector = FormatSelector(SINGLE, string, [])
1666 elif type == tokenize.OP:
cf2ac6df
JMF
1667 if string == ')':
1668 if not inside_group:
1669 # ')' will be handled by the parentheses group
1670 tokens.restore_last_token()
67134eab 1671 break
cf2ac6df 1672 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
1673 tokens.restore_last_token()
1674 break
cf2ac6df
JMF
1675 elif inside_choice and string == ',':
1676 tokens.restore_last_token()
1677 break
1678 elif string == ',':
0a31a350
JMF
1679 if not current_selector:
1680 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
1681 selectors.append(current_selector)
1682 current_selector = None
1683 elif string == '/':
d96d604e
JMF
1684 if not current_selector:
1685 raise syntax_error('"/" must follow a format selector', start)
67134eab 1686 first_choice = current_selector
cf2ac6df 1687 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 1688 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
1689 elif string == '[':
1690 if not current_selector:
1691 current_selector = FormatSelector(SINGLE, 'best', [])
1692 format_filter = _parse_filter(tokens)
1693 current_selector.filters.append(format_filter)
0130afb7
JMF
1694 elif string == '(':
1695 if current_selector:
1696 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
1697 group = _parse_format_selection(tokens, inside_group=True)
1698 current_selector = FormatSelector(GROUP, group, [])
67134eab 1699 elif string == '+':
d03cfdce 1700 if not current_selector:
1701 raise syntax_error('Unexpected "+"', start)
1702 selector_1 = current_selector
1703 selector_2 = _parse_format_selection(tokens, inside_merge=True)
1704 if not selector_2:
1705 raise syntax_error('Expected a selector', start)
1706 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab
JMF
1707 else:
1708 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
1709 elif type == tokenize.ENDMARKER:
1710 break
1711 if current_selector:
1712 selectors.append(current_selector)
1713 return selectors
1714
f8d4ad9a 1715 def _merge(formats_pair):
1716 format_1, format_2 = formats_pair
1717
1718 formats_info = []
1719 formats_info.extend(format_1.get('requested_formats', (format_1,)))
1720 formats_info.extend(format_2.get('requested_formats', (format_2,)))
1721
1722 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
551f9388 1723 get_no_more = {'video': False, 'audio': False}
f8d4ad9a 1724 for (i, fmt_info) in enumerate(formats_info):
551f9388 1725 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
1726 formats_info.pop(i)
1727 continue
1728 for aud_vid in ['audio', 'video']:
f8d4ad9a 1729 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
1730 if get_no_more[aud_vid]:
1731 formats_info.pop(i)
1732 get_no_more[aud_vid] = True
1733
1734 if len(formats_info) == 1:
1735 return formats_info[0]
1736
1737 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
1738 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
1739
1740 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
1741 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
1742
1743 output_ext = self.params.get('merge_output_format')
1744 if not output_ext:
1745 if the_only_video:
1746 output_ext = the_only_video['ext']
1747 elif the_only_audio and not video_fmts:
1748 output_ext = the_only_audio['ext']
1749 else:
1750 output_ext = 'mkv'
1751
1752 new_dict = {
1753 'requested_formats': formats_info,
1754 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
1755 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
1756 'ext': output_ext,
1757 }
1758
1759 if the_only_video:
1760 new_dict.update({
1761 'width': the_only_video.get('width'),
1762 'height': the_only_video.get('height'),
1763 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
1764 'fps': the_only_video.get('fps'),
1765 'vcodec': the_only_video.get('vcodec'),
1766 'vbr': the_only_video.get('vbr'),
1767 'stretched_ratio': the_only_video.get('stretched_ratio'),
1768 })
1769
1770 if the_only_audio:
1771 new_dict.update({
1772 'acodec': the_only_audio.get('acodec'),
1773 'abr': the_only_audio.get('abr'),
1774 })
1775
1776 return new_dict
1777
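# Minimal standalone sketch of what _merge above produces for one video-only plus one
# audio-only format when merge_output_format is unset (the single video stream's
# container decides the extension). Values below are made up for illustration.
video = {'format_id': '137', 'ext': 'mp4', 'vcodec': 'avc1', 'acodec': 'none',
         'format': '137 - 1920x1080'}
audio = {'format_id': '140', 'ext': 'm4a', 'vcodec': 'none', 'acodec': 'mp4a.40.2',
         'format': '140 - audio only'}
merged = {
    'requested_formats': [video, audio],
    'format': '+'.join(f['format'] for f in (video, audio)),
    'format_id': '+'.join(f['format_id'] for f in (video, audio)),
    'ext': video['ext'],  # the only video format present
}
assert merged['format_id'] == '137+140' and merged['ext'] == 'mp4'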
e8e73840 1778 def _check_formats(formats):
981052c9 1779 if not check_formats:
1780 yield from formats
b5ac45b1 1781 return
e8e73840 1782 for f in formats:
1783 self.to_screen('[info] Testing format %s' % f['format_id'])
21cd8fae 1784 temp_file = tempfile.NamedTemporaryFile(
1785 suffix='.tmp', delete=False,
1786 dir=self.get_output_path('temp') or None)
1787 temp_file.close()
fe346461 1788 try:
981052c9 1789 success, _ = self.dl(temp_file.name, f, test=True)
1790 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1791 success = False
fe346461 1792 finally:
21cd8fae 1793 if os.path.exists(temp_file.name):
1794 try:
1795 os.remove(temp_file.name)
1796 except OSError:
1797 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
981052c9 1798 if success:
e8e73840 1799 yield f
1800 else:
1801 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1802
67134eab 1803 def _build_selector_function(selector):
909d24dd 1804 if isinstance(selector, list): # ,
67134eab
JMF
1805 fs = [_build_selector_function(s) for s in selector]
1806
317f7ab6 1807 def selector_function(ctx):
67134eab 1808 for f in fs:
981052c9 1809 yield from f(ctx)
67134eab 1810 return selector_function
909d24dd 1811
1812 elif selector.type == GROUP: # ()
0130afb7 1813 selector_function = _build_selector_function(selector.selector)
909d24dd 1814
1815 elif selector.type == PICKFIRST: # /
67134eab
JMF
1816 fs = [_build_selector_function(s) for s in selector.selector]
1817
317f7ab6 1818 def selector_function(ctx):
67134eab 1819 for f in fs:
317f7ab6 1820 picked_formats = list(f(ctx))
67134eab
JMF
1821 if picked_formats:
1822 return picked_formats
1823 return []
67134eab 1824
981052c9 1825 elif selector.type == MERGE: # +
1826 selector_1, selector_2 = map(_build_selector_function, selector.selector)
1827
1828 def selector_function(ctx):
1829 for pair in itertools.product(
1830 selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
1831 yield _merge(pair)
1832
909d24dd 1833 elif selector.type == SINGLE: # atom
598d185d 1834 format_spec = selector.selector or 'best'
909d24dd 1835
f8d4ad9a 1836 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 1837 if format_spec == 'all':
1838 def selector_function(ctx):
981052c9 1839 yield from _check_formats(ctx['formats'])
f8d4ad9a 1840 elif format_spec == 'mergeall':
1841 def selector_function(ctx):
981052c9 1842 formats = list(_check_formats(ctx['formats']))
e01d6aa4 1843 if not formats:
1844 return
921b76ca 1845 merged_format = formats[-1]
1846 for f in formats[-2::-1]:
f8d4ad9a 1847 merged_format = _merge((merged_format, f))
1848 yield merged_format
909d24dd 1849
1850 else:
e8e73840 1851 format_fallback, format_reverse, format_idx = False, True, 1
eff63539 1852 mobj = re.match(
1853 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
1854 format_spec)
1855 if mobj is not None:
1856 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 1857 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 1858 format_type = (mobj.group('type') or [None])[0]
1859 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
1860 format_modified = mobj.group('mod') is not None
909d24dd 1861
1862 format_fallback = not format_type and not format_modified # for b, w
8326b00a 1863 _filter_f = (
eff63539 1864 (lambda f: f.get('%scodec' % format_type) != 'none')
1865 if format_type and format_modified # bv*, ba*, wv*, wa*
1866 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
1867 if format_type # bv, ba, wv, wa
1868 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
1869 if not format_modified # b, w
8326b00a 1870 else lambda f: True) # b*, w*
1871 filter_f = lambda f: _filter_f(f) and (
1872 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
67134eab 1873 else:
909d24dd 1874 filter_f = ((lambda f: f.get('ext') == format_spec)
1875 if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension
1876 else (lambda f: f.get('format_id') == format_spec)) # id
1877
1878 def selector_function(ctx):
1879 formats = list(ctx['formats'])
909d24dd 1880 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
e8e73840 1881 if format_fallback and ctx['incomplete_formats'] and not matches:
909d24dd 1882 # for extractors with incomplete formats (audio only (soundcloud)
1883 # or video only (imgur)) best/worst will fall back to
1884 # best/worst {video,audio}-only format
e8e73840 1885 matches = formats
981052c9 1886 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
1887 try:
e8e73840 1888 yield matches[format_idx - 1]
981052c9 1889 except IndexError:
1890 return
083c9df9 1891
67134eab 1892 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 1893
317f7ab6
S
1894 def final_selector(ctx):
1895 ctx_copy = copy.deepcopy(ctx)
67134eab 1896 for _filter in filters:
317f7ab6
S
1897 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
1898 return selector_function(ctx_copy)
67134eab 1899 return final_selector
083c9df9 1900
67134eab 1901 stream = io.BytesIO(format_spec.encode('utf-8'))
0130afb7 1902 try:
232541df 1903 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
0130afb7
JMF
1904 except tokenize.TokenError:
1905 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
1906
1907 class TokenIterator(object):
1908 def __init__(self, tokens):
1909 self.tokens = tokens
1910 self.counter = 0
1911
1912 def __iter__(self):
1913 return self
1914
1915 def __next__(self):
1916 if self.counter >= len(self.tokens):
1917 raise StopIteration()
1918 value = self.tokens[self.counter]
1919 self.counter += 1
1920 return value
1921
1922 next = __next__
1923
1924 def restore_last_token(self):
1925 self.counter -= 1
1926
1927 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 1928 return _build_selector_function(parsed_selector)
a9c58ad9 1929
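# Illustrative standalone sketch (not yt-dlp API) of the selector combinators built
# above: '/' (PICKFIRST) takes the first alternative that yields any formats, while
# '+' (MERGE) pairs every candidate from both sides; the real code then runs _merge()
# on each pair.
import itertools

def pickfirst(*selectors):
    def select(formats):
        for s in selectors:
            picked = list(s(formats))
            if picked:
                return picked
        return []
    return select

def merge(sel1, sel2):
    def select(formats):
        return itertools.product(sel1(formats), sel2(formats))
    return select

video_only = lambda formats: [f for f in formats if f['acodec'] == 'none']
audio_only = lambda formats: [f for f in formats if f['vcodec'] == 'none']

formats = [{'format_id': '137', 'vcodec': 'avc1', 'acodec': 'none'},
           {'format_id': '140', 'vcodec': 'none', 'acodec': 'mp4a'}]
assert [f['format_id'] for f in pickfirst(video_only, audio_only)(formats)] == ['137']
assert [(v['format_id'], a['format_id'])
        for v, a in merge(video_only, audio_only)(formats)] == [('137', '140')]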
e5660ee6
JMF
1930 def _calc_headers(self, info_dict):
1931 res = std_headers.copy()
1932
1933 add_headers = info_dict.get('http_headers')
1934 if add_headers:
1935 res.update(add_headers)
1936
1937 cookies = self._calc_cookies(info_dict)
1938 if cookies:
1939 res['Cookie'] = cookies
1940
0016b84e
S
1941 if 'X-Forwarded-For' not in res:
1942 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1943 if x_forwarded_for_ip:
1944 res['X-Forwarded-For'] = x_forwarded_for_ip
1945
e5660ee6
JMF
1946 return res
1947
1948 def _calc_cookies(self, info_dict):
5c2266df 1949 pr = sanitized_Request(info_dict['url'])
e5660ee6 1950 self.cookiejar.add_cookie_header(pr)
662435f7 1951 return pr.get_header('Cookie')
e5660ee6 1952
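# Minimal standalone sketch of the header merging in _calc_headers above (all values
# below are made up). Extractor-provided 'http_headers' are layered on top of the
# standard headers, and a Cookie header is added when the cookie jar has cookies for
# the format URL.
std_headers_example = {'User-Agent': 'Mozilla/5.0', 'Accept-Language': 'en-us,en;q=0.5'}
fmt = {'url': 'https://example.com/video.mp4',
       'http_headers': {'Referer': 'https://example.com/watch'}}

headers = dict(std_headers_example)
headers.update(fmt.get('http_headers') or {})
cookies = 'session=abc123'  # in the real code this comes from self._calc_cookies(fmt)
if cookies:
    headers['Cookie'] = cookies
assert headers['Referer'] == 'https://example.com/watch' and 'User-Agent' in headers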
b0249bca 1953 def _sanitize_thumbnails(self, info_dict):
bc516a3f 1954 thumbnails = info_dict.get('thumbnails')
1955 if thumbnails is None:
1956 thumbnail = info_dict.get('thumbnail')
1957 if thumbnail:
1958 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
1959 if thumbnails:
1960 thumbnails.sort(key=lambda t: (
1961 t.get('preference') if t.get('preference') is not None else -1,
1962 t.get('width') if t.get('width') is not None else -1,
1963 t.get('height') if t.get('height') is not None else -1,
1964 t.get('id') if t.get('id') is not None else '',
1965 t.get('url')))
b0249bca 1966
0ba692ac 1967 def thumbnail_tester():
1968 if self.params.get('check_formats'):
cca80fe6 1969 test_all = True
1970 to_screen = lambda msg: self.to_screen(f'[info] {msg}')
0ba692ac 1971 else:
cca80fe6 1972 test_all = False
0ba692ac 1973 to_screen = self.write_debug
1974
1975 def test_thumbnail(t):
cca80fe6 1976 if not test_all and not t.get('_test_url'):
1977 return True
0ba692ac 1978 to_screen('Testing thumbnail %s' % t['id'])
1979 try:
1980 self.urlopen(HEADRequest(t['url']))
1981 except network_exceptions as err:
1982 to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
1983 t['id'], t['url'], error_to_compat_str(err)))
1984 return False
1985 return True
1986
1987 return test_thumbnail
b0249bca 1988
bc516a3f 1989 for i, t in enumerate(thumbnails):
bc516a3f 1990 if t.get('id') is None:
1991 t['id'] = '%d' % i
b0249bca 1992 if t.get('width') and t.get('height'):
1993 t['resolution'] = '%dx%d' % (t['width'], t['height'])
1994 t['url'] = sanitize_url(t['url'])
0ba692ac 1995
1996 if self.params.get('check_formats') is not False:
1997 info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
1998 else:
1999 info_dict['thumbnails'] = thumbnails
bc516a3f 2000
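# Standalone sketch of the thumbnail ordering above: missing numeric fields sort as
# -1, so the highest-preference / largest thumbnail ends up last (which is why the
# code below falls back to thumbnails[-1]['url']).
thumbs = [{'url': 'large.jpg', 'width': 1280, 'height': 720},
          {'url': 'small.jpg', 'width': 640, 'height': 360},
          {'url': 'nosize.jpg'}]
thumbs.sort(key=lambda t: (
    t.get('preference') if t.get('preference') is not None else -1,
    t.get('width') if t.get('width') is not None else -1,
    t.get('height') if t.get('height') is not None else -1,
    t.get('id') if t.get('id') is not None else '',
    t.get('url')))
assert [t['url'] for t in thumbs] == ['nosize.jpg', 'small.jpg', 'large.jpg']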
dd82ffea
JMF
2001 def process_video_result(self, info_dict, download=True):
2002 assert info_dict.get('_type', 'video') == 'video'
2003
bec1fad2
PH
2004 if 'id' not in info_dict:
2005 raise ExtractorError('Missing "id" field in extractor result')
2006 if 'title' not in info_dict:
2007 raise ExtractorError('Missing "title" field in extractor result')
2008
c9969434
S
2009 def report_force_conversion(field, field_not, conversion):
2010 self.report_warning(
2011 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2012 % (field, field_not, conversion))
2013
2014 def sanitize_string_field(info, string_field):
2015 field = info.get(string_field)
2016 if field is None or isinstance(field, compat_str):
2017 return
2018 report_force_conversion(string_field, 'a string', 'string')
2019 info[string_field] = compat_str(field)
2020
2021 def sanitize_numeric_fields(info):
2022 for numeric_field in self._NUMERIC_FIELDS:
2023 field = info.get(numeric_field)
2024 if field is None or isinstance(field, compat_numeric_types):
2025 continue
2026 report_force_conversion(numeric_field, 'numeric', 'int')
2027 info[numeric_field] = int_or_none(field)
2028
2029 sanitize_string_field(info_dict, 'id')
2030 sanitize_numeric_fields(info_dict)
be6217b2 2031
dd82ffea
JMF
2032 if 'playlist' not in info_dict:
2033 # It isn't part of a playlist
2034 info_dict['playlist'] = None
2035 info_dict['playlist_index'] = None
2036
bc516a3f 2037 self._sanitize_thumbnails(info_dict)
d5519808 2038
536a55da 2039 thumbnail = info_dict.get('thumbnail')
bc516a3f 2040 thumbnails = info_dict.get('thumbnails')
536a55da
S
2041 if thumbnail:
2042 info_dict['thumbnail'] = sanitize_url(thumbnail)
2043 elif thumbnails:
d5519808
PH
2044 info_dict['thumbnail'] = thumbnails[-1]['url']
2045
ae30b840 2046 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2047 info_dict['display_id'] = info_dict['id']
2048
10db0d2f 2049 for ts_key, date_key in (
2050 ('timestamp', 'upload_date'),
2051 ('release_timestamp', 'release_date'),
2052 ):
2053 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2054 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2055 # see http://bugs.python.org/issue1646728)
2056 try:
2057 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2058 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2059 except (ValueError, OverflowError, OSError):
2060 pass
9d2ecdbc 2061
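# Tiny standalone example of the conversion above: a Unix 'timestamp' becomes a
# YYYYMMDD 'upload_date' (UTC, using utcfromtimestamp as the code does); out-of-range
# values are simply skipped.
import datetime
assert datetime.datetime.utcfromtimestamp(1626825600).strftime('%Y%m%d') == '20210721'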
ae30b840 2062 live_keys = ('is_live', 'was_live')
2063 live_status = info_dict.get('live_status')
2064 if live_status is None:
2065 for key in live_keys:
2066 if info_dict.get(key) is False:
2067 continue
2068 if info_dict.get(key):
2069 live_status = key
2070 break
2071 if all(info_dict.get(key) is False for key in live_keys):
2072 live_status = 'not_live'
2073 if live_status:
2074 info_dict['live_status'] = live_status
2075 for key in live_keys:
2076 if info_dict.get(key) is None:
2077 info_dict[key] = (live_status == key)
2078
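# Minimal standalone example of the back-filling above: once live_status is known,
# missing is_live/was_live booleans are derived from it.
info = {'live_status': 'was_live'}
for key in ('is_live', 'was_live'):
    if info.get(key) is None:
        info[key] = (info['live_status'] == key)
assert info == {'live_status': 'was_live', 'is_live': False, 'was_live': True}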
33d2fc2f
S
2079 # Auto generate title fields corresponding to the *_number fields when missing
2080 # in order to always have clean titles. This is very common for TV series.
2081 for field in ('chapter', 'season', 'episode'):
2082 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2083 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2084
05108a49
S
2085 for cc_kind in ('subtitles', 'automatic_captions'):
2086 cc = info_dict.get(cc_kind)
2087 if cc:
2088 for _, subtitle in cc.items():
2089 for subtitle_format in subtitle:
2090 if subtitle_format.get('url'):
2091 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2092 if subtitle_format.get('ext') is None:
2093 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2094
2095 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2096 subtitles = info_dict.get('subtitles')
4bba3716 2097
360e1ca5 2098 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2099 info_dict['id'], subtitles, automatic_captions)
a504ced0 2100
dd82ffea
JMF
2101 # We now pick which formats have to be downloaded
2102 if info_dict.get('formats') is None:
2103 # There's only one format available
2104 formats = [info_dict]
2105 else:
2106 formats = info_dict['formats']
2107
db95dc13 2108 if not formats:
b7da73eb 2109 if not self.params.get('ignore_no_formats_error'):
2110 raise ExtractorError('No video formats found!')
2111 else:
2112 self.report_warning('No video formats found!')
db95dc13 2113
73af5cc8
S
2114 def is_wellformed(f):
2115 url = f.get('url')
a5ac0c47 2116 if not url:
73af5cc8
S
2117 self.report_warning(
2118 '"url" field is missing or empty - skipping format, '
2119 'there is an error in extractor')
a5ac0c47
S
2120 return False
2121 if isinstance(url, bytes):
2122 sanitize_string_field(f, 'url')
2123 return True
73af5cc8
S
2124
2125 # Filter out malformed formats for better extraction robustness
2126 formats = list(filter(is_wellformed, formats))
2127
181c7053
S
2128 formats_dict = {}
2129
dd82ffea 2130 # We check that all the formats have the format and format_id fields
db95dc13 2131 for i, format in enumerate(formats):
c9969434
S
2132 sanitize_string_field(format, 'format_id')
2133 sanitize_numeric_fields(format)
dcf77cf1 2134 format['url'] = sanitize_url(format['url'])
e74e3b63 2135 if not format.get('format_id'):
8016c922 2136 format['format_id'] = compat_str(i)
e2effb08
S
2137 else:
2138 # Sanitize format_id from characters used in format selector expression
ec85ded8 2139 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2140 format_id = format['format_id']
2141 if format_id not in formats_dict:
2142 formats_dict[format_id] = []
2143 formats_dict[format_id].append(format)
2144
2145 # Make sure all formats have unique format_id
2146 for format_id, ambiguous_formats in formats_dict.items():
2147 if len(ambiguous_formats) > 1:
2148 for i, format in enumerate(ambiguous_formats):
2149 format['format_id'] = '%s-%d' % (format_id, i)
2150
2151 for i, format in enumerate(formats):
8c51aa65 2152 if format.get('format') is None:
6febd1c1 2153 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2154 id=format['format_id'],
2155 res=self.format_resolution(format),
6febd1c1 2156 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
8c51aa65 2157 )
c1002e96 2158 # Automatically determine file extension if missing
5b1d8575 2159 if format.get('ext') is None:
cce929ea 2160 format['ext'] = determine_ext(format['url']).lower()
b5559424
S
2161 # Automatically determine protocol if missing (useful for format
2162 # selection purposes)
6f0be937 2163 if format.get('protocol') is None:
b5559424 2164 format['protocol'] = determine_protocol(format)
e5660ee6
JMF
2165 # Add HTTP headers, so that external programs can use them from the
2166 # json output
2167 full_format_info = info_dict.copy()
2168 full_format_info.update(format)
2169 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2170 # Remove private housekeeping stuff
2171 if '__x_forwarded_for_ip' in info_dict:
2172 del info_dict['__x_forwarded_for_ip']
dd82ffea 2173
4bcc7bd1 2174 # TODO Central sorting goes here
99e206d5 2175
b7da73eb 2176 if formats and formats[0] is not info_dict:
b3d9ef88
JMF
2177 # only set the 'formats' field if the original info_dict lists them;
2178 # otherwise we end up with a circular reference: the first (and only)
f89197d7 2179 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2180 # which can't be exported to json
b3d9ef88 2181 info_dict['formats'] = formats
4ec82a72 2182
2183 info_dict, _ = self.pre_process(info_dict)
2184
169dbde9 2185 list_only = self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles')
2186 if list_only:
2187 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2188 if self.params.get('list_thumbnails'):
2189 self.list_thumbnails(info_dict)
2190 if self.params.get('listformats'):
2191 if not info_dict.get('formats'):
2192 raise ExtractorError('No video formats found', expected=True)
2193 self.list_formats(info_dict)
2194 if self.params.get('listsubtitles'):
2195 if 'automatic_captions' in info_dict:
2196 self.list_subtitles(
2197 info_dict['id'], automatic_captions, 'automatic captions')
2198 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
bfaae0a7 2199 return
2200
187986a8 2201 format_selector = self.format_selector
2202 if format_selector is None:
0017d9ad 2203 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2204 self.write_debug('Default format spec: %s' % req_format)
187986a8 2205 format_selector = self.build_format_selector(req_format)
317f7ab6
S
2206
2207 # While in format selection we may need to have access to the original
2208 # format set in order to calculate some metrics or do some processing.
2209 # For now we need to be able to guess whether original formats provided
2210 # by extractor are incomplete or not (i.e. whether extractor provides only
2211 # video-only or audio-only formats) for proper formats selection for
2212 # extractors with such incomplete formats (see
067aa17e 2213 # https://github.com/ytdl-org/youtube-dl/pull/5556).
317f7ab6
S
2214 # Since formats may be filtered during format selection and may not match
2215 # the original formats, the results may be incorrect. Thus original formats
2216 # or pre-calculated metrics should be passed to format selection routines
2217 # as well.
2218 # We will pass a context object containing all necessary additional data
2219 # instead of just formats.
2220 # This fixes incorrect format selection issue (see
067aa17e 2221 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2e221ca3 2222 incomplete_formats = (
317f7ab6 2223 # All formats are video-only or
3089bc74 2224 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
317f7ab6 2225 # all formats are audio-only
3089bc74 2226 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
317f7ab6
S
2227
2228 ctx = {
2229 'formats': formats,
2230 'incomplete_formats': incomplete_formats,
2231 }
2232
2233 formats_to_download = list(format_selector(ctx))
dd82ffea 2234 if not formats_to_download:
b7da73eb 2235 if not self.params.get('ignore_no_formats_error'):
2236 raise ExtractorError('Requested format is not available', expected=True)
2237 else:
2238 self.report_warning('Requested format is not available')
4513a41a
A
2239 # Process what we can, even without any available formats.
2240 self.process_info(dict(info_dict))
b7da73eb 2241 elif download:
2242 self.to_screen(
07cce701 2243 '[info] %s: Downloading %d format(s): %s' % (
2244 info_dict['id'], len(formats_to_download),
2245 ", ".join([f['format_id'] for f in formats_to_download])))
b7da73eb 2246 for fmt in formats_to_download:
dd82ffea 2247 new_info = dict(info_dict)
4ec82a72 2248 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2249 new_info['__original_infodict'] = info_dict
b7da73eb 2250 new_info.update(fmt)
dd82ffea
JMF
2251 self.process_info(new_info)
2252 # We update the info dict with the best quality format (backwards compatibility)
b7da73eb 2253 if formats_to_download:
2254 info_dict.update(formats_to_download[-1])
dd82ffea
JMF
2255 return info_dict
2256
98c70d6f 2257 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2258 """Select the requested subtitles and their format"""
98c70d6f
JMF
2259 available_subs = {}
2260 if normal_subtitles and self.params.get('writesubtitles'):
2261 available_subs.update(normal_subtitles)
2262 if automatic_captions and self.params.get('writeautomaticsub'):
2263 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
2264 if lang not in available_subs:
2265 available_subs[lang] = cap_info
2266
4d171848
JMF
2267 if (not self.params.get('writesubtitles') and not
2268 self.params.get('writeautomaticsub') or not
2269 available_subs):
2270 return None
a504ced0 2271
c32b0aab 2272 all_sub_langs = available_subs.keys()
a504ced0 2273 if self.params.get('allsubtitles', False):
c32b0aab 2274 requested_langs = all_sub_langs
2275 elif self.params.get('subtitleslangs', False):
2276 requested_langs = set()
2277 for lang in self.params.get('subtitleslangs'):
2278 if lang == 'all':
2279 requested_langs.update(all_sub_langs)
2280 continue
2281 discard = lang[0] == '-'
2282 if discard:
2283 lang = lang[1:]
2284 current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
2285 if discard:
2286 for lang in current_langs:
2287 requested_langs.discard(lang)
2288 else:
2289 requested_langs.update(current_langs)
2290 elif 'en' in available_subs:
2291 requested_langs = ['en']
a504ced0 2292 else:
c32b0aab 2293 requested_langs = [list(all_sub_langs)[0]]
e632bce2 2294 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
a504ced0
JMF
2295
2296 formats_query = self.params.get('subtitlesformat', 'best')
2297 formats_preference = formats_query.split('/') if formats_query else []
2298 subs = {}
2299 for lang in requested_langs:
2300 formats = available_subs.get(lang)
2301 if formats is None:
2302 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2303 continue
a504ced0
JMF
2304 for ext in formats_preference:
2305 if ext == 'best':
2306 f = formats[-1]
2307 break
2308 matches = list(filter(lambda f: f['ext'] == ext, formats))
2309 if matches:
2310 f = matches[-1]
2311 break
2312 else:
2313 f = formats[-1]
2314 self.report_warning(
2315 'No subtitle format found matching "%s" for language %s, '
2316 'using %s' % (formats_query, lang, f['ext']))
2317 subs[lang] = f
2318 return subs
2319
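# Standalone sketch of the '--sub-langs' matching above: each requested entry is
# treated as a regex anchored with '$', 'all' selects everything, and a leading '-'
# removes matching languages again (e.g. ['all', '-live_chat']).
import re
all_sub_langs = ['en', 'en-US', 'fr', 'live_chat']
requested_langs = set()
for lang in ['all', '-live_chat']:
    if lang == 'all':
        requested_langs.update(all_sub_langs)
        continue
    discard = lang[0] == '-'
    if discard:
        lang = lang[1:]
    current_langs = list(filter(re.compile(lang + '$').match, all_sub_langs))
    if discard:
        requested_langs.difference_update(current_langs)
    else:
        requested_langs.update(current_langs)
assert requested_langs == {'en', 'en-US', 'fr'}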
d06daf23 2320 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2321 def print_mandatory(field, actual_field=None):
2322 if actual_field is None:
2323 actual_field = field
d06daf23 2324 if (self.params.get('force%s' % field, False)
53c18592 2325 and (not incomplete or info_dict.get(actual_field) is not None)):
2326 self.to_stdout(info_dict[actual_field])
d06daf23
S
2327
2328 def print_optional(field):
2329 if (self.params.get('force%s' % field, False)
2330 and info_dict.get(field) is not None):
2331 self.to_stdout(info_dict[field])
2332
53c18592 2333 info_dict = info_dict.copy()
2334 if filename is not None:
2335 info_dict['filename'] = filename
2336 if info_dict.get('requested_formats') is not None:
2337 # For RTMP URLs, also include the playpath
2338 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2339 elif 'url' in info_dict:
2340 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2341
2342 for tmpl in self.params.get('forceprint', []):
2343 if re.match(r'\w+$', tmpl):
2344 tmpl = '%({})s'.format(tmpl)
2345 tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
2346 self.to_stdout(tmpl % info_copy)
2347
d06daf23
S
2348 print_mandatory('title')
2349 print_mandatory('id')
53c18592 2350 print_mandatory('url', 'urls')
d06daf23
S
2351 print_optional('thumbnail')
2352 print_optional('description')
53c18592 2353 print_optional('filename')
d06daf23
S
2354 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
2355 self.to_stdout(formatSeconds(info_dict['duration']))
2356 print_mandatory('format')
53c18592 2357
d06daf23 2358 if self.params.get('forcejson', False):
277d6ff5 2359 self.post_extract(info_dict)
75d43ca0 2360 self.to_stdout(json.dumps(info_dict, default=repr))
d06daf23 2361
e8e73840 2362 def dl(self, name, info, subtitle=False, test=False):
2363
2364 if test:
2365 verbose = self.params.get('verbose')
2366 params = {
2367 'test': True,
2368 'quiet': not verbose,
2369 'verbose': verbose,
2370 'noprogress': not verbose,
2371 'nopart': True,
2372 'skip_unavailable_fragments': False,
2373 'keep_fragments': False,
2374 'overwrites': True,
2375 '_no_ytdl_file': True,
2376 }
2377 else:
2378 params = self.params
2379 fd = get_suitable_downloader(info, params)(self, params)
2380 if not test:
2381 for ph in self._progress_hooks:
2382 fd.add_progress_hook(ph)
18e674b4 2383 urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
2384 self.write_debug('Invoking downloader on "%s"' % urls)
e8e73840 2385 new_info = dict(info)
2386 if new_info.get('http_headers') is None:
2387 new_info['http_headers'] = self._calc_headers(new_info)
2388 return fd.download(name, new_info, subtitle)
2389
8222d8de
JMF
2390 def process_info(self, info_dict):
2391 """Process a single resolved IE result."""
2392
2393 assert info_dict.get('_type', 'video') == 'video'
fd288278 2394
0202b52a 2395 info_dict.setdefault('__postprocessors', [])
2396
fd288278
PH
2397 max_downloads = self.params.get('max_downloads')
2398 if max_downloads is not None:
2399 if self._num_downloads >= int(max_downloads):
2400 raise MaxDownloadsReached()
8222d8de 2401
d06daf23 2402 # TODO: backward compatibility, to be removed
8222d8de 2403 info_dict['fulltitle'] = info_dict['title']
8222d8de 2404
4513a41a 2405 if 'format' not in info_dict and 'ext' in info_dict:
8222d8de
JMF
2406 info_dict['format'] = info_dict['ext']
2407
c77495e3 2408 if self._match_entry(info_dict) is not None:
8222d8de
JMF
2409 return
2410
277d6ff5 2411 self.post_extract(info_dict)
fd288278 2412 self._num_downloads += 1
8222d8de 2413
dcf64d43 2414 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2415 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2416 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2417 files_to_move = {}
8222d8de
JMF
2418
2419 # Forced printings
4513a41a 2420 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 2421
8222d8de 2422 if self.params.get('simulate', False):
2d30509f 2423 if self.params.get('force_write_download_archive', False):
2424 self.record_download_archive(info_dict)
2425
2426 # Do nothing else if in simulate mode
8222d8de
JMF
2427 return
2428
de6000d9 2429 if full_filename is None:
8222d8de
JMF
2430 return
2431
e92caff5 2432 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 2433 return
e92caff5 2434 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
2435 return
2436
2437 if self.params.get('writedescription', False):
de6000d9 2438 descfn = self.prepare_filename(info_dict, 'description')
e92caff5 2439 if not self._ensure_dir_exists(encodeFilename(descfn)):
0202b52a 2440 return
0c3d0f51 2441 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
6febd1c1 2442 self.to_screen('[info] Video description is already present')
f00fd51d
JMF
2443 elif info_dict.get('description') is None:
2444 self.report_warning('There\'s no description to write.')
7b6fefc9
PH
2445 else:
2446 try:
6febd1c1 2447 self.to_screen('[info] Writing video description to: ' + descfn)
7b6fefc9
PH
2448 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
2449 descfile.write(info_dict['description'])
7b6fefc9 2450 except (OSError, IOError):
6febd1c1 2451 self.report_error('Cannot write description file ' + descfn)
7b6fefc9 2452 return
8222d8de 2453
1fb07d10 2454 if self.params.get('writeannotations', False):
de6000d9 2455 annofn = self.prepare_filename(info_dict, 'annotation')
e92caff5 2456 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 2457 return
0c3d0f51 2458 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 2459 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
2460 elif not info_dict.get('annotations'):
2461 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
2462 else:
2463 try:
6febd1c1 2464 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
2465 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2466 annofile.write(info_dict['annotations'])
2467 except (KeyError, TypeError):
6febd1c1 2468 self.report_warning('There are no annotations to write.')
7b6fefc9 2469 except (OSError, IOError):
6febd1c1 2470 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 2471 return
1fb07d10 2472
c4a91be7 2473 subtitles_are_requested = any([self.params.get('writesubtitles', False),
0b7f3118 2474 self.params.get('writeautomaticsub')])
c4a91be7 2475
c84dd8a9 2476 if subtitles_are_requested and info_dict.get('requested_subtitles'):
8222d8de
JMF
2477 # subtitle download errors are already handled as troubles by the relevant IE,
2478 # so processing silently goes on when used with an IE that does not support them
c84dd8a9 2479 subtitles = info_dict['requested_subtitles']
fa57af1e 2480 # ie = self.get_info_extractor(info_dict['extractor_key'])
a504ced0
JMF
2481 for sub_lang, sub_info in subtitles.items():
2482 sub_format = sub_info['ext']
56d868db 2483 sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
2484 sub_filename_final = subtitles_filename(
2485 self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
0c3d0f51 2486 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
5ff1bc0c 2487 self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
dcf64d43 2488 sub_info['filepath'] = sub_filename
0202b52a 2489 files_to_move[sub_filename] = sub_filename_final
a504ced0 2490 else:
0c9df79e 2491 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
5ff1bc0c
RA
2492 if sub_info.get('data') is not None:
2493 try:
2494 # Use newline='' to prevent conversion of newline characters
067aa17e 2495 # See https://github.com/ytdl-org/youtube-dl/issues/10268
5ff1bc0c
RA
2496 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
2497 subfile.write(sub_info['data'])
dcf64d43 2498 sub_info['filepath'] = sub_filename
0202b52a 2499 files_to_move[sub_filename] = sub_filename_final
5ff1bc0c
RA
2500 except (OSError, IOError):
2501 self.report_error('Cannot write subtitles file ' + sub_filename)
2502 return
7b6fefc9 2503 else:
5ff1bc0c 2504 try:
e8e73840 2505 self.dl(sub_filename, sub_info.copy(), subtitle=True)
dcf64d43 2506 sub_info['filepath'] = sub_filename
0202b52a 2507 files_to_move[sub_filename] = sub_filename_final
fe346461 2508 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
5ff1bc0c
RA
2509 self.report_warning('Unable to download subtitle for "%s": %s' %
2510 (sub_lang, error_to_compat_str(err)))
2511 continue
8222d8de 2512
8222d8de 2513 if self.params.get('writeinfojson', False):
de6000d9 2514 infofn = self.prepare_filename(info_dict, 'infojson')
e92caff5 2515 if not self._ensure_dir_exists(encodeFilename(infofn)):
0202b52a 2516 return
0c3d0f51 2517 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
66c935fb 2518 self.to_screen('[info] Video metadata is already present')
7b6fefc9 2519 else:
66c935fb 2520 self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
7b6fefc9 2521 try:
75d43ca0 2522 write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
7b6fefc9 2523 except (OSError, IOError):
66c935fb 2524 self.report_error('Cannot write video metadata to JSON file ' + infofn)
7b6fefc9 2525 return
de6000d9 2526 info_dict['__infojson_filename'] = infofn
8222d8de 2527
56d868db 2528 for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
2529 thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
2530 thumb_filename = replace_extension(
2531 self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
dcf64d43 2532 files_to_move[thumb_filename_temp] = thumb_filename
8222d8de 2533
732044af 2534 # Write internet shortcut files
2535 url_link = webloc_link = desktop_link = False
2536 if self.params.get('writelink', False):
2537 if sys.platform == "darwin": # macOS.
2538 webloc_link = True
2539 elif sys.platform.startswith("linux"):
2540 desktop_link = True
2541 else: # if sys.platform in ['win32', 'cygwin']:
2542 url_link = True
2543 if self.params.get('writeurllink', False):
2544 url_link = True
2545 if self.params.get('writewebloclink', False):
2546 webloc_link = True
2547 if self.params.get('writedesktoplink', False):
2548 desktop_link = True
2549
2550 if url_link or webloc_link or desktop_link:
2551 if 'webpage_url' not in info_dict:
2552 self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
2553 return
2554 ascii_url = iri_to_uri(info_dict['webpage_url'])
2555
2556 def _write_link_file(extension, template, newline, embed_filename):
0202b52a 2557 linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
10e3742e 2558 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
732044af 2559 self.to_screen('[info] Internet shortcut is already present')
2560 else:
2561 try:
2562 self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
2563 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
2564 template_vars = {'url': ascii_url}
2565 if embed_filename:
2566 template_vars['filename'] = linkfn[:-(len(extension) + 1)]
2567 linkfile.write(template % template_vars)
2568 except (OSError, IOError):
2569 self.report_error('Cannot write internet shortcut ' + linkfn)
2570 return False
2571 return True
2572
2573 if url_link:
2574 if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
2575 return
2576 if webloc_link:
2577 if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
2578 return
2579 if desktop_link:
2580 if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
2581 return
2582
56d868db 2583 try:
2584 info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2585 except PostProcessingError as err:
2586 self.report_error('Preprocessing: %s' % str(err))
2587 return
2588
732044af 2589 must_record_download_archive = False
56d868db 2590 if self.params.get('skip_download', False):
2591 info_dict['filepath'] = temp_filename
2592 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2593 info_dict['__files_to_move'] = files_to_move
2594 info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
2595 else:
2596 # Download
4340deca 2597 try:
0202b52a 2598
6b591b29 2599 def existing_file(*filepaths):
2600 ext = info_dict.get('ext')
2601 final_ext = self.params.get('final_ext', ext)
2602 existing_files = []
2603 for file in orderedSet(filepaths):
2604 if final_ext != ext:
2605 converted = replace_extension(file, final_ext, ext)
2606 if os.path.exists(encodeFilename(converted)):
2607 existing_files.append(converted)
2608 if os.path.exists(encodeFilename(file)):
2609 existing_files.append(file)
2610
2611 if not existing_files or self.params.get('overwrites', False):
2612 for file in orderedSet(existing_files):
2613 self.report_file_delete(file)
2614 os.remove(encodeFilename(file))
2615 return None
2616
2617 self.report_file_already_downloaded(existing_files[0])
2618 info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
2619 return existing_files[0]
0202b52a 2620
2621 success = True
4340deca 2622 if info_dict.get('requested_formats') is not None:
81cd954a
S
2623
2624 def compatible_formats(formats):
d03cfdce 2625 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
2626 video_formats = [format for format in formats if format.get('vcodec') != 'none']
2627 audio_formats = [format for format in formats if format.get('acodec') != 'none']
2628 if len(video_formats) > 2 or len(audio_formats) > 2:
2629 return False
2630
81cd954a 2631 # Check extension
d03cfdce 2632 exts = set(format.get('ext') for format in formats)
2633 COMPATIBLE_EXTS = (
2634 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
2635 set(('webm',)),
2636 )
2637 for ext_sets in COMPATIBLE_EXTS:
2638 if ext_sets.issuperset(exts):
2639 return True
81cd954a
S
2640 # TODO: Check acodec/vcodec
2641 return False
2642
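# Standalone sketch of the extension check in compatible_formats above: requested
# formats are merged into their native container only when every extension belongs to
# a single compatible family; otherwise the merge falls back to mkv.
COMPATIBLE_EXTS = (
    {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
    {'webm'},
)

def fits_one_family(exts):
    return any(family.issuperset(exts) for family in COMPATIBLE_EXTS)

assert fits_one_family({'mp4', 'm4a'}) is True
assert fits_one_family({'mp4', 'webm'}) is False   # mixed -> merged into mkv instead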
2643 requested_formats = info_dict['requested_formats']
0202b52a 2644 old_ext = info_dict['ext']
3b297919 2645 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
2646 info_dict['ext'] = 'mkv'
2647 self.report_warning(
2648 'Requested formats are incompatible for merge and will be merged into mkv.')
0202b52a 2649
2650 def correct_ext(filename):
2651 filename_real_ext = os.path.splitext(filename)[1][1:]
2652 filename_wo_ext = (
2653 os.path.splitext(filename)[0]
2654 if filename_real_ext == old_ext
2655 else filename)
2656 return '%s.%s' % (filename_wo_ext, info_dict['ext'])
2657
38c6902b 2658 # Ensure filename always has a correct extension for successful merge
0202b52a 2659 full_filename = correct_ext(full_filename)
2660 temp_filename = correct_ext(temp_filename)
2661 dl_filename = existing_file(full_filename, temp_filename)
1ea24129 2662 info_dict['__real_download'] = False
18e674b4 2663
2664 _protocols = set(determine_protocol(f) for f in requested_formats)
2665 if len(_protocols) == 1:
2666 info_dict['protocol'] = _protocols.pop()
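# If every requested format uses the same protocol and ffmpeg (FFmpegFD) is the downloader
# that would be chosen, the streams can be fetched and muxed in a single ffmpeg run,
# so the separate download-then-merge path below is skipped.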
2667 directly_mergable = (
2668 'no-direct-merge' not in self.params.get('compat_opts', [])
2669 and info_dict.get('protocol') is not None # All requested formats have same protocol
2670 and not self.params.get('allow_unplayable_formats')
2671 and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')
2672 if directly_mergable:
2673 info_dict['url'] = requested_formats[0]['url']
2674 # Treat it as a single download
2675 dl_filename = existing_file(full_filename, temp_filename)
2676 if dl_filename is None:
2677 success, real_download = self.dl(temp_filename, info_dict)
2678 info_dict['__real_download'] = real_download
2679 else:
2680 downloaded = []
2681 merger = FFmpegMergerPP(self)
2682 if self.params.get('allow_unplayable_formats'):
2683 self.report_warning(
2684 'You have requested merging of multiple formats '
2685 'while also allowing unplayable formats to be downloaded. '
2686 'The formats won\'t be merged to prevent data corruption.')
2687 elif not merger.available:
2688 self.report_warning(
2689 'You have requested merging of multiple formats but ffmpeg is not installed. '
2690 'The formats won\'t be merged.')
2691
2692 if dl_filename is None:
2693 for f in requested_formats:
2694 new_info = dict(info_dict)
2695 del new_info['requested_formats']
2696 new_info.update(f)
2697 fname = prepend_extension(
2698 self.prepare_filename(new_info, 'temp'),
2699 'f%s' % f['format_id'], new_info['ext'])
2700 if not self._ensure_dir_exists(fname):
2701 return
2702 downloaded.append(fname)
2703 partial_success, real_download = self.dl(fname, new_info)
2704 info_dict['__real_download'] = info_dict['__real_download'] or real_download
2705 success = success and partial_success
2706 if merger.available and not self.params.get('allow_unplayable_formats'):
2707 info_dict['__postprocessors'].append(merger)
2708 info_dict['__files_to_merge'] = downloaded
2709 # Even if nothing was actually downloaded, the merge itself only happens now
2710 info_dict['__real_download'] = True
2711 else:
2712 for file in downloaded:
2713 files_to_move[file] = None
4340deca
P
2714 else:
2715 # Just a single file
0202b52a 2716 dl_filename = existing_file(full_filename, temp_filename)
2717 if dl_filename is None:
e8e73840 2718 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 2719 info_dict['__real_download'] = real_download
2720
0202b52a 2721 dl_filename = dl_filename or temp_filename
c571435f 2722 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 2723
3158150c 2724 except network_exceptions as err:
7960b056 2725 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca
P
2726 return
2727 except (OSError, IOError) as err:
2728 raise UnavailableVideoError(err)
2729 except (ContentTooShortError, ) as err:
2730 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
2731 return
8222d8de 2732
de6000d9 2733 if success and full_filename != '-':
f17f8651 2734
fd7cfb64 2735 def fixup():
2736 do_fixup = True
2737 fixup_policy = self.params.get('fixup')
2738 vid = info_dict['id']
2739
2740 if fixup_policy in ('ignore', 'never'):
2741 return
2742 elif fixup_policy == 'warn':
2743 do_fixup = False
f89b3e2d 2744 elif fixup_policy != 'force':
2745 assert fixup_policy in ('detect_or_warn', None)
2746 if not info_dict.get('__real_download'):
2747 do_fixup = False
fd7cfb64 2748
2749 def ffmpeg_fixup(cndn, msg, cls):
2750 if not cndn:
2751 return
2752 if not do_fixup:
2753 self.report_warning(f'{vid}: {msg}')
2754 return
2755 pp = cls(self)
2756 if pp.available:
2757 info_dict['__postprocessors'].append(pp)
2758 else:
2759 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
2760
2761 stretched_ratio = info_dict.get('stretched_ratio')
2762 ffmpeg_fixup(
2763 stretched_ratio not in (1, None),
2764 f'Non-uniform pixel ratio {stretched_ratio}',
2765 FFmpegFixupStretchedPP)
2766
2767 ffmpeg_fixup(
2768 (info_dict.get('requested_formats') is None
2769 and info_dict.get('container') == 'm4a_dash'
2770 and info_dict.get('ext') == 'm4a'),
2771 'writing DASH m4a. Only some players support this container',
2772 FFmpegFixupM4aPP)
2773
2774 downloader = (get_suitable_downloader(info_dict, self.params).__name__
2775 if 'protocol' in info_dict else None)
2776 ffmpeg_fixup(downloader == 'HlsFD', 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
e36d50c5 2777 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
2778 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 2779
2780 fixup()
8222d8de 2781 try:
23c1a667 2782 info_dict = self.post_process(dl_filename, info_dict, files_to_move)
af819c21 2783 except PostProcessingError as err:
2784 self.report_error('Postprocessing: %s' % str(err))
8222d8de 2785 return
ab8e5e51
AM
2786 try:
2787 for ph in self._post_hooks:
23c1a667 2788 ph(info_dict['filepath'])
ab8e5e51
AM
2789 except Exception as err:
2790 self.report_error('post hooks: %s' % str(err))
2791 return
2d30509f 2792 must_record_download_archive = True
2793
2794 if must_record_download_archive or self.params.get('force_write_download_archive', False):
2795 self.record_download_archive(info_dict)
c3e6ffba 2796 max_downloads = self.params.get('max_downloads')
2797 if max_downloads is not None and self._num_downloads >= int(max_downloads):
2798 raise MaxDownloadsReached()
8222d8de
JMF
2799
2800 def download(self, url_list):
2801 """Download a given list of URLs."""
de6000d9 2802 outtmpl = self.outtmpl_dict['default']
3089bc74
S
2803 if (len(url_list) > 1
2804 and outtmpl != '-'
2805 and '%' not in outtmpl
2806 and self.params.get('max_downloads') != 1):
acd69589 2807 raise SameFileError(outtmpl)
8222d8de
JMF
2808
2809 for url in url_list:
2810 try:
5f6a1245 2811 # It also downloads the videos
61aa5ba3
S
2812 res = self.extract_info(
2813 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de 2814 except UnavailableVideoError:
6febd1c1 2815 self.report_error('unable to download video')
8222d8de 2816 except MaxDownloadsReached:
8b0d7497 2817 self.to_screen('[info] Maximum number of downloaded files reached')
2818 raise
2819 except ExistingVideoReached:
d83cb531 2820 self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
8b0d7497 2821 raise
2822 except RejectedVideoReached:
d83cb531 2823 self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
8222d8de 2824 raise
63e0be34
PH
2825 else:
2826 if self.params.get('dump_single_json', False):
277d6ff5 2827 self.post_extract(res)
75d43ca0 2828 self.to_stdout(json.dumps(res, default=repr))
8222d8de
JMF
2829
2830 return self._download_retcode
2831
1dcc4c0c 2832 def download_with_info_file(self, info_filename):
31bd3925
JMF
2833 with contextlib.closing(fileinput.FileInput(
2834 [info_filename], mode='r',
2835 openhook=fileinput.hook_encoded('utf-8'))) as f:
2836 # FileInput doesn't have a read method, so we can't call json.load
498f5606 2837 info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
d4943898
JMF
2838 try:
2839 self.process_ie_result(info, download=True)
d3f62c19 2840 except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
d4943898
JMF
2841 webpage_url = info.get('webpage_url')
2842 if webpage_url is not None:
6febd1c1 2843 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
d4943898
JMF
2844 return self.download([webpage_url])
2845 else:
2846 raise
2847 return self._download_retcode
1dcc4c0c 2848
cb202fd2 2849 @staticmethod
75d43ca0 2850 def filter_requested_info(info_dict, actually_filter=True):
ae8f99e6 2851 remove_keys = ['__original_infodict'] # Always remove this since this may contain a copy of the entire dict
2852 keep_keys = ['_type'] # Always keep this to facilitate load-info-json
2853 if actually_filter:
bd99f6e6 2854 remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
ae8f99e6 2855 empty_values = (None, {}, [], set(), tuple())
2856 reject = lambda k, v: k not in keep_keys and (
2857 k.startswith('_') or k in remove_keys or v in empty_values)
2858 else:
394dcd44 2859 info_dict['epoch'] = int(time.time())
ae8f99e6 2860 reject = lambda k, v: k in remove_keys
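# Recursively apply the rejection rules above to nested containers
# (playlist entries, format lists, etc.), leaving scalar values untouched.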
5226731e 2861 filter_fn = lambda obj: (
b0249bca 2862 list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
a515a78d 2863 else obj if not isinstance(obj, dict)
ae8f99e6 2864 else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
5226731e 2865 return filter_fn(info_dict)
cb202fd2 2866
dcf64d43 2867 def run_pp(self, pp, infodict):
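# Run a single postprocessor and deal with the files it marks for deletion:
# they are kept (and scheduled to be moved) when keepvideo is set, removed otherwise.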
5bfa4862 2868 files_to_delete = []
dcf64d43 2869 if '__files_to_move' not in infodict:
2870 infodict['__files_to_move'] = {}
af819c21 2871 files_to_delete, infodict = pp.run(infodict)
5bfa4862 2872 if not files_to_delete:
dcf64d43 2873 return infodict
5bfa4862 2874
2875 if self.params.get('keepvideo', False):
2876 for f in files_to_delete:
dcf64d43 2877 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 2878 else:
2879 for old_filename in set(files_to_delete):
2880 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2881 try:
2882 os.remove(encodeFilename(old_filename))
2883 except (IOError, OSError):
2884 self.report_warning('Unable to remove downloaded original file')
dcf64d43 2885 if old_filename in infodict['__files_to_move']:
2886 del infodict['__files_to_move'][old_filename]
2887 return infodict
5bfa4862 2888
277d6ff5 2889 @staticmethod
2890 def post_extract(info_dict):
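# Merge the fields returned by the extractor's deferred '__post_extractor' callable
# into the info dict (and its stored original copy), recursing into playlist entries.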
2891 def actual_post_extract(info_dict):
2892 if info_dict.get('_type') in ('playlist', 'multi_video'):
2893 for video_dict in info_dict.get('entries', {}):
b050d210 2894 actual_post_extract(video_dict or {})
277d6ff5 2895 return
2896
07cce701 2897 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
4ec82a72 2898 extra = post_extractor().items()
2899 info_dict.update(extra)
07cce701 2900 info_dict.pop('__post_extractor', None)
277d6ff5 2901
4ec82a72 2902 original_infodict = info_dict.get('__original_infodict') or {}
2903 original_infodict.update(extra)
2904 original_infodict.pop('__post_extractor', None)
2905
b050d210 2906 actual_post_extract(info_dict or {})
277d6ff5 2907
56d868db 2908 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 2909 info = dict(ie_info)
56d868db 2910 info['__files_to_move'] = files_to_move or {}
2911 for pp in self._pps[key]:
dcf64d43 2912 info = self.run_pp(pp, info)
56d868db 2913 return info, info.pop('__files_to_move', None)
5bfa4862 2914
dcf64d43 2915 def post_process(self, filename, ie_info, files_to_move=None):
8222d8de
JMF
2916 """Run all the postprocessors on the given file."""
2917 info = dict(ie_info)
2918 info['filepath'] = filename
dcf64d43 2919 info['__files_to_move'] = files_to_move or {}
0202b52a 2920
56d868db 2921 for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
dcf64d43 2922 info = self.run_pp(pp, info)
2923 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
2924 del info['__files_to_move']
56d868db 2925 for pp in self._pps['after_move']:
dcf64d43 2926 info = self.run_pp(pp, info)
23c1a667 2927 return info
c1c9a79c 2928
5db07df6 2929 def _make_archive_id(self, info_dict):
e9fef7ee
S
2930 video_id = info_dict.get('id')
2931 if not video_id:
2932 return
5db07df6
PH
2933 # Future-proof against any change in case
2934 # and backwards compatibility with prior versions
e9fef7ee 2935 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 2936 if extractor is None:
1211bb6d
S
2937 url = str_or_none(info_dict.get('url'))
2938 if not url:
2939 return
e9fef7ee
S
2940 # Try to find matching extractor for the URL and take its ie_key
2941 for ie in self._ies:
1211bb6d 2942 if ie.suitable(url):
e9fef7ee
S
2943 extractor = ie.ie_key()
2944 break
2945 else:
2946 return
d0757229 2947 return '%s %s' % (extractor.lower(), video_id)
5db07df6
PH
2948
2949 def in_download_archive(self, info_dict):
2950 fn = self.params.get('download_archive')
2951 if fn is None:
2952 return False
2953
2954 vid_id = self._make_archive_id(info_dict)
e9fef7ee 2955 if not vid_id:
7012b23c 2956 return False # Incomplete video information
5db07df6 2957
a45e8619 2958 return vid_id in self.archive
c1c9a79c
PH
2959
2960 def record_download_archive(self, info_dict):
2961 fn = self.params.get('download_archive')
2962 if fn is None:
2963 return
5db07df6
PH
2964 vid_id = self._make_archive_id(info_dict)
2965 assert vid_id
c1c9a79c 2966 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
6febd1c1 2967 archive_file.write(vid_id + '\n')
a45e8619 2968 self.archive.add(vid_id)
dd82ffea 2969
8c51aa65 2970 @staticmethod
8abeeb94 2971 def format_resolution(format, default='unknown'):
fb04e403 2972 if format.get('vcodec') == 'none':
8326b00a 2973 if format.get('acodec') == 'none':
2974 return 'images'
fb04e403 2975 return 'audio only'
f49d89ee
PH
2976 if format.get('resolution') is not None:
2977 return format['resolution']
35615307
DA
2978 if format.get('width') and format.get('height'):
2979 res = '%dx%d' % (format['width'], format['height'])
2980 elif format.get('height'):
2981 res = '%sp' % format['height']
2982 elif format.get('width'):
388ae76b 2983 res = '%dx?' % format['width']
8c51aa65 2984 else:
8abeeb94 2985 res = default
8c51aa65
JMF
2986 return res
2987
c57f7757
PH
2988 def _format_note(self, fdict):
2989 res = ''
2990 if fdict.get('ext') in ['f4f', 'f4m']:
2991 res += '(unsupported) '
32f90364
PH
2992 if fdict.get('language'):
2993 if res:
2994 res += ' '
9016d76f 2995 res += '[%s] ' % fdict['language']
c57f7757
PH
2996 if fdict.get('format_note') is not None:
2997 res += fdict['format_note'] + ' '
2998 if fdict.get('tbr') is not None:
2999 res += '%4dk ' % fdict['tbr']
3000 if fdict.get('container') is not None:
3001 if res:
3002 res += ', '
3003 res += '%s container' % fdict['container']
3089bc74
S
3004 if (fdict.get('vcodec') is not None
3005 and fdict.get('vcodec') != 'none'):
c57f7757
PH
3006 if res:
3007 res += ', '
3008 res += fdict['vcodec']
91c7271a 3009 if fdict.get('vbr') is not None:
c57f7757
PH
3010 res += '@'
3011 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3012 res += 'video@'
3013 if fdict.get('vbr') is not None:
3014 res += '%4dk' % fdict['vbr']
fbb21cf5 3015 if fdict.get('fps') is not None:
5d583bdf
S
3016 if res:
3017 res += ', '
3018 res += '%sfps' % fdict['fps']
c57f7757
PH
3019 if fdict.get('acodec') is not None:
3020 if res:
3021 res += ', '
3022 if fdict['acodec'] == 'none':
3023 res += 'video only'
3024 else:
3025 res += '%-5s' % fdict['acodec']
3026 elif fdict.get('abr') is not None:
3027 if res:
3028 res += ', '
3029 res += 'audio'
3030 if fdict.get('abr') is not None:
3031 res += '@%3dk' % fdict['abr']
3032 if fdict.get('asr') is not None:
3033 res += ' (%5dHz)' % fdict['asr']
3034 if fdict.get('filesize') is not None:
3035 if res:
3036 res += ', '
3037 res += format_bytes(fdict['filesize'])
9732d77e
PH
3038 elif fdict.get('filesize_approx') is not None:
3039 if res:
3040 res += ', '
3041 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3042 return res
91c7271a 3043
c57f7757 3044 def list_formats(self, info_dict):
94badb25 3045 formats = info_dict.get('formats', [info_dict])
53ed7066 3046 new_format = (
3047 'list-formats' not in self.params.get('compat_opts', [])
169dbde9 3048 and self.params.get('listformats_table', True) is not False)
76d321f6 3049 if new_format:
3050 table = [
3051 [
3052 format_field(f, 'format_id'),
3053 format_field(f, 'ext'),
3054 self.format_resolution(f),
3055 format_field(f, 'fps', '%d'),
3056 '|',
3057 format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
3058 format_field(f, 'tbr', '%4dk'),
52a8a1e1 3059 shorten_protocol_name(f.get('protocol', '').replace('native', 'n')),
76d321f6 3060 '|',
3061 format_field(f, 'vcodec', default='unknown').replace('none', ''),
3062 format_field(f, 'vbr', '%4dk'),
3063 format_field(f, 'acodec', default='unknown').replace('none', ''),
3064 format_field(f, 'abr', '%3dk'),
3065 format_field(f, 'asr', '%5dHz'),
3f698246 3066 ', '.join(filter(None, (
3067 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
3068 format_field(f, 'language', '[%s]'),
3069 format_field(f, 'format_note'),
3070 format_field(f, 'container', ignore=(None, f.get('ext'))),
3071 format_field(f, 'asr', '%5dHz')))),
3072 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
76d321f6 3073 header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
3f698246 3074 '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO']
76d321f6 3075 else:
3076 table = [
3077 [
3078 format_field(f, 'format_id'),
3079 format_field(f, 'ext'),
3080 self.format_resolution(f),
3081 self._format_note(f)]
3082 for f in formats
3083 if f.get('preference') is None or f['preference'] >= -1000]
3084 header_line = ['format code', 'extension', 'resolution', 'note']
57dd9a8f 3085
cfb56d1a 3086 self.to_screen(
169dbde9 3087 '[info] Available formats for %s:' % info_dict['id'])
3088 self.to_stdout(render_table(
bc97cdae 3089 header_line, table, delim=new_format, extraGap=(0 if new_format else 1), hideEmpty=new_format))
cfb56d1a
PH
3090
3091 def list_thumbnails(self, info_dict):
b0249bca 3092 thumbnails = list(info_dict.get('thumbnails') or [])
cfb56d1a 3093 if not thumbnails:
b7b72db9 3094 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
3095 return
cfb56d1a
PH
3096
3097 self.to_screen(
3098 '[info] Thumbnails for %s:' % info_dict['id'])
169dbde9 3099 self.to_stdout(render_table(
cfb56d1a
PH
3100 ['ID', 'width', 'height', 'URL'],
3101 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
dca08720 3102
360e1ca5 3103 def list_subtitles(self, video_id, subtitles, name='subtitles'):
a504ced0 3104 if not subtitles:
360e1ca5 3105 self.to_screen('%s has no %s' % (video_id, name))
a504ced0 3106 return
a504ced0 3107 self.to_screen(
edab9dbf 3108 'Available %s for %s:' % (name, video_id))
2412044c 3109
3110 def _row(lang, formats):
49c258e1 3111 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
2412044c 3112 if len(set(names)) == 1:
7aee40c1 3113 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3114 return [lang, ', '.join(names), ', '.join(exts)]
3115
169dbde9 3116 self.to_stdout(render_table(
2412044c 3117 ['Language', 'Name', 'Formats'],
3118 [_row(lang, formats) for lang, formats in subtitles.items()],
3119 hideEmpty=True))
a504ced0 3120
dca08720
PH
3121 def urlopen(self, req):
3122 """ Start an HTTP download """
82d8a8b6 3123 if isinstance(req, compat_basestring):
67dda517 3124 req = sanitized_Request(req)
19a41fc6 3125 return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
3126
3127 def print_debug_header(self):
3128 if not self.params.get('verbose'):
3129 return
62fec3b2 3130
4192b51c 3131 if type('') is not compat_str:
067aa17e 3132 # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
4192b51c
PH
3133 self.report_warning(
3134 'Your Python is broken! Update to a newer and supported version')
3135
c6afed48
PH
3136 stdout_encoding = getattr(
3137 sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
b0472057 3138 encoding_str = (
734f90bb
PH
3139 '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
3140 locale.getpreferredencoding(),
3141 sys.getfilesystemencoding(),
c6afed48 3142 stdout_encoding,
b0472057 3143 self.get_encoding()))
4192b51c 3144 write_string(encoding_str, encoding=None)
734f90bb 3145
e5813e53 3146 source = (
3147 '(exe)' if hasattr(sys, 'frozen')
3148 else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
3149 else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
3150 else '')
3151 self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
e0986e31 3152 if _LAZY_LOADER:
f74980cb 3153 self._write_string('[debug] Lazy loading extractors enabled\n')
3154 if _PLUGIN_CLASSES:
3155 self._write_string(
3156 '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
53ed7066 3157 if self.params.get('compat_opts'):
3158 self._write_string(
3159 '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
dca08720
PH
3160 try:
3161 sp = subprocess.Popen(
3162 ['git', 'rev-parse', '--short', 'HEAD'],
3163 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3164 cwd=os.path.dirname(os.path.abspath(__file__)))
f5b1bca9 3165 out, err = process_communicate_or_kill(sp)
dca08720
PH
3166 out = out.decode().strip()
3167 if re.match('[0-9a-f]+', out):
f74980cb 3168 self._write_string('[debug] Git HEAD: %s\n' % out)
70a1165b 3169 except Exception:
dca08720
PH
3170 try:
3171 sys.exc_clear()
70a1165b 3172 except Exception:
dca08720 3173 pass
b300cda4
S
3174
3175 def python_implementation():
3176 impl_name = platform.python_implementation()
3177 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3178 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3179 return impl_name
3180
e5813e53 3181 self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
3182 platform.python_version(),
3183 python_implementation(),
3184 platform.architecture()[0],
b300cda4 3185 platform_name()))
d28b5171 3186
73fac4e9 3187 exe_versions = FFmpegPostProcessor.get_versions(self)
4c83c967 3188 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3189 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171
PH
3190 exe_str = ', '.join(
3191 '%s %s' % (exe, v)
3192 for exe, v in sorted(exe_versions.items())
3193 if v
3194 )
3195 if not exe_str:
3196 exe_str = 'none'
3197 self._write_string('[debug] exe versions: %s\n' % exe_str)
dca08720
PH
3198
3199 proxy_map = {}
3200 for handler in self._opener.handlers:
3201 if hasattr(handler, 'proxies'):
3202 proxy_map.update(handler.proxies)
734f90bb 3203 self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
dca08720 3204
58b1f00d
PH
3205 if self.params.get('call_home', False):
3206 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3207 self._write_string('[debug] Public IP address: %s\n' % ipaddr)
f5546c0b 3208 return
58b1f00d
PH
3209 latest_version = self.urlopen(
3210 'https://yt-dl.org/latest/version').read().decode('utf-8')
3211 if version_tuple(latest_version) > version_tuple(__version__):
3212 self.report_warning(
3213 'You are using an outdated version (newest version: %s)! '
3214 'See https://yt-dl.org/update if you need help updating.' %
3215 latest_version)
3216
e344693b 3217 def _setup_opener(self):
6ad14cab 3218 timeout_val = self.params.get('socket_timeout')
19a41fc6 3219 self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
6ad14cab 3220
982ee69a 3221 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
dca08720
PH
3222 opts_cookiefile = self.params.get('cookiefile')
3223 opts_proxy = self.params.get('proxy')
3224
982ee69a 3225 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3226
6a3f4c3f 3227 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720
PH
3228 if opts_proxy is not None:
3229 if opts_proxy == '':
3230 proxies = {}
3231 else:
3232 proxies = {'http': opts_proxy, 'https': opts_proxy}
3233 else:
3234 proxies = compat_urllib_request.getproxies()
067aa17e 3235 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720
PH
3236 if 'http' in proxies and 'https' not in proxies:
3237 proxies['https'] = proxies['http']
91410c9b 3238 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2
PH
3239
3240 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d
PH
3241 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3242 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3243 redirect_handler = YoutubeDLRedirectHandler()
8b172c2e 3244 data_handler = compat_urllib_request_DataHandler()
6240b0a2
JMF
3245
3246 # When passing our own FileHandler instance, build_opener won't add the
3247 # default FileHandler and allows us to disable the file protocol, which
3248 # can be used for malicious purposes (see
067aa17e 3249 # https://github.com/ytdl-org/youtube-dl/issues/8227)
6240b0a2
JMF
3250 file_handler = compat_urllib_request.FileHandler()
3251
3252 def file_open(*args, **kwargs):
7a5c1cfe 3253 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
6240b0a2
JMF
3254 file_handler.file_open = file_open
3255
3256 opener = compat_urllib_request.build_opener(
fca6dba8 3257 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3258
dca08720
PH
3259 # Delete the default user-agent header, which would otherwise apply in
3260 # cases where our custom HTTP handler doesn't come into play
067aa17e 3261 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720
PH
3262 opener.addheaders = []
3263 self._opener = opener
62fec3b2
PH
3264
3265 def encode(self, s):
3266 if isinstance(s, bytes):
3267 return s # Already encoded
3268
3269 try:
3270 return s.encode(self.get_encoding())
3271 except UnicodeEncodeError as err:
3272 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3273 raise
3274
3275 def get_encoding(self):
3276 encoding = self.params.get('encoding')
3277 if encoding is None:
3278 encoding = preferredencoding()
3279 return encoding
ec82d85a 3280
de6000d9 3281 def _write_thumbnails(self, info_dict, filename): # return the extensions
6c4fd172 3282 write_all = self.params.get('write_all_thumbnails', False)
3283 thumbnails = []
3284 if write_all or self.params.get('writethumbnail', False):
0202b52a 3285 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3286 multiple = write_all and len(thumbnails) > 1
ec82d85a 3287
0202b52a 3288 ret = []
981052c9 3289 for t in thumbnails[::-1]:
ec82d85a 3290 thumb_ext = determine_ext(t['url'], 'jpg')
6c4fd172 3291 suffix = '%s.' % t['id'] if multiple else ''
3292 thumb_display_id = '%s ' % t['id'] if multiple else ''
885cc0b7 3293 thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
ec82d85a 3294
0c3d0f51 3295 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
de6000d9 3296 ret.append(suffix + thumb_ext)
8ba87148 3297 t['filepath'] = thumb_filename
ec82d85a
PH
3298 self.to_screen('[%s] %s: Thumbnail %sis already present' %
3299 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3300 else:
5ef7d9bd 3301 self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
ec82d85a
PH
3302 (info_dict['extractor'], info_dict['id'], thumb_display_id))
3303 try:
3304 uf = self.urlopen(t['url'])
d3d89c32 3305 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3306 shutil.copyfileobj(uf, thumbf)
de6000d9 3307 ret.append(suffix + thumb_ext)
ec82d85a
PH
3308 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
3309 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
885cc0b7 3310 t['filepath'] = thumb_filename
3158150c 3311 except network_exceptions as err:
ec82d85a 3312 self.report_warning('Unable to download thumbnail "%s": %s' %
9b9c5355 3313 (t['url'], error_to_compat_str(err)))
6c4fd172 3314 if ret and not write_all:
3315 break
0202b52a 3316 return ret