[yt-dlp.git] / yt_dlp / YoutubeDL.py
cc52de43 1#!/usr/bin/env python3
dcdb292f 2# coding: utf-8
8222d8de 3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
9d2ecdbc 8import datetime
c1c9a79c 9import errno
31bd3925 10import fileinput
b5ae35ee 11import functools
8222d8de 12import io
b82f815f 13import itertools
8694c600 14import json
62fec3b2 15import locale
083c9df9 16import operator
8222d8de 17import os
dca08720 18import platform
8222d8de 19import re
20import shutil
dca08720 21import subprocess
8222d8de 22import sys
21cd8fae 23import tempfile
8222d8de 24import time
67134eab 25import tokenize
8222d8de 26import traceback
75822ca7 27import random
524e2e4f 28import unicodedata
8222d8de 29
ec11a9f4 30from enum import Enum
961ea474 31from string import ascii_letters
32
8c25f81b 33from .compat import (
82d8a8b6 34 compat_basestring,
4390d5ec 35 compat_brotli,
003c69a8 36 compat_get_terminal_size,
4f026faf 37 compat_kwargs,
d0d9ade4 38 compat_numeric_types,
e9c0cdd3 39 compat_os_name,
edf65256 40 compat_pycrypto_AES,
7d1eb38a 41 compat_shlex_quote,
ce02ed60 42 compat_str,
67134eab 43 compat_tokenize_tokenize,
ce02ed60 44 compat_urllib_error,
45 compat_urllib_request,
8b172c2e 46 compat_urllib_request_DataHandler,
819e0531 47 windows_enable_vt_mode,
8c25f81b 48)
982ee69a 49from .cookies import load_cookies
8c25f81b 50from .utils import (
eedb7ba5 51 age_restricted,
52 args_to_str,
ce02ed60 53 ContentTooShortError,
54 date_from_str,
55 DateRange,
acd69589 56 DEFAULT_OUTTMPL,
ce02ed60 57 determine_ext,
b5559424 58 determine_protocol,
48f79687 59 DownloadCancelled,
ce02ed60 60 DownloadError,
c0384f22 61 encode_compat_str,
ce02ed60 62 encodeFilename,
498f5606 63 EntryNotInPlaylist,
a06916d9 64 error_to_compat_str,
8b0d7497 65 ExistingVideoReached,
590bc6f6 66 expand_path,
ce02ed60 67 ExtractorError,
e29663c6 68 float_or_none,
02dbf93f 69 format_bytes,
76d321f6 70 format_field,
e0fd9573 71 format_decimal_suffix,
525ef922 72 formatSeconds,
773f291d 73 GeoRestrictedError,
0bb322b9 74 get_domain,
d5820461 75 has_certifi,
b0249bca 76 HEADRequest,
d37707bd 77 InAdvancePagedList,
c9969434 78 int_or_none,
732044af 79 iri_to_uri,
773f291d 80 ISO3166Utils,
34921b43 81 join_nonempty,
56a8fb4f 82 LazyList,
08438d2c 83 LINK_TEMPLATES,
ce02ed60 84 locked_file,
0202b52a 85 make_dir,
dca08720 86 make_HTTPS_handler,
ce02ed60 87 MaxDownloadsReached,
8b7539d2 88 merge_headers,
3158150c 89 network_exceptions,
5c3895ff 90 NO_DEFAULT,
ec11a9f4 91 number_of_digits,
cd6fc19e 92 orderedSet,
a06916d9 93 OUTTMPL_TYPES,
b7ab0590 94 PagedList,
083c9df9 95 parse_filesize,
91410c9b 96 PerRequestProxyHandler,
dca08720 97 platform_name,
d3c93ec2 98 Popen,
1e43a6f7 99 POSTPROCESS_WHEN,
eedb7ba5 100 PostProcessingError,
ce02ed60 101 preferredencoding,
eedb7ba5 102 prepend_extension,
f2ebc5c7 103 ReExtractInfo,
51fb4995 104 register_socks_protocols,
a06916d9 105 RejectedVideoReached,
3efb96a6 106 remove_terminal_sequences,
cfb56d1a 107 render_table,
eedb7ba5 108 replace_extension,
ce02ed60 109 SameFileError,
110 sanitize_filename,
1bb5c511 111 sanitize_path,
dcf77cf1 112 sanitize_url,
67dda517 113 sanitized_Request,
e5660ee6 114 std_headers,
819e0531 115 STR_FORMAT_RE_TMPL,
116 STR_FORMAT_TYPES,
1211bb6d 117 str_or_none,
e29663c6 118 strftime_or_none,
ce02ed60 119 subtitles_filename,
819e0531 120 supports_terminal_sequences,
f2ebc5c7 121 timetuple_from_msec,
732044af 122 to_high_limit_path,
324ad820 123 traverse_obj,
6033d980 124 try_get,
ce02ed60 125 UnavailableVideoError,
29eb5174 126 url_basename,
7d1eb38a 127 variadic,
58b1f00d 128 version_tuple,
ce02ed60 129 write_json_file,
130 write_string,
6a3f4c3f 131 YoutubeDLCookieProcessor,
dca08720 132 YoutubeDLHandler,
fca6dba8 133 YoutubeDLRedirectHandler,
ce02ed60 134)
a0e07d31 135from .cache import Cache
ec11a9f4 136from .minicurses import format_text
52a8a1e1 137from .extractor import (
138 gen_extractor_classes,
139 get_info_extractor,
140 _LAZY_LOADER,
3ae5e797 141 _PLUGIN_CLASSES as plugin_extractors
52a8a1e1 142)
4c54b89e 143from .extractor.openload import PhantomJSwrapper
52a8a1e1 144from .downloader import (
dbf5416a 145 FFmpegFD,
52a8a1e1 146 get_suitable_downloader,
147 shorten_protocol_name
148)
4c83c967 149from .downloader.rtmp import rtmpdump_version
4f026faf 150from .postprocessor import (
e36d50c5 151 get_postprocessor,
4e3b637d 152 EmbedThumbnailPP,
adbc4ec4 153 FFmpegFixupDuplicateMoovPP,
e36d50c5 154 FFmpegFixupDurationPP,
f17f8651 155 FFmpegFixupM3u8PP,
62cd676c 156 FFmpegFixupM4aPP,
6271f1ca 157 FFmpegFixupStretchedPP,
e36d50c5 158 FFmpegFixupTimestampPP,
4f026faf 159 FFmpegMergerPP,
160 FFmpegPostProcessor,
0202b52a 161 MoveFilesAfterDownloadPP,
3ae5e797 162 _PLUGIN_CLASSES as plugin_postprocessors
4f026faf 163)
4c88ff87 164from .update import detect_variant
36eaf303 165from .version import __version__, RELEASE_GIT_HEAD
8222d8de 166
e9c0cdd3 167if compat_os_name == 'nt':
168 import ctypes
169
2459b6e1 170
8222d8de 171class YoutubeDL(object):
172 """YoutubeDL class.
173
 174 YoutubeDL objects are the ones responsible for downloading the
 175 actual video file and writing it to disk if the user has requested
 176 it, among some other tasks. In most cases there should be one per
 177 program. Since, given a video URL, the downloader doesn't know how to
 178 extract all the needed information (a task that InfoExtractors do), it
 179 has to pass the URL to one of them.
180
181 For this, YoutubeDL objects have a method that allows
182 InfoExtractors to be registered in a given order. When it is passed
 183 a URL, the YoutubeDL object hands it to the first InfoExtractor it
184 finds that reports being able to handle it. The InfoExtractor extracts
185 all the information about the video or videos the URL refers to, and
 186 YoutubeDL processes the extracted information, possibly using a File
187 Downloader to download the video.
188
189 YoutubeDL objects accept a lot of parameters. In order not to saturate
190 the object constructor with arguments, it receives a dictionary of
191 options instead. These options are available through the params
192 attribute for the InfoExtractors to use. The YoutubeDL also
 193 registers itself as the downloader in charge of the InfoExtractors
194 that are added to it, so this is a "mutual registration".
195
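    A minimal embedding sketch (illustrative only; the placeholder URL, the
    option values and the use of extract_info as the entry point are examples,
    not part of the option reference below):

        from yt_dlp import YoutubeDL

        with YoutubeDL({'quiet': True, 'outtmpl': {'default': '%(title)s.%(ext)s'}}) as ydl:
            info = ydl.extract_info('https://example.com/some-video', download=False)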
196 Available options:
197
198 username: Username for authentication purposes.
199 password: Password for authentication purposes.
180940e0 200 videopassword: Password for accessing a video.
1da50aa3 201 ap_mso: Adobe Pass multiple-system operator identifier.
202 ap_username: Multiple-system operator account username.
203 ap_password: Multiple-system operator account password.
8222d8de 204 usenetrc: Use netrc for authentication instead.
205 verbose: Print additional info to stdout.
206 quiet: Do not print messages to stdout.
ad8915b7 207 no_warnings: Do not print out anything for warnings.
bb66c247 208 forceprint: A dict with keys WHEN mapped to a list of templates to
209 print to stdout. The allowed keys are video or any of the
210 items in utils.POSTPROCESS_WHEN.
ca30f449 211 For compatibility, a single list is also accepted
bb66c247 212 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
213 a list of tuples with (template, filename)
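                   For illustration only (the WHEN key 'video' and the templates
                   are example values):
                       'forceprint': {'video': ['%(title)s - %(uploader)s']},
                       'print_to_file': {'video': [('%(id)s', 'archive-ids.txt')]}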
53c18592 214 forceurl: Force printing final URL. (Deprecated)
215 forcetitle: Force printing title. (Deprecated)
216 forceid: Force printing ID. (Deprecated)
217 forcethumbnail: Force printing thumbnail URL. (Deprecated)
218 forcedescription: Force printing description. (Deprecated)
219 forcefilename: Force printing final filename. (Deprecated)
220 forceduration: Force printing duration. (Deprecated)
8694c600 221 forcejson: Force printing info_dict as JSON.
63e0be34 222 dump_single_json: Force printing the info_dict of the whole playlist
223 (or video) as a single JSON line.
c25228e5 224 force_write_download_archive: Force writing download archive regardless
225 of 'skip_download' or 'simulate'.
b7b04c78 226 simulate: Do not download the video files. If unset (or None),
227 simulate only if listsubtitles, listformats or list_thumbnails is used
eb8a4433 228 format: Video format code. see "FORMAT SELECTION" for more details.
093a1710 229 You can also pass a function. The function takes 'ctx' as
230 argument and returns the formats to download.
231 See "build_format_selector" for an implementation
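                   A minimal sketch of such a function (assumes ctx carries a
                   'formats' list, as provided by build_format_selector; picking
                   the last listed format is only an example policy):
                       def example_format_selector(ctx):
                           formats = ctx.get('formats') or []
                           if formats:
                               yield formats[-1]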
63ad4d43 232 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 233 ignore_no_formats_error: Ignore "No video formats" error. Useful for
234 extracting metadata even if the video is not actually
235 available for download (experimental)
0930b11f 236 format_sort: A list of fields by which to sort the video formats.
237 See "Sorting Formats" for more details.
c25228e5 238 format_sort_force: Force the given format_sort. see "Sorting Formats"
239 for more details.
08d30158 240 prefer_free_formats: Whether to prefer video formats with free containers
241 over non-free ones of same quality.
c25228e5 242 allow_multiple_video_streams: Allow multiple video streams to be merged
243 into a single file
244 allow_multiple_audio_streams: Allow multiple audio streams to be merged
245 into a single file
0ba692ac 246 check_formats Whether to test if the formats are downloadable.
9f1a1c36 247 Can be True (check all), False (check none),
248 'selected' (check selected formats),
0ba692ac 249 or None (check only if requested by extractor)
4524baf0 250 paths: Dictionary of output paths. The allowed keys are 'home'
251 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 252 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 253 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
34488702 254 For compatibility with youtube-dl, a single string can also be used
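                   Illustrative combination of the two options above (the paths
                   and template are example values only):
                       'paths': {'home': '~/Videos', 'temp': '/tmp/yt-dlp'},
                       'outtmpl': {'default': '%(title)s [%(id)s].%(ext)s'}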
a820dc72 255 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
256 restrictfilenames: Do not allow "&" and spaces in file names
257 trim_file_name: Limit length of filename (extension excluded)
4524baf0 258 windowsfilenames: Force the filenames to be windows compatible
b1940459 259 ignoreerrors: Do not stop on download/postprocessing errors.
260 Can be 'only_download' to ignore only download errors.
261 Default is 'only_download' for CLI, but False for API
26e2805c 262 skip_playlist_after_errors: Number of allowed failures until the rest of
263 the playlist is skipped
d22dec74 264 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 265 overwrites: Overwrite all video and metadata files if True,
266 overwrite only non-video files if None
267 and don't overwrite any file if False
34488702 268 For compatibility with youtube-dl,
269 "nooverwrites" may also be used instead
8222d8de 270 playliststart: Playlist item to start at.
271 playlistend: Playlist item to end at.
c14e88f0 272 playlist_items: Specific indices of playlist to download.
ff815fe6 273 playlistreverse: Download playlist items in reverse order.
75822ca7 274 playlistrandom: Download playlist items in random order.
8222d8de 275 matchtitle: Download only matching titles.
276 rejecttitle: Reject downloads for matching titles.
8bf9319e 277 logger: Log messages to a logging.Logger instance.
8222d8de 278 logtostderr: Log messages to stderr instead of stdout.
819e0531 279 consoletitle: Display progress in console window's titlebar.
8222d8de 280 writedescription: Write the video description to a .description file
281 writeinfojson: Write the video description to a .info.json file
75d43ca0 282 clean_infojson: Remove private fields from the infojson
34488702 283 getcomments: Extract video comments. This will not be written to disk
06167fbb 284 unless writeinfojson is also given
1fb07d10 285 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 286 writethumbnail: Write the thumbnail image to a file
c25228e5 287 allow_playlist_files: Whether to write playlists' description, infojson etc
288 also to disk when using the 'write*' options
ec82d85a 289 write_all_thumbnails: Write all thumbnail formats to files
732044af 290 writelink: Write an internet shortcut file, depending on the
291 current platform (.url/.webloc/.desktop)
292 writeurllink: Write a Windows internet shortcut file (.url)
293 writewebloclink: Write a macOS internet shortcut file (.webloc)
294 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 295 writesubtitles: Write the video subtitles to a file
741dd8ea 296 writeautomaticsub: Write the automatically generated subtitles to a file
245524e6 297 allsubtitles: Deprecated - Use subtitleslangs = ['all']
c32b0aab 298 Downloads all the subtitles of the video
0b7f3118 299 (requires writesubtitles or writeautomaticsub)
8222d8de 300 listsubtitles: Lists all available subtitles for the video
a504ced0 301 subtitlesformat: The format code for subtitles
c32b0aab 302 subtitleslangs: List of languages of the subtitles to download (can be regex).
303 The list may contain "all" to refer to all the available
304 subtitles. The language can be prefixed with a "-" to
305 exclude it from the requested languages. Eg: ['all', '-live_chat']
8222d8de 306 keepvideo: Keep the video file after post-processing
307 daterange: A DateRange object, download only if the upload_date is in the range.
308 skip_download: Skip the actual download of the video file
c35f9e72 309 cachedir: Location of the cache files in the filesystem.
a0e07d31 310 False to disable filesystem cache.
47192f92 311 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899 312 age_limit: An integer representing the user's age in years.
313 Unsuitable videos for the given age are skipped.
5fe18bdb 314 min_views: An integer representing the minimum view count the video
315 must have in order to not be skipped.
316 Videos without view count information are always
317 downloaded. None for no limit.
318 max_views: An integer representing the maximum view count.
319 Videos that are more popular than that are not
320 downloaded.
321 Videos without view count information are always
322 downloaded. None for no limit.
323 download_archive: File name of a file where all downloads are recorded.
c1c9a79c 324 Videos already present in the file are not downloaded
325 again.
8a51f564 326 break_on_existing: Stop the download process after attempting to download a
327 file that is in the archive.
328 break_on_reject: Stop the download process when encountering a video that
329 has been filtered out.
b222c271 330 break_per_url: Whether break_on_reject and break_on_existing
331 should act on each input URL as opposed to for the entire queue
8a51f564 332 cookiefile: File name where cookies should be read from and dumped to
f59f5ef8 333 cookiesfrombrowser: A tuple containing the name of the browser, the profile
 334 name/path from where cookies are loaded, and the name of the
335 keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
f81c62a6 336 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
337 support RFC 5746 secure renegotiation
f59f5ef8 338 nocheckcertificate: Do not verify SSL certificates
7e8c0af0 339 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
340 At the moment, this is only supported by YouTube.
8b7539d2 341 http_headers: A dictionary of custom headers to be used for all requests
a1ee09e8 342 proxy: URL of the proxy server to use
38cce791 343 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 344 on geo-restricted sites.
e344693b 345 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b 346 bidi_workaround: Work around buggy terminals without bidirectional text
 347 support, using fribidi
a0ddb8a2 348 debug_printtraffic:Print out sent and received HTTP traffic
91f071af 349 include_ads: Download ads as well (deprecated)
04b4d394 350 default_search: Prepend this string if an input url is not valid.
351 'auto' for elaborate guessing
62fec3b2 352 encoding: Use this encoding instead of the system-specified.
e8ee972c 353 extract_flat: Do not resolve URLs, return the immediate result.
057a5206 354 Pass in 'in_playlist' to only show this behavior for
355 playlist items.
f2ebc5c7 356 wait_for_video: If given, wait for scheduled streams to become available.
357 The value should be a tuple containing the range
358 (min_secs, max_secs) to wait between retries
4f026faf 359 postprocessors: A list of dictionaries, each with an entry
71b640cc 360 * key: The name of the postprocessor. See
7a5c1cfe 361 yt_dlp/postprocessor/__init__.py for a list.
bb66c247 362 * when: When to run the postprocessor. Allowed values are
363 the entries of utils.POSTPROCESS_WHEN
56d868db 364 Assumed to be 'post_process' if not given
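                   An illustrative entry (the key 'FFmpegExtractAudio' and its
                   'preferredcodec' argument are examples; any postprocessor
                   listed in yt_dlp/postprocessor/__init__.py can be named):
                       'postprocessors': [{
                           'key': 'FFmpegExtractAudio',
                           'preferredcodec': 'mp3',
                           'when': 'post_process',
                       }]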
b5ae35ee 365 post_hooks: Deprecated - Register a custom postprocessor instead
366 A list of functions that get called as the final step
ab8e5e51 367 for each video file, after all postprocessors have been
368 called. The filename will be passed as the only argument.
71b640cc 369 progress_hooks: A list of functions that get called on download
370 progress, with a dictionary with the entries
5cda4eda 371 * status: One of "downloading", "error", or "finished".
ee69b99a 372 Check this first and ignore unknown values.
3ba7740d 373 * info_dict: The extracted info_dict
71b640cc 374
5cda4eda 375 If status is one of "downloading", or "finished", the
ee69b99a 376 following properties may also be present:
377 * filename: The final filename (always present)
5cda4eda 378 * tmpfilename: The filename we're currently writing to
71b640cc 379 * downloaded_bytes: Bytes on disk
380 * total_bytes: Size of the whole file, None if unknown
5cda4eda 381 * total_bytes_estimate: Guess of the eventual file size,
382 None if unavailable.
383 * elapsed: The number of seconds since download started.
71b640cc 384 * eta: The estimated time in seconds, None if unknown
385 * speed: The download speed in bytes/second, None if
386 unknown
5cda4eda 387 * fragment_index: The counter of the currently
388 downloaded video fragment.
389 * fragment_count: The number of fragments (= individual
390 files that will be merged)
71b640cc 391
392 Progress hooks are guaranteed to be called at least once
393 (with status "finished") if the download is successful.
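                   A minimal hook using only the keys documented above:

                       def example_progress_hook(d):
                           if d['status'] == 'finished':
                               print('Downloaded', d['filename'])
                           elif d['status'] == 'downloading':
                               print(d['filename'], d.get('eta'), 'seconds left')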
819e0531 394 postprocessor_hooks: A list of functions that get called on postprocessing
395 progress, with a dictionary with the entries
396 * status: One of "started", "processing", or "finished".
397 Check this first and ignore unknown values.
398 * postprocessor: Name of the postprocessor
399 * info_dict: The extracted info_dict
400
401 Progress hooks are guaranteed to be called at least twice
402 (with status "started" and "finished") if the processing is successful.
45598f15 403 merge_output_format: Extension to use when merging formats.
6b591b29 404 final_ext: Expected final extension; used to detect when the file was
59a7a13e 405 already downloaded and converted
6271f1ca 406 fixup: Automatically correct known faults of the file.
407 One of:
408 - "never": do nothing
409 - "warn": only emit a warning
410 - "detect_or_warn": check whether we can do anything
62cd676c 411 about it, warn otherwise (default)
504f20dd 412 source_address: Client-side IP address to bind to.
6ec6cb4e 413 call_home: Boolean, true iff we are allowed to contact the
7a5c1cfe 414 yt-dlp servers for debugging. (BROKEN)
1cf376f5 415 sleep_interval_requests: Number of seconds to sleep between requests
416 during extraction
7aa589a5 417 sleep_interval: Number of seconds to sleep before each download when
418 used alone or a lower bound of a range for randomized
419 sleep before each download (minimum possible number
420 of seconds to sleep) when used along with
421 max_sleep_interval.
422 max_sleep_interval:Upper bound of a range for randomized sleep before each
423 download (maximum possible number of seconds to sleep).
424 Must only be used along with sleep_interval.
425 Actual sleep time will be a random float from range
426 [sleep_interval; max_sleep_interval].
1cf376f5 427 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a 428 listformats: Print an overview of available video formats and exit.
429 list_thumbnails: Print a table of all thumbnails and exit.
347de493 430 match_filter: A function that gets called with the info_dict of
431 every video.
432 If it returns a message, the video is ignored.
433 If it returns None, the video is downloaded.
434 match_filter_func in utils.py is one example for this.
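                   A minimal sketch following that contract (the 60-second
                   cut-off and the 'duration' field are only examples):

                       def skip_short_videos(info_dict):
                           duration = info_dict.get('duration')
                           if duration and duration < 60:
                               return 'Skipping: video is shorter than a minute'
                           return None  # None means the video is downloaded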
7e5db8c9 435 no_color: Do not emit color codes in output.
0a840f58 436 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 437 HTTP header
0a840f58 438 geo_bypass_country:
773f291d 439 Two-letter ISO 3166-2 country code that will be used for
440 explicit geographic restriction bypassing via faking
504f20dd 441 X-Forwarded-For HTTP header
5f95927a 442 geo_bypass_ip_block:
443 IP range in CIDR notation that will be used similarly to
504f20dd 444 geo_bypass_country
71b640cc 445
85729c51 446 The following options determine which downloader is picked:
52a8a1e1 447 external_downloader: A dictionary of protocol keys and the executable of the
448 external downloader to use for it. The allowed protocols
449 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
450 Set the value to 'native' to use the native downloader
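                        Illustrative value (the choice of downloaders is an
                        example only): {'m3u8': 'native', 'default': 'aria2c'}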
451 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
452 or {'m3u8': 'ffmpeg'} instead.
453 Use the native HLS downloader instead of ffmpeg/avconv
bf09af3a 454 if True, otherwise use ffmpeg/avconv if False, otherwise
455 use downloader suggested by extractor if None.
53ed7066 456 compat_opts: Compatibility options. See "Differences in default behavior".
3acf6d38 457 The following options do not work when used through the API:
b5ae35ee 458 filename, abort-on-error, multistreams, no-live-chat, format-sort
dac5df5a 459 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
e4f02757 460 Refer to __init__.py for their implementation
819e0531 461 progress_template: Dictionary of templates for progress outputs.
462 Allowed keys are 'download', 'postprocess',
463 'download-title' (console title) and 'postprocess-title'.
464 The template is mapped on a dictionary with keys 'progress' and 'info'
fe7e0c98 465
8222d8de 466 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 467 the downloader (see yt_dlp/downloader/common.py):
51d9739f 468 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
205a0654 469 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
470 continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
59a7a13e 471 external_downloader_args, concurrent_fragment_downloads.
76b1bd67 472
473 The following options are used by the post processors:
d4a24f40 474 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 475 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117 476 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
477 to the binary or its containing directory.
43820c03 478 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
34488702 479 and a list of additional command-line arguments for the
480 postprocessor/executable. The dict can also have "PP+EXE" keys
481 which are used when the given exe is used by the given PP.
 482 Use 'default' as the name for arguments to be passed to all PP
483 For compatibility with youtube-dl, a single list of args
484 can also be used
e409895f 485
486 The following options are used by the extractors:
62bff2c1 487 extractor_retries: Number of times to retry for known errors
488 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 489 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 490 discontinuities such as ad breaks (default: False)
5d3a0e79 491 extractor_args: A dictionary of arguments to be passed to the extractors.
492 See "EXTRACTOR ARGUMENTS" for details.
493 Eg: {'youtube': {'skip': ['dash', 'hls']}}
88f23a18 494 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
5d3a0e79 495 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
496 If True (default), DASH manifests and related
62bff2c1 497 data will be downloaded and processed by extractor.
498 You can reduce network I/O by disabling it if you don't
499 care about DASH. (only for youtube)
5d3a0e79 500 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
501 If True (default), HLS manifests and related
62bff2c1 502 data will be downloaded and processed by extractor.
503 You can reduce network I/O by disabling it if you don't
504 care about HLS. (only for youtube)
8222d8de 505 """
506
c9969434 507 _NUMERIC_FIELDS = set((
508 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
e6f21b3d 509 'timestamp', 'release_timestamp',
c9969434 510 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
511 'average_rating', 'comment_count', 'age_limit',
512 'start_time', 'end_time',
513 'chapter_number', 'season_number', 'episode_number',
514 'track_number', 'disc_number', 'release_year',
c9969434 515 ))
516
6db9c4d5 517 _format_fields = {
518 # NB: Keep in sync with the docstring of extractor/common.py
519 'url', 'manifest_url', 'ext', 'format', 'format_id', 'format_note',
520 'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
521 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
522 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
523 'preference', 'language', 'language_preference', 'quality', 'source_preference',
524 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
525 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
526 }
48ee10ee 527 _format_selection_exts = {
528 'audio': {'m4a', 'mp3', 'ogg', 'aac'},
529 'video': {'mp4', 'flv', 'webm', '3gp'},
530 'storyboards': {'mhtml'},
531 }
532
3511266b 533 def __init__(self, params=None, auto_init=True):
883d4b1e 534 """Create a FileDownloader object with the given options.
535 @param auto_init Whether to load the default extractors and print header (if verbose).
49a57e70 536 Set to 'no_verbose_header' to not print the header
883d4b1e 537 """
e9f9a10f 538 if params is None:
539 params = {}
592b7485 540 self.params = params
8b7491c8 541 self._ies = {}
56c73665 542 self._ies_instances = {}
1e43a6f7 543 self._pps = {k: [] for k in POSTPROCESS_WHEN}
b35496d8 544 self._printed_messages = set()
1cf376f5 545 self._first_webpage_request = True
ab8e5e51 546 self._post_hooks = []
933605d7 547 self._progress_hooks = []
819e0531 548 self._postprocessor_hooks = []
8222d8de 549 self._download_retcode = 0
550 self._num_downloads = 0
9c906919 551 self._num_videos = 0
592b7485 552 self._playlist_level = 0
553 self._playlist_urls = set()
a0e07d31 554 self.cache = Cache(self)
34308b30 555
819e0531 556 windows_enable_vt_mode()
cf4f42cb 557 self._out_files = {
558 'error': sys.stderr,
559 'print': sys.stderr if self.params.get('logtostderr') else sys.stdout,
560 'console': None if compat_os_name == 'nt' else next(
561 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
562 }
563 self._out_files['screen'] = sys.stderr if self.params.get('quiet') else self._out_files['print']
ec11a9f4 564 self._allow_colors = {
cf4f42cb 565 type_: not self.params.get('no_color') and supports_terminal_sequences(self._out_files[type_])
566 for type_ in ('screen', 'error')
ec11a9f4 567 }
819e0531 568
a61f4b28 569 if sys.version_info < (3, 6):
570 self.report_warning(
0181adef 571 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
a61f4b28 572
88acdbc2 573 if self.params.get('allow_unplayable_formats'):
574 self.report_warning(
ec11a9f4 575 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
819e0531 576 'This is a developer option intended for debugging. \n'
577 ' If you experience any issues while using this option, '
ec11a9f4 578 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
88acdbc2 579
be5df5ee 580 def check_deprecated(param, option, suggestion):
581 if self.params.get(param) is not None:
53ed7066 582 self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
be5df5ee 583 return True
584 return False
585
586 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
38cce791 587 if self.params.get('geo_verification_proxy') is None:
588 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
589
0d1bb027 590 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
591 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
53ed7066 592 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
0d1bb027 593
49a57e70 594 for msg in self.params.get('_warnings', []):
0d1bb027 595 self.report_warning(msg)
ee8dd27a 596 for msg in self.params.get('_deprecation_warnings', []):
597 self.deprecation_warning(msg)
0d1bb027 598
ec11a9f4 599 if 'list-formats' in self.params.get('compat_opts', []):
600 self.params['listformats_table'] = False
601
b5ae35ee 602 if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
b868936c 603 # nooverwrites was unnecessarily changed to overwrites
604 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
605 # This ensures compatibility with both keys
606 self.params['overwrites'] = not self.params['nooverwrites']
b5ae35ee 607 elif self.params.get('overwrites') is None:
608 self.params.pop('overwrites', None)
b868936c 609 else:
610 self.params['nooverwrites'] = not self.params['overwrites']
b9d973be 611
455a15e2 612 self.params.setdefault('forceprint', {})
613 self.params.setdefault('print_to_file', {})
bb66c247 614
615 # Compatibility with older syntax
ca30f449 616 if not isinstance(params['forceprint'], dict):
455a15e2 617 self.params['forceprint'] = {'video': params['forceprint']}
ca30f449 618
455a15e2 619 if self.params.get('bidi_workaround', False):
1c088fa8 620 try:
621 import pty
622 master, slave = pty.openpty()
003c69a8 623 width = compat_get_terminal_size().columns
1c088fa8 624 if width is None:
625 width_args = []
626 else:
627 width_args = ['-w', str(width)]
5d681e96 628 sp_kwargs = dict(
1c088fa8 629 stdin=subprocess.PIPE,
630 stdout=slave,
cf4f42cb 631 stderr=self._out_files['error'])
5d681e96 632 try:
d3c93ec2 633 self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
5d681e96 634 except OSError:
d3c93ec2 635 self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
5d681e96 636 self._output_channel = os.fdopen(master, 'rb')
1c088fa8 637 except OSError as ose:
66e7ace1 638 if ose.errno == errno.ENOENT:
49a57e70 639 self.report_warning(
640 'Could not find fribidi executable, ignoring --bidi-workaround. '
641 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
1c088fa8 642 else:
643 raise
0783b09b 644
3089bc74 645 if (sys.platform != 'win32'
646 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
455a15e2 647 and not self.params.get('restrictfilenames', False)):
e9137224 648 # Unicode filesystem API will throw errors (#1474, #13027)
34308b30 649 self.report_warning(
6febd1c1 650 'Assuming --restrict-filenames since file system encoding '
1b725173 651 'cannot encode all characters. '
6febd1c1 652 'Set the LC_ALL environment variable to fix this.')
4a98cdbf 653 self.params['restrictfilenames'] = True
34308b30 654
de6000d9 655 self.outtmpl_dict = self.parse_outtmpl()
486dd09e 656
187986a8 657 # Creating format selector here allows us to catch syntax errors before the extraction
658 self.format_selector = (
fa9f30b8 659 self.params.get('format') if self.params.get('format') in (None, '-')
093a1710 660 else self.params['format'] if callable(self.params['format'])
187986a8 661 else self.build_format_selector(self.params['format']))
662
8b7539d2 663 # Set http_headers defaults according to std_headers
664 self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
665
dca08720 666 self._setup_opener()
667
3511266b 668 if auto_init:
883d4b1e 669 if auto_init != 'no_verbose_header':
670 self.print_debug_header()
3511266b 671 self.add_default_info_extractors()
672
013b50b7 673 hooks = {
674 'post_hooks': self.add_post_hook,
675 'progress_hooks': self.add_progress_hook,
676 'postprocessor_hooks': self.add_postprocessor_hook,
677 }
678 for opt, fn in hooks.items():
679 for ph in self.params.get(opt, []):
680 fn(ph)
71b640cc 681
5bfc8bee 682 for pp_def_raw in self.params.get('postprocessors', []):
683 pp_def = dict(pp_def_raw)
684 when = pp_def.pop('when', 'post_process')
685 self.add_post_processor(
686 get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
687 when=when)
688
51fb4995 689 register_socks_protocols()
690
ed39cac5 691 def preload_download_archive(fn):
692 """Preload the archive, if any is specified"""
693 if fn is None:
694 return False
49a57e70 695 self.write_debug(f'Loading archive file {fn!r}')
ed39cac5 696 try:
697 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
698 for line in archive_file:
699 self.archive.add(line.strip())
700 except IOError as ioe:
701 if ioe.errno != errno.ENOENT:
702 raise
703 return False
704 return True
705
706 self.archive = set()
707 preload_download_archive(self.params.get('download_archive'))
708
7d4111ed 709 def warn_if_short_id(self, argv):
710 # short YouTube ID starting with dash?
711 idxs = [
712 i for i, a in enumerate(argv)
713 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
714 if idxs:
715 correct_argv = (
7a5c1cfe 716 ['yt-dlp']
3089bc74 717 + [a for i, a in enumerate(argv) if i not in idxs]
718 + ['--'] + [argv[i] for i in idxs]
7d4111ed 719 )
720 self.report_warning(
721 'Long argument string detected. '
49a57e70 722 'Use -- to separate parameters and URLs, like this:\n%s' %
7d4111ed 723 args_to_str(correct_argv))
724
8222d8de 725 def add_info_extractor(self, ie):
726 """Add an InfoExtractor object to the end of the list."""
8b7491c8 727 ie_key = ie.ie_key()
728 self._ies[ie_key] = ie
e52d7f85 729 if not isinstance(ie, type):
8b7491c8 730 self._ies_instances[ie_key] = ie
e52d7f85 731 ie.set_downloader(self)
8222d8de 732
8b7491c8 733 def _get_info_extractor_class(self, ie_key):
734 ie = self._ies.get(ie_key)
735 if ie is None:
736 ie = get_info_extractor(ie_key)
737 self.add_info_extractor(ie)
738 return ie
739
56c73665 740 def get_info_extractor(self, ie_key):
741 """
742 Get an instance of an IE with name ie_key, it will try to get one from
743 the _ies list, if there's no instance it will create a new one and add
744 it to the extractor list.
745 """
746 ie = self._ies_instances.get(ie_key)
747 if ie is None:
748 ie = get_info_extractor(ie_key)()
749 self.add_info_extractor(ie)
750 return ie
751
023fa8c4 752 def add_default_info_extractors(self):
753 """
754 Add the InfoExtractors returned by gen_extractors to the end of the list
755 """
e52d7f85 756 for ie in gen_extractor_classes():
023fa8c4 757 self.add_info_extractor(ie)
758
56d868db 759 def add_post_processor(self, pp, when='post_process'):
8222d8de 760 """Add a PostProcessor object to the end of the chain."""
5bfa4862 761 self._pps[when].append(pp)
8222d8de 762 pp.set_downloader(self)
763
ab8e5e51 764 def add_post_hook(self, ph):
765 """Add the post hook"""
766 self._post_hooks.append(ph)
767
933605d7 768 def add_progress_hook(self, ph):
819e0531 769 """Add the download progress hook"""
933605d7 770 self._progress_hooks.append(ph)
8ab470f1 771
819e0531 772 def add_postprocessor_hook(self, ph):
773 """Add the postprocessing progress hook"""
774 self._postprocessor_hooks.append(ph)
5bfc8bee 775 for pps in self._pps.values():
776 for pp in pps:
777 pp.add_progress_hook(ph)
819e0531 778
1c088fa8 779 def _bidi_workaround(self, message):
5d681e96 780 if not hasattr(self, '_output_channel'):
1c088fa8 781 return message
782
5d681e96 783 assert hasattr(self, '_output_process')
11b85ce6 784 assert isinstance(message, compat_str)
6febd1c1 785 line_count = message.count('\n') + 1
786 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 787 self._output_process.stdin.flush()
6febd1c1 788 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 789 for _ in range(line_count))
6febd1c1 790 return res[:-len('\n')]
1c088fa8 791
b35496d8 792 def _write_string(self, message, out=None, only_once=False):
793 if only_once:
794 if message in self._printed_messages:
795 return
796 self._printed_messages.add(message)
797 write_string(message, out=out, encoding=self.params.get('encoding'))
734f90bb 798
cf4f42cb 799 def to_stdout(self, message, skip_eol=False, quiet=None):
0760b0a7 800 """Print message to stdout"""
cf4f42cb 801 if quiet is not None:
ae6a1b95 802 self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
cf4f42cb 803 self._write_string(
804 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
805 self._out_files['print'])
806
807 def to_screen(self, message, skip_eol=False, quiet=None):
808 """Print message to screen if not in quiet mode"""
8bf9319e 809 if self.params.get('logger'):
43afe285 810 self.params['logger'].debug(message)
cf4f42cb 811 return
812 if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
813 return
814 self._write_string(
815 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
816 self._out_files['screen'])
8222d8de 817
b35496d8 818 def to_stderr(self, message, only_once=False):
0760b0a7 819 """Print message to stderr"""
11b85ce6 820 assert isinstance(message, compat_str)
8bf9319e 821 if self.params.get('logger'):
43afe285 822 self.params['logger'].error(message)
823 else:
cf4f42cb 824 self._write_string('%s\n' % self._bidi_workaround(message), self._out_files['error'], only_once=only_once)
825
826 def _send_console_code(self, code):
827 if compat_os_name == 'nt' or not self._out_files['console']:
828 return
829 self._write_string(code, self._out_files['console'])
8222d8de 830
1e5b9a95 831 def to_console_title(self, message):
832 if not self.params.get('consoletitle', False):
833 return
3efb96a6 834 message = remove_terminal_sequences(message)
4bede0d8 835 if compat_os_name == 'nt':
836 if ctypes.windll.kernel32.GetConsoleWindow():
837 # c_wchar_p() might not be necessary if `message` is
838 # already of type unicode()
839 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
cf4f42cb 840 else:
841 self._send_console_code(f'\033]0;{message}\007')
1e5b9a95 842
bdde425c 843 def save_console_title(self):
cf4f42cb 844 if not self.params.get('consoletitle') or self.params.get('simulate'):
bdde425c 845 return
592b7485 846 self._send_console_code('\033[22;0t') # Save the title on stack
bdde425c 847
848 def restore_console_title(self):
cf4f42cb 849 if not self.params.get('consoletitle') or self.params.get('simulate'):
bdde425c 850 return
592b7485 851 self._send_console_code('\033[23;0t') # Restore the title from stack
bdde425c 852
853 def __enter__(self):
854 self.save_console_title()
855 return self
856
857 def __exit__(self, *args):
858 self.restore_console_title()
f89197d7 859
dca08720 860 if self.params.get('cookiefile') is not None:
1bab3437 861 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 862
fa9f30b8 863 def trouble(self, message=None, tb=None, is_error=True):
8222d8de 864 """Determine action to take when a download problem appears.
865
866 Depending on if the downloader has been configured to ignore
867 download errors or not, this method may throw an exception or
868 not when errors are found, after printing the message.
869
fa9f30b8 870 @param tb If given, is additional traceback information
871 @param is_error Whether to raise error according to ignorerrors
8222d8de 872 """
873 if message is not None:
874 self.to_stderr(message)
875 if self.params.get('verbose'):
876 if tb is None:
877 if sys.exc_info()[0]: # if .trouble has been called from an except block
6febd1c1 878 tb = ''
8222d8de 879 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
6febd1c1 880 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
c0384f22 881 tb += encode_compat_str(traceback.format_exc())
8222d8de 882 else:
883 tb_data = traceback.format_list(traceback.extract_stack())
6febd1c1 884 tb = ''.join(tb_data)
c19bc311 885 if tb:
886 self.to_stderr(tb)
fa9f30b8 887 if not is_error:
888 return
b1940459 889 if not self.params.get('ignoreerrors'):
8222d8de 890 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
891 exc_info = sys.exc_info()[1].exc_info
892 else:
893 exc_info = sys.exc_info()
894 raise DownloadError(message, exc_info)
895 self._download_retcode = 1
896
ec11a9f4 897 class Styles(Enum):
898 HEADERS = 'yellow'
f304da8a 899 EMPHASIS = 'light blue'
ec11a9f4 900 ID = 'green'
901 DELIM = 'blue'
902 ERROR = 'red'
903 WARNING = 'yellow'
ff51ed58 904 SUPPRESS = 'light black'
ec11a9f4 905
7578d77d 906 def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
ec11a9f4 907 if test_encoding:
908 original_text = text
5c104538 909 # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
910 encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
ec11a9f4 911 text = text.encode(encoding, 'ignore').decode(encoding)
912 if fallback is not None and text != original_text:
913 text = fallback
914 if isinstance(f, self.Styles):
f304da8a 915 f = f.value
7578d77d 916 return format_text(text, f) if allow_colors else text if fallback is None else fallback
ec11a9f4 917
918 def _format_screen(self, *args, **kwargs):
7578d77d 919 return self._format_text(
cf4f42cb 920 self._out_files['screen'], self._allow_colors['screen'], *args, **kwargs)
ec11a9f4 921
922 def _format_err(self, *args, **kwargs):
7578d77d 923 return self._format_text(
cf4f42cb 924 self._out_files['error'], self._allow_colors['error'], *args, **kwargs)
819e0531 925
c84aeac6 926 def report_warning(self, message, only_once=False):
8222d8de 927 '''
928 Print the message to stderr, it will be prefixed with 'WARNING:'
929 If stderr is a tty file the 'WARNING:' will be colored
930 '''
6d07ce01 931 if self.params.get('logger') is not None:
932 self.params['logger'].warning(message)
8222d8de 933 else:
ad8915b7 934 if self.params.get('no_warnings'):
935 return
ec11a9f4 936 self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
8222d8de 937
ee8dd27a 938 def deprecation_warning(self, message):
939 if self.params.get('logger') is not None:
 940 self.params['logger'].warning(f'DeprecationWarning: {message}')
941 else:
942 self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
943
fa9f30b8 944 def report_error(self, message, *args, **kwargs):
8222d8de 945 '''
946 Do the same as trouble, but prefixes the message with 'ERROR:', colored
947 in red if stderr is a tty file.
948 '''
fa9f30b8 949 self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
8222d8de 950
b35496d8 951 def write_debug(self, message, only_once=False):
0760b0a7 952 '''Log debug message or Print message to stderr'''
953 if not self.params.get('verbose', False):
954 return
955 message = '[debug] %s' % message
956 if self.params.get('logger'):
957 self.params['logger'].debug(message)
958 else:
b35496d8 959 self.to_stderr(message, only_once)
0760b0a7 960
8222d8de 961 def report_file_already_downloaded(self, file_name):
962 """Report file has already been fully downloaded."""
963 try:
6febd1c1 964 self.to_screen('[download] %s has already been downloaded' % file_name)
ce02ed60 965 except UnicodeEncodeError:
6febd1c1 966 self.to_screen('[download] The file has already been downloaded')
8222d8de 967
0c3d0f51 968 def report_file_delete(self, file_name):
969 """Report that existing file will be deleted."""
970 try:
c25228e5 971 self.to_screen('Deleting existing file %s' % file_name)
0c3d0f51 972 except UnicodeEncodeError:
c25228e5 973 self.to_screen('Deleting existing file')
0c3d0f51 974
319b6059 975 def raise_no_formats(self, info, forced=False, *, msg=None):
1151c407 976 has_drm = info.get('__has_drm')
319b6059 977 ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
978 msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
979 if forced or not ignored:
1151c407 980 raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
319b6059 981 expected=has_drm or ignored or expected)
88acdbc2 982 else:
983 self.report_warning(msg)
984
de6000d9 985 def parse_outtmpl(self):
986 outtmpl_dict = self.params.get('outtmpl', {})
987 if not isinstance(outtmpl_dict, dict):
988 outtmpl_dict = {'default': outtmpl_dict}
71ce444a 989 # Remove spaces in the default template
990 if self.params.get('restrictfilenames'):
991 sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
992 else:
993 sanitize = lambda x: x
de6000d9 994 outtmpl_dict.update({
71ce444a 995 k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
80c03fa9 996 if outtmpl_dict.get(k) is None})
de6000d9 997 for key, val in outtmpl_dict.items():
998 if isinstance(val, bytes):
999 self.report_warning(
1000 'Parameter outtmpl is bytes, but should be a unicode string. '
1001 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
1002 return outtmpl_dict
1003
21cd8fae 1004 def get_output_path(self, dir_type='', filename=None):
1005 paths = self.params.get('paths', {})
1006 assert isinstance(paths, dict)
1007 path = os.path.join(
1008 expand_path(paths.get('home', '').strip()),
1009 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
1010 filename or '')
1011
1012 # Temporary fix for #4787
1013 # 'Treat' all problem characters by passing filename through preferredencoding
 1014 # to work around encoding issues with subprocess on python2 @ Windows
1015 if sys.version_info < (3, 0) and sys.platform == 'win32':
1016 path = encodeFilename(path, True).decode(preferredencoding())
1017 return sanitize_path(path, force=self.params.get('windowsfilenames'))
1018
76a264ac 1019 @staticmethod
901130bb 1020 def _outtmpl_expandpath(outtmpl):
1021 # expand_path translates '%%' into '%' and '$$' into '$'
1022 # correspondingly that is not what we want since we need to keep
1023 # '%%' intact for template dict substitution step. Working around
1024 # with boundary-alike separator hack.
1025 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
1026 outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
1027
1028 # outtmpl should be expand_path'ed before template dict substitution
1029 # because meta fields may contain env variables we don't want to
1030 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1031 # title "Hello $PATH", we don't want `$PATH` to be expanded.
1032 return expand_path(outtmpl).replace(sep, '')
1033
1034 @staticmethod
1035 def escape_outtmpl(outtmpl):
1036 ''' Escape any remaining strings like %s, %abc% etc. '''
1037 return re.sub(
1038 STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1039 lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1040 outtmpl)
1041
1042 @classmethod
1043 def validate_outtmpl(cls, outtmpl):
76a264ac 1044 ''' @return None or Exception object '''
7d1eb38a 1045 outtmpl = re.sub(
37893bb0 1046 STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
7d1eb38a 1047 lambda mobj: f'{mobj.group(0)[:-1]}s',
1048 cls._outtmpl_expandpath(outtmpl))
76a264ac 1049 try:
7d1eb38a 1050 cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
76a264ac 1051 return None
1052 except ValueError as err:
1053 return err
1054
03b4de72 1055 @staticmethod
1056 def _copy_infodict(info_dict):
1057 info_dict = dict(info_dict)
09b49e1f 1058 info_dict.pop('__postprocessors', None)
03b4de72 1059 return info_dict
1060
e0fd9573 1061 def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1062 """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1063 @param sanitize Whether to sanitize the output as a filename.
1064 For backward compatibility, a function can also be passed
1065 """
1066
6e84b215 1067 info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
143db31d 1068
03b4de72 1069 info_dict = self._copy_infodict(info_dict)
752cda38 1070 info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
53c18592 1071 formatSeconds(info_dict['duration'], '-' if sanitize else ':')
143db31d 1072 if info_dict.get('duration', None) is not None
1073 else None)
752cda38 1074 info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
9c906919 1075 info_dict['video_autonumber'] = self._num_videos
752cda38 1076 if info_dict.get('resolution') is None:
1077 info_dict['resolution'] = self.format_resolution(info_dict, default=None)
143db31d 1078
e6f21b3d 1079 # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
143db31d 1080 # of %(field)s to %(field)0Nd for backward compatibility
1081 field_size_compat_map = {
ec11a9f4 1082 'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
1083 'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
752cda38 1084 'autonumber': self.params.get('autonumber_size') or 5,
143db31d 1085 }
752cda38 1086
385a27fa 1087 TMPL_DICT = {}
37893bb0 1088 EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
385a27fa 1089 MATH_FUNCTIONS = {
1090 '+': float.__add__,
1091 '-': float.__sub__,
1092 }
e625be0d 1093 # Field is of the form key1.key2...
1094 # where keys (except first) can be string, int or slice
2b8a2973 1095 FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
e0fd9573 1096 MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
385a27fa 1097 MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
e625be0d 1098 INTERNAL_FORMAT_RE = re.compile(r'''(?x)
1099 (?P<negate>-)?
385a27fa 1100 (?P<fields>{field})
1101 (?P<maths>(?:{math_op}{math_field})*)
e625be0d 1102 (?:>(?P<strf_format>.+?))?
34baa9fd 1103 (?P<remaining>
1104 (?P<alternate>(?<!\\),[^|&)]+)?
1105 (?:&(?P<replacement>.*?))?
1106 (?:\|(?P<default>.*?))?
1107 )$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
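        # A few examples of field specs accepted by the grammar above
        # (illustrative only; the concrete field names are examples):
        #   %(title)s                  plain field lookup
        #   %(playlist_index+10)d      maths ('maths' group: +/- with numbers or fields)
        #   %(duration>%H-%M-%S)s      strftime-style formatting ('strf_format' group)
        #   %(uploader,creator)s       alternate fields tried until one is non-empty
        #   %(license|unknown)s        '|' supplies a literal default ('default' group)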
752cda38 1108
2b8a2973 1109 def _traverse_infodict(k):
1110 k = k.split('.')
1111 if k[0] == '':
1112 k.pop(0)
1113 return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
76a264ac 1114
752cda38 1115 def get_value(mdict):
1116 # Object traversal
2b8a2973 1117 value = _traverse_infodict(mdict['fields'])
752cda38 1118 # Negative
1119 if mdict['negate']:
1120 value = float_or_none(value)
1121 if value is not None:
1122 value *= -1
1123 # Do maths
385a27fa 1124 offset_key = mdict['maths']
1125 if offset_key:
752cda38 1126 value = float_or_none(value)
1127 operator = None
385a27fa 1128 while offset_key:
1129 item = re.match(
1130 MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1131 offset_key).group(0)
1132 offset_key = offset_key[len(item):]
1133 if operator is None:
752cda38 1134 operator = MATH_FUNCTIONS[item]
385a27fa 1135 continue
1136 item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1137 offset = float_or_none(item)
1138 if offset is None:
2b8a2973 1139 offset = float_or_none(_traverse_infodict(item))
385a27fa 1140 try:
1141 value = operator(value, multiplier * offset)
1142 except (TypeError, ZeroDivisionError):
1143 return None
1144 operator = None
752cda38 1145 # Datetime formatting
1146 if mdict['strf_format']:
7c37ff97 1147 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
752cda38 1148
1149 return value
1150
b868936c 1151 na = self.params.get('outtmpl_na_placeholder', 'NA')
1152
e0fd9573 1153 def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
5c3895ff 1154 return sanitize_filename(str(value), restricted=restricted, is_id=(
1155 bool(re.search(r'(^|[_.])id(\.|$)', key))
1156 if 'filename-sanitization' in self.params.get('compat_opts', [])
1157 else NO_DEFAULT))
e0fd9573 1158
1159 sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1160 sanitize = bool(sanitize)
1161
6e84b215 1162 def _dumpjson_default(obj):
1163 if isinstance(obj, (set, LazyList)):
1164 return list(obj)
adbc4ec4 1165 return repr(obj)
6e84b215 1166
752cda38 1167 def create_key(outer_mobj):
1168 if not outer_mobj.group('has_key'):
b836dc94 1169 return outer_mobj.group(0)
752cda38 1170 key = outer_mobj.group('key')
752cda38 1171 mobj = re.match(INTERNAL_FORMAT_RE, key)
e0fd9573 1172 initial_field = mobj.group('fields') if mobj else ''
e978789f 1173 value, replacement, default = None, None, na
7c37ff97 1174 while mobj:
e625be0d 1175 mobj = mobj.groupdict()
7c37ff97 1176 default = mobj['default'] if mobj['default'] is not None else default
752cda38 1177 value = get_value(mobj)
e978789f 1178 replacement = mobj['replacement']
7c37ff97 1179 if value is None and mobj['alternate']:
34baa9fd 1180 mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
7c37ff97 1181 else:
1182 break
752cda38 1183
b868936c 1184 fmt = outer_mobj.group('format')
752cda38 1185 if fmt == 's' and value is not None and key in field_size_compat_map.keys():
1186 fmt = '0{:d}d'.format(field_size_compat_map[key])
1187
e978789f 1188 value = default if value is None else value if replacement is None else replacement
752cda38 1189
4476d2c7 1190 flags = outer_mobj.group('conversion') or ''
7d1eb38a 1191 str_fmt = f'{fmt[:-1]}s'
524e2e4f 1192 if fmt[-1] == 'l': # list
4476d2c7 1193 delim = '\n' if '#' in flags else ', '
9e907ebd 1194 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
524e2e4f 1195 elif fmt[-1] == 'j': # json
4476d2c7 1196 value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
524e2e4f 1197 elif fmt[-1] == 'q': # quoted
4476d2c7 1198 value = map(str, variadic(value) if '#' in flags else [value])
1199 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
524e2e4f 1200 elif fmt[-1] == 'B': # bytes
f5aa5cfb 1201 value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
1202 value, fmt = value.decode('utf-8', 'ignore'), 's'
524e2e4f 1203 elif fmt[-1] == 'U': # unicode normalized
524e2e4f 1204 value, fmt = unicodedata.normalize(
1205 # "+" = compatibility equivalence, "#" = NFD
4476d2c7 1206 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
524e2e4f 1207 value), str_fmt
e0fd9573 1208 elif fmt[-1] == 'D': # decimal suffix
abbeeebc 1209 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1210 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1211 factor=1024 if '#' in flags else 1000)
37893bb0 1212 elif fmt[-1] == 'S': # filename sanitization
e0fd9573 1213 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
7d1eb38a 1214 elif fmt[-1] == 'c':
524e2e4f 1215 if value:
1216 value = str(value)[0]
76a264ac 1217 else:
524e2e4f 1218 fmt = str_fmt
76a264ac 1219 elif fmt[-1] not in 'rs': # numeric
a439a3a4 1220 value = float_or_none(value)
752cda38 1221 if value is None:
1222 value, fmt = default, 's'
901130bb 1223
752cda38 1224 if sanitize:
1225 if fmt[-1] == 'r':
1226 # If value is an object, sanitize might convert it to a string
1227 # So we convert it to repr first
7d1eb38a 1228 value, fmt = repr(value), str_fmt
639f1cea 1229 if fmt[-1] in 'csr':
e0fd9573 1230 value = sanitizer(initial_field, value)
901130bb 1231
b868936c 1232 key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
385a27fa 1233 TMPL_DICT[key] = value
b868936c 1234 return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
752cda38 1235
385a27fa 1236 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1237
819e0531 1238 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1239 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1240 return self.escape_outtmpl(outtmpl) % info_dict
1241
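    # Illustrative example for evaluate_outtmpl() above (not part of the original source):
    # with info_dict = {'id': 'abc', 'title': 'My Video'},
    # self.evaluate_outtmpl('%(title)s [%(id)s]', info_dict) would render 'My Video [abc]'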
de6000d9 1242 def _prepare_filename(self, info_dict, tmpl_type='default'):
8222d8de 1243 try:
b836dc94 1244 outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
e0fd9573 1245 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
6a0546e3 1246 if not filename:
1247 return None
15da37c7 1248
6a0546e3 1249 if tmpl_type in ('default', 'temp'):
1250 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1251 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1252 filename = replace_extension(filename, ext, final_ext)
1253 else:
1254 force_ext = OUTTMPL_TYPES[tmpl_type]
1255 if force_ext:
1256 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
de6000d9 1257
bdc3fd2f 1258 # https://github.com/blackjack4494/youtube-dlc/issues/85
1259 trim_file_name = self.params.get('trim_file_name', False)
1260 if trim_file_name:
5c22c63d 1261 no_ext, *ext = filename.rsplit('.', 2)
1262 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
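            # e.g. with trim_file_name = 10, 'averylongtitle.info.json' becomes
            # 'averylongt.info.json' (up to two trailing extensions are preserved)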
bdc3fd2f 1263
0202b52a 1264 return filename
8222d8de 1265 except ValueError as err:
6febd1c1 1266 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de 1267 return None
1268
de6000d9 1269 def prepare_filename(self, info_dict, dir_type='', warn=False):
1270 """Generate the output filename."""
21cd8fae 1271
de6000d9 1272 filename = self._prepare_filename(info_dict, dir_type or 'default')
80c03fa9 1273 if not filename and dir_type not in ('', 'temp'):
1274 return ''
de6000d9 1275
c84aeac6 1276 if warn:
21cd8fae 1277 if not self.params.get('paths'):
de6000d9 1278 pass
1279 elif filename == '-':
c84aeac6 1280 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
de6000d9 1281 elif os.path.isabs(filename):
c84aeac6 1282 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
de6000d9 1283 if filename == '-' or not filename:
1284 return filename
1285
21cd8fae 1286 return self.get_output_path(dir_type, filename)
0202b52a 1287
120fe513 1288 def _match_entry(self, info_dict, incomplete=False, silent=False):
ecdec191 1289 """ Returns None if the file should be downloaded """
8222d8de 1290
c77495e3 1291 video_title = info_dict.get('title', info_dict.get('id', 'video'))
1292
8b0d7497 1293 def check_filter():
8b0d7497 1294 if 'title' in info_dict:
1295 # This can happen when we're just evaluating the playlist
1296 title = info_dict['title']
1297 matchtitle = self.params.get('matchtitle', False)
1298 if matchtitle:
1299 if not re.search(matchtitle, title, re.IGNORECASE):
1300 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1301 rejecttitle = self.params.get('rejecttitle', False)
1302 if rejecttitle:
1303 if re.search(rejecttitle, title, re.IGNORECASE):
1304 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1305 date = info_dict.get('upload_date')
1306 if date is not None:
1307 dateRange = self.params.get('daterange', DateRange())
1308 if date not in dateRange:
1309 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
1310 view_count = info_dict.get('view_count')
1311 if view_count is not None:
1312 min_views = self.params.get('min_views')
1313 if min_views is not None and view_count < min_views:
1314 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1315 max_views = self.params.get('max_views')
1316 if max_views is not None and view_count > max_views:
1317 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1318 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1319 return 'Skipping "%s" because it is age restricted' % video_title
8b0d7497 1320
8f18aca8 1321 match_filter = self.params.get('match_filter')
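        # Illustrative sketch of a 'match_filter' callable (an assumption, not from the original source):
        #   def only_long(info_dict, *, incomplete=False):
        #       if (info_dict.get('duration') or 0) < 60:
        #           return 'Video is shorter than 60 seconds'  # returned string = reason to skip
        #       return None  # None means the video should be downloaded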
1322 if match_filter is not None:
1323 try:
1324 ret = match_filter(info_dict, incomplete=incomplete)
1325 except TypeError:
1326 # For backward compatibility
1327 ret = None if incomplete else match_filter(info_dict)
1328 if ret is not None:
1329 return ret
8b0d7497 1330 return None
1331
c77495e3 1332 if self.in_download_archive(info_dict):
1333 reason = '%s has already been recorded in the archive' % video_title
1334 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1335 else:
1336 reason = check_filter()
1337 break_opt, break_err = 'break_on_reject', RejectedVideoReached
8b0d7497 1338 if reason is not None:
120fe513 1339 if not silent:
1340 self.to_screen('[download] ' + reason)
c77495e3 1341 if self.params.get(break_opt, False):
1342 raise break_err()
8b0d7497 1343 return reason
fe7e0c98 1344
b6c45014 1345 @staticmethod
1346 def add_extra_info(info_dict, extra_info):
1347 '''Set the keys from extra_info in info dict if they are missing'''
1348 for key, value in extra_info.items():
1349 info_dict.setdefault(key, value)
1350
409e1828 1351 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
61aa5ba3 1352 process=True, force_generic_extractor=False):
41d1cca3 1353 """
1354 Return a list with a dictionary for each video extracted.
1355
1356 Arguments:
1357 url -- URL to extract
1358
1359 Keyword arguments:
1360 download -- whether to download videos during extraction
1361 ie_key -- extractor key hint
1362 extra_info -- dictionary containing the extra values to add to each result
1363 process -- whether to resolve all unresolved references (URLs, playlist items),
1364 must be True for download to work.
1365 force_generic_extractor -- force using the generic extractor
1366 """
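        # Illustrative usage (an assumption, not from the original file):
        #   info = ydl.extract_info('https://example.com/watch?v=xyz', download=False)
        # With process=True (the default) the returned result has all references resolved;
        # with process=False the raw ie_result can later be fed to process_ie_result()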
fe7e0c98 1367
409e1828 1368 if extra_info is None:
1369 extra_info = {}
1370
61aa5ba3 1371 if not ie_key and force_generic_extractor:
d22dec74 1372 ie_key = 'Generic'
1373
8222d8de 1374 if ie_key:
8b7491c8 1375 ies = {ie_key: self._get_info_extractor_class(ie_key)}
8222d8de 1376 else:
1377 ies = self._ies
1378
8b7491c8 1379 for ie_key, ie in ies.items():
8222d8de 1380 if not ie.suitable(url):
1381 continue
1382
1383 if not ie.working():
6febd1c1 1384 self.report_warning('The program functionality for this site has been marked as broken, '
1385 'and will probably not work.')
8222d8de 1386
1151c407 1387 temp_id = ie.get_temp_id(url)
a0566bbf 1388 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
5e5be0c0 1389 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1390 if self.params.get('break_on_existing', False):
1391 raise ExistingVideoReached()
a0566bbf 1392 break
8b7491c8 1393 return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
a0566bbf 1394 else:
1395 self.report_error('no suitable InfoExtractor for URL %s' % url)
1396
8e5fecc8 1397 def __handle_extraction_exceptions(func):
b5ae35ee 1398 @functools.wraps(func)
a0566bbf 1399 def wrapper(self, *args, **kwargs):
6da22e7d 1400 while True:
1401 try:
1402 return func(self, *args, **kwargs)
1403 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
8222d8de 1404 raise
6da22e7d 1405 except ReExtractInfo as e:
1406 if e.expected:
1407 self.to_screen(f'{e}; Re-extracting data')
1408 else:
1409 self.to_stderr('\r')
1410 self.report_warning(f'{e}; Re-extracting data')
1411 continue
1412 except GeoRestrictedError as e:
1413 msg = e.msg
1414 if e.countries:
1415 msg += '\nThis video is available in %s.' % ', '.join(
1416 map(ISO3166Utils.short2full, e.countries))
1417 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1418 self.report_error(msg)
1419 except ExtractorError as e: # An error we somewhat expected
1420 self.report_error(str(e), e.format_traceback())
1421 except Exception as e:
1422 if self.params.get('ignoreerrors'):
1423 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1424 else:
1425 raise
1426 break
a0566bbf 1427 return wrapper
1428
f2ebc5c7 1429 def _wait_for_video(self, ie_result):
1430 if (not self.params.get('wait_for_video')
1431 or ie_result.get('_type', 'video') != 'video'
1432 or ie_result.get('formats') or ie_result.get('url')):
1433 return
1434
1435 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1436 last_msg = ''
1437
1438 def progress(msg):
1439 nonlocal last_msg
1440 self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1441 last_msg = msg
1442
1443 min_wait, max_wait = self.params.get('wait_for_video')
1444 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1445 if diff is None and ie_result.get('live_status') == 'is_upcoming':
16c620bc 1446 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
f2ebc5c7 1447 self.report_warning('Release time of video is not known')
1448 elif (diff or 0) <= 0:
1449 self.report_warning('Video should already be available according to extracted info')
38d79fd1 1450 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
f2ebc5c7 1451 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1452
1453 wait_till = time.time() + diff
1454 try:
1455 while True:
1456 diff = wait_till - time.time()
1457 if diff <= 0:
1458 progress('')
1459 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1460 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1461 time.sleep(1)
1462 except KeyboardInterrupt:
1463 progress('')
1464 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1465 except BaseException as e:
1466 if not isinstance(e, ReExtractInfo):
1467 self.to_screen('')
1468 raise
1469
a0566bbf 1470 @__handle_extraction_exceptions
58f197b7 1471 def __extract_info(self, url, ie, download, extra_info, process):
a0566bbf 1472 ie_result = ie.extract(url)
1473 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1474 return
1475 if isinstance(ie_result, list):
1476 # Backwards compatibility: old IE result format
1477 ie_result = {
1478 '_type': 'compat_list',
1479 'entries': ie_result,
1480 }
e37d0efb 1481 if extra_info.get('original_url'):
1482 ie_result.setdefault('original_url', extra_info['original_url'])
a0566bbf 1483 self.add_default_extra_info(ie_result, ie, url)
1484 if process:
f2ebc5c7 1485 self._wait_for_video(ie_result)
a0566bbf 1486 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1487 else:
a0566bbf 1488 return ie_result
fe7e0c98 1489
ea38e55f 1490 def add_default_extra_info(self, ie_result, ie, url):
6033d980 1491 if url is not None:
1492 self.add_extra_info(ie_result, {
1493 'webpage_url': url,
1494 'original_url': url,
57ebfca3 1495 })
1496 webpage_url = ie_result.get('webpage_url')
1497 if webpage_url:
1498 self.add_extra_info(ie_result, {
1499 'webpage_url_basename': url_basename(webpage_url),
1500 'webpage_url_domain': get_domain(webpage_url),
6033d980 1501 })
1502 if ie is not None:
1503 self.add_extra_info(ie_result, {
1504 'extractor': ie.IE_NAME,
1505 'extractor_key': ie.ie_key(),
1506 })
ea38e55f 1507
58adec46 1508 def process_ie_result(self, ie_result, download=True, extra_info=None):
8222d8de 1509 """
1510 Take the result of the ie (may be modified) and resolve all unresolved
1511 references (URLs, playlist items).
1512
1513 It will also download the videos if 'download'.
1514 Returns the resolved ie_result.
1515 """
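        # The '_type' values dispatched below are: 'video', 'url', 'url_transparent',
        # 'playlist', 'multi_video' and the deprecated 'compat_list'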
58adec46 1516 if extra_info is None:
1517 extra_info = {}
e8ee972c 1518 result_type = ie_result.get('_type', 'video')
1519
057a5206 1520 if result_type in ('url', 'url_transparent'):
134c6ea8 1521 ie_result['url'] = sanitize_url(ie_result['url'])
e37d0efb 1522 if ie_result.get('original_url'):
1523 extra_info.setdefault('original_url', ie_result['original_url'])
1524
057a5206 1525 extract_flat = self.params.get('extract_flat', False)
3089bc74 1526 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1527 or extract_flat is True):
ecb54191 1528 info_copy = ie_result.copy()
6033d980 1529 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
360167b9 1530 if ie and not ie_result.get('id'):
4614bc22 1531 info_copy['id'] = ie.get_temp_id(ie_result['url'])
6033d980 1532 self.add_default_extra_info(info_copy, ie, ie_result['url'])
4614bc22 1533 self.add_extra_info(info_copy, extra_info)
b5475f11 1534 info_copy, _ = self.pre_process(info_copy)
ecb54191 1535 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
4614bc22 1536 if self.params.get('force_write_download_archive', False):
1537 self.record_download_archive(info_copy)
e8ee972c 1538 return ie_result
1539
8222d8de 1540 if result_type == 'video':
b6c45014 1541 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1542 ie_result = self.process_video_result(ie_result, download=download)
28b0eb0f 1543 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1544 if additional_urls:
e9f4ccd1 1545 # TODO: Improve MetadataParserPP to allow setting a list
9c2b75b5 1546 if isinstance(additional_urls, compat_str):
1547 additional_urls = [additional_urls]
1548 self.to_screen(
1549 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1550 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1551 ie_result['additional_entries'] = [
1552 self.extract_info(
b69fd25c 1553 url, download, extra_info=extra_info,
9c2b75b5 1554 force_generic_extractor=self.params.get('force_generic_extractor'))
1555 for url in additional_urls
1556 ]
1557 return ie_result
8222d8de 1558 elif result_type == 'url':
1559 # We have to add extra_info to the results because it may be
1560 # contained in a playlist
07cce701 1561 return self.extract_info(
1562 ie_result['url'], download,
1563 ie_key=ie_result.get('ie_key'),
1564 extra_info=extra_info)
7fc3fa05 1565 elif result_type == 'url_transparent':
1566 # Use the information from the embedding page
1567 info = self.extract_info(
1568 ie_result['url'], ie_key=ie_result.get('ie_key'),
1569 extra_info=extra_info, download=False, process=False)
1570
1640eb09 1571 # extract_info may return None when ignoreerrors is enabled and
1572 # extraction failed with an error, don't crash and return early
1573 # in this case
1574 if not info:
1575 return info
1576
412c617d 1577 force_properties = dict(
1578 (k, v) for k, v in ie_result.items() if v is not None)
0396806f 1579 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
412c617d 1580 if f in force_properties:
1581 del force_properties[f]
1582 new_result = info.copy()
1583 new_result.update(force_properties)
7fc3fa05 1584
0563f7ac 1585 # Extracted info may not be a video result (i.e.
1586 # info.get('_type', 'video') != video) but rather an url or
1587 # url_transparent. In such cases outer metadata (from ie_result)
1588 # should be propagated to inner one (info). For this to happen
1589 # _type of info should be overridden with url_transparent. This
067aa17e 1590 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac 1591 if new_result.get('_type') == 'url':
1592 new_result['_type'] = 'url_transparent'
7fc3fa05 1593
1594 return self.process_ie_result(
1595 new_result, download=download, extra_info=extra_info)
40fcba5e 1596 elif result_type in ('playlist', 'multi_video'):
30a074c2 1597 # Protect from infinite recursion due to recursively nested playlists
1598 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1599 webpage_url = ie_result['webpage_url']
1600 if webpage_url in self._playlist_urls:
7e85e872 1601 self.to_screen(
30a074c2 1602 '[download] Skipping already downloaded playlist: %s'
1603 % (ie_result.get('title') or ie_result.get('id')))
1604 return
7e85e872 1605
30a074c2 1606 self._playlist_level += 1
1607 self._playlist_urls.add(webpage_url)
03f83004 1608 self._fill_common_fields(ie_result, False)
bc516a3f 1609 self._sanitize_thumbnails(ie_result)
30a074c2 1610 try:
1611 return self.__process_playlist(ie_result, download)
1612 finally:
1613 self._playlist_level -= 1
1614 if not self._playlist_level:
1615 self._playlist_urls.clear()
8222d8de 1616 elif result_type == 'compat_list':
c9bf4114 1617 self.report_warning(
1618 'Extractor %s returned a compat_list result. '
1619 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1620
8222d8de 1621 def _fixup(r):
b868936c 1622 self.add_extra_info(r, {
1623 'extractor': ie_result['extractor'],
1624 'webpage_url': ie_result['webpage_url'],
1625 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1626 'webpage_url_domain': get_domain(ie_result['webpage_url']),
b868936c 1627 'extractor_key': ie_result['extractor_key'],
1628 })
8222d8de 1629 return r
1630 ie_result['entries'] = [
b6c45014 1631 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de 1632 for r in ie_result['entries']
1633 ]
1634 return ie_result
1635 else:
1636 raise Exception('Invalid result type: %s' % result_type)
1637
e92caff5 1638 def _ensure_dir_exists(self, path):
1639 return make_dir(path, self.report_error)
1640
3b603dbd 1641 @staticmethod
1642 def _playlist_infodict(ie_result, **kwargs):
1643 return {
1644 **ie_result,
1645 'playlist': ie_result.get('title') or ie_result.get('id'),
1646 'playlist_id': ie_result.get('id'),
1647 'playlist_title': ie_result.get('title'),
1648 'playlist_uploader': ie_result.get('uploader'),
1649 'playlist_uploader_id': ie_result.get('uploader_id'),
1650 'playlist_index': 0,
1651 **kwargs,
1652 }
1653
30a074c2 1654 def __process_playlist(self, ie_result, download):
1655 # We process each entry in the playlist
1656 playlist = ie_result.get('title') or ie_result.get('id')
1657 self.to_screen('[download] Downloading playlist: %s' % playlist)
1658
498f5606 1659 if 'entries' not in ie_result:
aa9369a2 1660 raise EntryNotInPlaylist('There are no entries')
7c7f7161 1661
1662 MissingEntry = object()
498f5606 1663 incomplete_entries = bool(ie_result.get('requested_entries'))
1664 if incomplete_entries:
bf5f605e 1665 def fill_missing_entries(entries, indices):
7c7f7161 1666 ret = [MissingEntry] * max(indices)
bf5f605e 1667 for i, entry in zip(indices, entries):
498f5606 1668 ret[i - 1] = entry
1669 return ret
1670 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
02fd60d3 1671
30a074c2 1672 playlist_results = []
1673
56a8fb4f 1674 playliststart = self.params.get('playliststart', 1)
30a074c2 1675 playlistend = self.params.get('playlistend')
1676 # For backwards compatibility, interpret -1 as whole list
1677 if playlistend == -1:
1678 playlistend = None
1679
1680 playlistitems_str = self.params.get('playlist_items')
1681 playlistitems = None
1682 if playlistitems_str is not None:
1683 def iter_playlistitems(format):
1684 for string_segment in format.split(','):
1685 if '-' in string_segment:
1686 start, end = string_segment.split('-')
1687 for item in range(int(start), int(end) + 1):
1688 yield int(item)
1689 else:
1690 yield int(string_segment)
1691 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
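            # e.g. a playlist_items value of '1-3,7' yields [1, 2, 3, 7]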
1692
1693 ie_entries = ie_result['entries']
8e5fecc8 1694 if isinstance(ie_entries, list):
ed8d87f9 1695 playlist_count = len(ie_entries)
f0d785d3 1696 msg = f'Collected {playlist_count} videos; downloading %d of them'
1697 ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count
1698
8e5fecc8 1699 def get_entry(i):
1700 return ie_entries[i - 1]
1701 else:
f0d785d3 1702 msg = 'Downloading %d videos'
c586f9e8 1703 if not isinstance(ie_entries, (PagedList, LazyList)):
8e5fecc8 1704 ie_entries = LazyList(ie_entries)
d37707bd 1705 elif isinstance(ie_entries, InAdvancePagedList):
1706 if ie_entries._pagesize == 1:
1707 playlist_count = ie_entries._pagecount
8e5fecc8 1708
1709 def get_entry(i):
1710 return YoutubeDL.__handle_extraction_exceptions(
1711 lambda self, i: ie_entries[i - 1]
1712 )(self, i)
50fed816 1713
f0d785d3 1714 entries, broken = [], False
ff1c7fc9 1715 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1716 for i in items:
1717 if i == 0:
1718 continue
56a8fb4f 1719 if playlistitems is None and playlistend is not None and playlistend < i:
1720 break
1721 entry = None
1722 try:
50fed816 1723 entry = get_entry(i)
7c7f7161 1724 if entry is MissingEntry:
498f5606 1725 raise EntryNotInPlaylist()
56a8fb4f 1726 except (IndexError, EntryNotInPlaylist):
1727 if incomplete_entries:
aa9369a2 1728 raise EntryNotInPlaylist(f'Entry {i} cannot be found')
56a8fb4f 1729 elif not playlistitems:
1730 break
1731 entries.append(entry)
120fe513 1732 try:
1733 if entry is not None:
1734 self._match_entry(entry, incomplete=True, silent=True)
1735 except (ExistingVideoReached, RejectedVideoReached):
f0d785d3 1736 broken = True
120fe513 1737 break
56a8fb4f 1738 ie_result['entries'] = entries
30a074c2 1739
56a8fb4f 1740 # Save playlist_index before re-ordering
1741 entries = [
9e598870 1742 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
56a8fb4f 1743 for i, entry in enumerate(entries, 1)
1744 if entry is not None]
1745 n_entries = len(entries)
498f5606 1746
f0d785d3 1747 if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
1748 ie_result['playlist_count'] = n_entries
1749
e08a85d8 1750 if not playlistitems and (playliststart != 1 or playlistend):
56a8fb4f 1751 playlistitems = list(range(playliststart, playliststart + n_entries))
498f5606 1752 ie_result['requested_entries'] = playlistitems
1753
e08a85d8 1754 _infojson_written = False
0bfc53d0 1755 write_playlist_files = self.params.get('allow_playlist_files', True)
1756 if write_playlist_files and self.params.get('list_thumbnails'):
1757 self.list_thumbnails(ie_result)
1758 if write_playlist_files and not self.params.get('simulate'):
3b603dbd 1759 ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
e08a85d8 1760 _infojson_written = self._write_info_json(
1761 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1762 if _infojson_written is None:
80c03fa9 1763 return
1764 if self._write_description('playlist', ie_result,
1765 self.prepare_filename(ie_copy, 'pl_description')) is None:
1766 return
681de68e 1767 # TODO: This should be passed to ThumbnailsConvertor if necessary
80c03fa9 1768 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
30a074c2 1769
1770 if self.params.get('playlistreverse', False):
1771 entries = entries[::-1]
30a074c2 1772 if self.params.get('playlistrandom', False):
1773 random.shuffle(entries)
1774
1775 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1776
56a8fb4f 1777 self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
26e2805c 1778 failures = 0
1779 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
71729754 1780 for i, entry_tuple in enumerate(entries, 1):
1781 playlist_index, entry = entry_tuple
81139999 1782 if 'playlist-index' in self.params.get('compat_opts', []):
1783 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
30a074c2 1784 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
1785 # This __x_forwarded_for_ip thing is a bit ugly but requires
1786 # minimal changes
1787 if x_forwarded_for:
1788 entry['__x_forwarded_for_ip'] = x_forwarded_for
1789 extra = {
1790 'n_entries': n_entries,
f59ae581 1791 '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
f0d785d3 1792 'playlist_count': ie_result.get('playlist_count'),
71729754 1793 'playlist_index': playlist_index,
1794 'playlist_autonumber': i,
30a074c2 1795 'playlist': playlist,
1796 'playlist_id': ie_result.get('id'),
1797 'playlist_title': ie_result.get('title'),
1798 'playlist_uploader': ie_result.get('uploader'),
1799 'playlist_uploader_id': ie_result.get('uploader_id'),
30a074c2 1800 'extractor': ie_result['extractor'],
1801 'webpage_url': ie_result['webpage_url'],
1802 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1803 'webpage_url_domain': get_domain(ie_result['webpage_url']),
30a074c2 1804 'extractor_key': ie_result['extractor_key'],
1805 }
1806
1807 if self._match_entry(entry, incomplete=True) is not None:
1808 continue
1809
1810 entry_result = self.__process_iterable_entry(entry, download, extra)
26e2805c 1811 if not entry_result:
1812 failures += 1
1813 if failures >= max_failures:
1814 self.report_error(
1815 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1816 break
30a074c2 1817 playlist_results.append(entry_result)
1818 ie_result['entries'] = playlist_results
e08a85d8 1819
1820 # Write the updated info to json
1821 if _infojson_written and self._write_info_json(
1822 'updated playlist', ie_result,
1823 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1824 return
ca30f449 1825
ed5835b4 1826 ie_result = self.run_all_pps('playlist', ie_result)
1827 self.to_screen(f'[download] Finished downloading playlist: {playlist}')
30a074c2 1828 return ie_result
1829
a0566bbf 1830 @__handle_extraction_exceptions
1831 def __process_iterable_entry(self, entry, download, extra_info):
1832 return self.process_ie_result(
1833 entry, download=download, extra_info=extra_info)
1834
67134eab 1835 def _build_format_filter(self, filter_spec):
1836 " Returns a function to filter the formats according to the filter_spec "
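        # e.g. 'filesize>100M', 'height<=720' (numeric, see OPERATORS below) or
        # 'ext=mp4', 'format_id!*=premium' (string, see STR_OPERATORS below)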
083c9df9 1837
1838 OPERATORS = {
1839 '<': operator.lt,
1840 '<=': operator.le,
1841 '>': operator.gt,
1842 '>=': operator.ge,
1843 '=': operator.eq,
1844 '!=': operator.ne,
1845 }
67134eab 1846 operator_rex = re.compile(r'''(?x)\s*
187986a8 1847 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1848 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1849 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1850 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1851 m = operator_rex.fullmatch(filter_spec)
9ddb6925 1852 if m:
1853 try:
1854 comparison_value = int(m.group('value'))
1855 except ValueError:
1856 comparison_value = parse_filesize(m.group('value'))
1857 if comparison_value is None:
1858 comparison_value = parse_filesize(m.group('value') + 'B')
1859 if comparison_value is None:
1860 raise ValueError(
1861 'Invalid value %r in format specification %r' % (
67134eab 1862 m.group('value'), filter_spec))
9ddb6925 1863 op = OPERATORS[m.group('op')]
1864
083c9df9 1865 if not m:
9ddb6925 1866 STR_OPERATORS = {
1867 '=': operator.eq,
10d33b34 1868 '^=': lambda attr, value: attr.startswith(value),
1869 '$=': lambda attr, value: attr.endswith(value),
1870 '*=': lambda attr, value: value in attr,
1ce9a3cb 1871 '~=': lambda attr, value: value.search(attr) is not None
9ddb6925 1872 }
187986a8 1873 str_operator_rex = re.compile(r'''(?x)\s*
1874 (?P<key>[a-zA-Z0-9._-]+)\s*
1ce9a3cb 1875 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1876 (?P<quote>["'])?
1877 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1878 (?(quote)(?P=quote))\s*
9ddb6925 1879 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1880 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925 1881 if m:
1ce9a3cb 1882 if m.group('op') == '~=':
1883 comparison_value = re.compile(m.group('value'))
1884 else:
1885 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2cc779f4 1886 str_op = STR_OPERATORS[m.group('op')]
1887 if m.group('negation'):
e118a879 1888 op = lambda attr, value: not str_op(attr, value)
2cc779f4 1889 else:
1890 op = str_op
083c9df9 1891
9ddb6925 1892 if not m:
187986a8 1893 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9 1894
1895 def _filter(f):
1896 actual_value = f.get(m.group('key'))
1897 if actual_value is None:
1898 return m.group('none_inclusive')
1899 return op(actual_value, comparison_value)
67134eab 1900 return _filter
1901
9f1a1c36 1902 def _check_formats(self, formats):
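        # Lazily yields only those formats whose URL survives a small test download (self.dl(..., test=True))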
1903 for f in formats:
1904 self.to_screen('[info] Testing format %s' % f['format_id'])
75689fe5 1905 path = self.get_output_path('temp')
1906 if not self._ensure_dir_exists(f'{path}/'):
1907 continue
1908 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
9f1a1c36 1909 temp_file.close()
1910 try:
1911 success, _ = self.dl(temp_file.name, f, test=True)
1912 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1913 success = False
1914 finally:
1915 if os.path.exists(temp_file.name):
1916 try:
1917 os.remove(temp_file.name)
1918 except OSError:
1919 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1920 if success:
1921 yield f
1922 else:
1923 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1924
0017d9ad 1925 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1926
af0f7428 1927 def can_merge():
1928 merger = FFmpegMergerPP(self)
1929 return merger.available and merger.can_merge()
1930
91ebc640 1931 prefer_best = (
b7b04c78 1932 not self.params.get('simulate')
91ebc640 1933 and download
1934 and (
1935 not can_merge()
19807826 1936 or info_dict.get('is_live', False)
de6000d9 1937 or self.outtmpl_dict['default'] == '-'))
53ed7066 1938 compat = (
1939 prefer_best
1940 or self.params.get('allow_multiple_audio_streams', False)
1941 or 'format-spec' in self.params.get('compat_opts', []))
91ebc640 1942
1943 return (
53ed7066 1944 'best/bestvideo+bestaudio' if prefer_best
1945 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1946 else 'bestvideo+bestaudio/best')
0017d9ad 1947
67134eab 1948 def build_format_selector(self, format_spec):
1949 def syntax_error(note, start):
1950 message = (
1951 'Invalid format specification: '
1952 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
1953 return SyntaxError(message)
1954
1955 PICKFIRST = 'PICKFIRST'
1956 MERGE = 'MERGE'
1957 SINGLE = 'SINGLE'
0130afb7 1958 GROUP = 'GROUP'
67134eab 1959 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
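        # Informal grammar of format_spec as parsed below: ',' separates selectors to download,
        # '/' falls back to the next alternative, '+' merges video+audio, '(...)' groups and
        # '[...]' attaches filters -- e.g. 'bestvideo[height<=720]+bestaudio/best'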
1960
91ebc640 1961 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1962 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 1963
9f1a1c36 1964 check_formats = self.params.get('check_formats') == 'selected'
e8e73840 1965
67134eab 1966 def _parse_filter(tokens):
1967 filter_parts = []
1968 for type, string, start, _, _ in tokens:
1969 if type == tokenize.OP and string == ']':
1970 return ''.join(filter_parts)
1971 else:
1972 filter_parts.append(string)
1973
232541df 1974 def _remove_unused_ops(tokens):
17cc1534 1975 # Remove operators that we don't use and join them with the surrounding strings
232541df 1976 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1977 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1978 last_string, last_start, last_end, last_line = None, None, None, None
1979 for type, string, start, end, line in tokens:
1980 if type == tokenize.OP and string == '[':
1981 if last_string:
1982 yield tokenize.NAME, last_string, last_start, last_end, last_line
1983 last_string = None
1984 yield type, string, start, end, line
1985 # everything inside brackets will be handled by _parse_filter
1986 for type, string, start, end, line in tokens:
1987 yield type, string, start, end, line
1988 if type == tokenize.OP and string == ']':
1989 break
1990 elif type == tokenize.OP and string in ALLOWED_OPS:
1991 if last_string:
1992 yield tokenize.NAME, last_string, last_start, last_end, last_line
1993 last_string = None
1994 yield type, string, start, end, line
1995 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1996 if not last_string:
1997 last_string = string
1998 last_start = start
1999 last_end = end
2000 else:
2001 last_string += string
2002 if last_string:
2003 yield tokenize.NAME, last_string, last_start, last_end, last_line
2004
cf2ac6df 2005 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab 2006 selectors = []
2007 current_selector = None
2008 for type, string, start, _, _ in tokens:
2009 # ENCODING is only defined in python 3.x
2010 if type == getattr(tokenize, 'ENCODING', None):
2011 continue
2012 elif type in [tokenize.NAME, tokenize.NUMBER]:
2013 current_selector = FormatSelector(SINGLE, string, [])
2014 elif type == tokenize.OP:
cf2ac6df 2015 if string == ')':
2016 if not inside_group:
2017 # ')' will be handled by the parentheses group
2018 tokens.restore_last_token()
67134eab 2019 break
cf2ac6df 2020 elif inside_merge and string in ['/', ',']:
0130afb7 2021 tokens.restore_last_token()
2022 break
cf2ac6df 2023 elif inside_choice and string == ',':
2024 tokens.restore_last_token()
2025 break
2026 elif string == ',':
0a31a350 2027 if not current_selector:
2028 raise syntax_error('"," must follow a format selector', start)
67134eab 2029 selectors.append(current_selector)
2030 current_selector = None
2031 elif string == '/':
d96d604e 2032 if not current_selector:
2033 raise syntax_error('"/" must follow a format selector', start)
67134eab 2034 first_choice = current_selector
cf2ac6df 2035 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 2036 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab 2037 elif string == '[':
2038 if not current_selector:
2039 current_selector = FormatSelector(SINGLE, 'best', [])
2040 format_filter = _parse_filter(tokens)
2041 current_selector.filters.append(format_filter)
0130afb7 2042 elif string == '(':
2043 if current_selector:
2044 raise syntax_error('Unexpected "("', start)
cf2ac6df 2045 group = _parse_format_selection(tokens, inside_group=True)
2046 current_selector = FormatSelector(GROUP, group, [])
67134eab 2047 elif string == '+':
d03cfdce 2048 if not current_selector:
2049 raise syntax_error('Unexpected "+"', start)
2050 selector_1 = current_selector
2051 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2052 if not selector_2:
2053 raise syntax_error('Expected a selector', start)
2054 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab 2055 else:
2056 raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
2057 elif type == tokenize.ENDMARKER:
2058 break
2059 if current_selector:
2060 selectors.append(current_selector)
2061 return selectors
2062
f8d4ad9a 2063 def _merge(formats_pair):
2064 format_1, format_2 = formats_pair
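            # Builds a single synthetic format dict (e.g. video-only + audio-only) whose
            # 'requested_formats' lists the original formats that must be downloaded and merged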
2065
2066 formats_info = []
2067 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2068 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2069
2070 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
551f9388 2071 get_no_more = {'video': False, 'audio': False}
f8d4ad9a 2072 for (i, fmt_info) in enumerate(formats_info):
551f9388 2073 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2074 formats_info.pop(i)
2075 continue
2076 for aud_vid in ['audio', 'video']:
f8d4ad9a 2077 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2078 if get_no_more[aud_vid]:
2079 formats_info.pop(i)
f5510afe 2080 break
f8d4ad9a 2081 get_no_more[aud_vid] = True
2082
2083 if len(formats_info) == 1:
2084 return formats_info[0]
2085
2086 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2087 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2088
2089 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2090 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2091
2092 output_ext = self.params.get('merge_output_format')
2093 if not output_ext:
2094 if the_only_video:
2095 output_ext = the_only_video['ext']
2096 elif the_only_audio and not video_fmts:
2097 output_ext = the_only_audio['ext']
2098 else:
2099 output_ext = 'mkv'
2100
975a0d0d 2101 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2102
f8d4ad9a 2103 new_dict = {
2104 'requested_formats': formats_info,
975a0d0d 2105 'format': '+'.join(filtered('format')),
2106 'format_id': '+'.join(filtered('format_id')),
f8d4ad9a 2107 'ext': output_ext,
975a0d0d 2108 'protocol': '+'.join(map(determine_protocol, formats_info)),
093a1710 2109 'language': '+'.join(orderedSet(filtered('language'))) or None,
2110 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2111 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
975a0d0d 2112 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
f8d4ad9a 2113 }
2114
2115 if the_only_video:
2116 new_dict.update({
2117 'width': the_only_video.get('width'),
2118 'height': the_only_video.get('height'),
2119 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2120 'fps': the_only_video.get('fps'),
49a57e70 2121 'dynamic_range': the_only_video.get('dynamic_range'),
f8d4ad9a 2122 'vcodec': the_only_video.get('vcodec'),
2123 'vbr': the_only_video.get('vbr'),
2124 'stretched_ratio': the_only_video.get('stretched_ratio'),
2125 })
2126
2127 if the_only_audio:
2128 new_dict.update({
2129 'acodec': the_only_audio.get('acodec'),
2130 'abr': the_only_audio.get('abr'),
975a0d0d 2131 'asr': the_only_audio.get('asr'),
f8d4ad9a 2132 })
2133
2134 return new_dict
2135
e8e73840 2136 def _check_formats(formats):
981052c9 2137 if not check_formats:
2138 yield from formats
b5ac45b1 2139 return
9f1a1c36 2140 yield from self._check_formats(formats)
e8e73840 2141
67134eab 2142 def _build_selector_function(selector):
909d24dd 2143 if isinstance(selector, list): # ,
67134eab 2144 fs = [_build_selector_function(s) for s in selector]
2145
317f7ab6 2146 def selector_function(ctx):
67134eab 2147 for f in fs:
981052c9 2148 yield from f(ctx)
67134eab 2149 return selector_function
909d24dd 2150
2151 elif selector.type == GROUP: # ()
0130afb7 2152 selector_function = _build_selector_function(selector.selector)
909d24dd 2153
2154 elif selector.type == PICKFIRST: # /
67134eab 2155 fs = [_build_selector_function(s) for s in selector.selector]
2156
317f7ab6 2157 def selector_function(ctx):
67134eab 2158 for f in fs:
317f7ab6 2159 picked_formats = list(f(ctx))
67134eab 2160 if picked_formats:
2161 return picked_formats
2162 return []
67134eab 2163
981052c9 2164 elif selector.type == MERGE: # +
2165 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2166
2167 def selector_function(ctx):
adbc4ec4 2168 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
981052c9 2169 yield _merge(pair)
2170
909d24dd 2171 elif selector.type == SINGLE: # atom
598d185d 2172 format_spec = selector.selector or 'best'
909d24dd 2173
f8d4ad9a 2174 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 2175 if format_spec == 'all':
2176 def selector_function(ctx):
9222c381 2177 yield from _check_formats(ctx['formats'][::-1])
f8d4ad9a 2178 elif format_spec == 'mergeall':
2179 def selector_function(ctx):
dd2a987d 2180 formats = list(_check_formats(ctx['formats']))
e01d6aa4 2181 if not formats:
2182 return
921b76ca 2183 merged_format = formats[-1]
2184 for f in formats[-2::-1]:
f8d4ad9a 2185 merged_format = _merge((merged_format, f))
2186 yield merged_format
909d24dd 2187
2188 else:
e8e73840 2189 format_fallback, format_reverse, format_idx = False, True, 1
eff63539 2190 mobj = re.match(
2191 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2192 format_spec)
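                    # Matches e.g. 'best', 'b', 'wa', 'bv*' or 'bestvideo.2' (the 2nd best video-only format)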
2193 if mobj is not None:
2194 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 2195 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 2196 format_type = (mobj.group('type') or [None])[0]
2197 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2198 format_modified = mobj.group('mod') is not None
909d24dd 2199
2200 format_fallback = not format_type and not format_modified # for b, w
8326b00a 2201 _filter_f = (
eff63539 2202 (lambda f: f.get('%scodec' % format_type) != 'none')
2203 if format_type and format_modified # bv*, ba*, wv*, wa*
2204 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2205 if format_type # bv, ba, wv, wa
2206 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2207 if not format_modified # b, w
8326b00a 2208 else lambda f: True) # b*, w*
2209 filter_f = lambda f: _filter_f(f) and (
2210 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
67134eab 2211 else:
48ee10ee 2212 if format_spec in self._format_selection_exts['audio']:
b11c04a8 2213 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
48ee10ee 2214 elif format_spec in self._format_selection_exts['video']:
b11c04a8 2215 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
48ee10ee 2216 elif format_spec in self._format_selection_exts['storyboards']:
b11c04a8 2217 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2218 else:
b5ae35ee 2219 filter_f = lambda f: f.get('format_id') == format_spec # id
909d24dd 2220
2221 def selector_function(ctx):
2222 formats = list(ctx['formats'])
909d24dd 2223 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
e8e73840 2224 if format_fallback and ctx['incomplete_formats'] and not matches:
909d24dd 2225 # for extractors with incomplete formats (audio only (soundcloud)
2226 # or video only (imgur)) best/worst will fall back to
2227 # best/worst {video,audio}-only format
e8e73840 2228 matches = formats
981052c9 2229 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2230 try:
e8e73840 2231 yield matches[format_idx - 1]
981052c9 2232 except IndexError:
2233 return
083c9df9 2234
67134eab 2235 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 2236
317f7ab6 2237 def final_selector(ctx):
adbc4ec4 2238 ctx_copy = dict(ctx)
67134eab 2239 for _filter in filters:
317f7ab6 2240 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2241 return selector_function(ctx_copy)
67134eab 2242 return final_selector
083c9df9 2243
67134eab 2244 stream = io.BytesIO(format_spec.encode('utf-8'))
0130afb7 2245 try:
232541df 2246 tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
0130afb7 2247 except tokenize.TokenError:
2248 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2249
2250 class TokenIterator(object):
2251 def __init__(self, tokens):
2252 self.tokens = tokens
2253 self.counter = 0
2254
2255 def __iter__(self):
2256 return self
2257
2258 def __next__(self):
2259 if self.counter >= len(self.tokens):
2260 raise StopIteration()
2261 value = self.tokens[self.counter]
2262 self.counter += 1
2263 return value
2264
2265 next = __next__
2266
2267 def restore_last_token(self):
2268 self.counter -= 1
2269
2270 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 2271 return _build_selector_function(parsed_selector)
a9c58ad9 2272
e5660ee6 2273 def _calc_headers(self, info_dict):
8b7539d2 2274 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
e5660ee6 2275
2276 cookies = self._calc_cookies(info_dict)
2277 if cookies:
2278 res['Cookie'] = cookies
2279
0016b84e 2280 if 'X-Forwarded-For' not in res:
2281 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2282 if x_forwarded_for_ip:
2283 res['X-Forwarded-For'] = x_forwarded_for_ip
2284
e5660ee6 2285 return res
2286
2287 def _calc_cookies(self, info_dict):
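        # Returns the value of the 'Cookie' header that the current cookiejar would send for info_dict['url']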
5c2266df 2288 pr = sanitized_Request(info_dict['url'])
e5660ee6 2289 self.cookiejar.add_cookie_header(pr)
662435f7 2290 return pr.get_header('Cookie')
e5660ee6 2291
9f1a1c36 2292 def _sort_thumbnails(self, thumbnails):
2293 thumbnails.sort(key=lambda t: (
2294 t.get('preference') if t.get('preference') is not None else -1,
2295 t.get('width') if t.get('width') is not None else -1,
2296 t.get('height') if t.get('height') is not None else -1,
2297 t.get('id') if t.get('id') is not None else '',
2298 t.get('url')))
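        # Ascending sort: callers treat the last element as the preferred/best thumbnail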
2299
b0249bca 2300 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2301 thumbnails = info_dict.get('thumbnails')
2302 if thumbnails is None:
2303 thumbnail = info_dict.get('thumbnail')
2304 if thumbnail:
2305 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
9f1a1c36 2306 if not thumbnails:
2307 return
2308
2309 def check_thumbnails(thumbnails):
2310 for t in thumbnails:
2311 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2312 try:
2313 self.urlopen(HEADRequest(t['url']))
2314 except network_exceptions as err:
2315 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2316 continue
2317 yield t
2318
2319 self._sort_thumbnails(thumbnails)
2320 for i, t in enumerate(thumbnails):
2321 if t.get('id') is None:
2322 t['id'] = '%d' % i
2323 if t.get('width') and t.get('height'):
2324 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2325 t['url'] = sanitize_url(t['url'])
2326
2327 if self.params.get('check_formats') is True:
282f5709 2328 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
9f1a1c36 2329 else:
2330 info_dict['thumbnails'] = thumbnails
bc516a3f 2331
03f83004 2332 def _fill_common_fields(self, info_dict, is_video=True):
2333 # TODO: move sanitization here
2334 if is_video:
2335 # playlists are allowed to lack "title"
2336 info_dict['fulltitle'] = info_dict.get('title')
2337 if 'title' not in info_dict:
2338 raise ExtractorError('Missing "title" field in extractor result',
2339 video_id=info_dict['id'], ie=info_dict['extractor'])
2340 elif not info_dict.get('title'):
2341 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2342 info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
2343
2344 if info_dict.get('duration') is not None:
2345 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2346
2347 for ts_key, date_key in (
2348 ('timestamp', 'upload_date'),
2349 ('release_timestamp', 'release_date'),
2350 ('modified_timestamp', 'modified_date'),
2351 ):
2352 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2353 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2354 # see http://bugs.python.org/issue1646728)
2355 try:
2356 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2357 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2358 except (ValueError, OverflowError, OSError):
2359 pass
2360
2361 live_keys = ('is_live', 'was_live')
2362 live_status = info_dict.get('live_status')
2363 if live_status is None:
2364 for key in live_keys:
2365 if info_dict.get(key) is False:
2366 continue
2367 if info_dict.get(key):
2368 live_status = key
2369 break
2370 if all(info_dict.get(key) is False for key in live_keys):
2371 live_status = 'not_live'
2372 if live_status:
2373 info_dict['live_status'] = live_status
2374 for key in live_keys:
2375 if info_dict.get(key) is None:
2376 info_dict[key] = (live_status == key)
2377
2378 # Auto generate title fields corresponding to the *_number fields when missing
2379 # in order to always have clean titles. This is very common for TV series.
2380 for field in ('chapter', 'season', 'episode'):
2381 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2382 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2383
dd82ffea 2384 def process_video_result(self, info_dict, download=True):
2385 assert info_dict.get('_type', 'video') == 'video'
9c906919 2386 self._num_videos += 1
dd82ffea 2387
bec1fad2 2388 if 'id' not in info_dict:
fc08bdd6 2389 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2390 elif not info_dict.get('id'):
2391 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
455a15e2 2392
c9969434 2393 def report_force_conversion(field, field_not, conversion):
2394 self.report_warning(
2395 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2396 % (field, field_not, conversion))
2397
2398 def sanitize_string_field(info, string_field):
2399 field = info.get(string_field)
2400 if field is None or isinstance(field, compat_str):
2401 return
2402 report_force_conversion(string_field, 'a string', 'string')
2403 info[string_field] = compat_str(field)
2404
2405 def sanitize_numeric_fields(info):
2406 for numeric_field in self._NUMERIC_FIELDS:
2407 field = info.get(numeric_field)
2408 if field is None or isinstance(field, compat_numeric_types):
2409 continue
2410 report_force_conversion(numeric_field, 'numeric', 'int')
2411 info[numeric_field] = int_or_none(field)
2412
2413 sanitize_string_field(info_dict, 'id')
2414 sanitize_numeric_fields(info_dict)
4c3f8c3f 2415 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
50e93e03 2416 self.report_warning('"duration" field is negative, there is an error in extractor')
be6217b2 2417
dd82ffea 2418 if 'playlist' not in info_dict:
2419 # It isn't part of a playlist
2420 info_dict['playlist'] = None
2421 info_dict['playlist_index'] = None
2422
bc516a3f 2423 self._sanitize_thumbnails(info_dict)
d5519808 2424
536a55da 2425 thumbnail = info_dict.get('thumbnail')
bc516a3f 2426 thumbnails = info_dict.get('thumbnails')
536a55da 2427 if thumbnail:
2428 info_dict['thumbnail'] = sanitize_url(thumbnail)
2429 elif thumbnails:
d5519808 2430 info_dict['thumbnail'] = thumbnails[-1]['url']
2431
ae30b840 2432 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b 2433 info_dict['display_id'] = info_dict['id']
2434
03f83004 2435 self._fill_common_fields(info_dict)
33d2fc2f 2436
05108a49 2437 for cc_kind in ('subtitles', 'automatic_captions'):
2438 cc = info_dict.get(cc_kind)
2439 if cc:
2440 for _, subtitle in cc.items():
2441 for subtitle_format in subtitle:
2442 if subtitle_format.get('url'):
2443 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2444 if subtitle_format.get('ext') is None:
2445 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2446
2447 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2448 subtitles = info_dict.get('subtitles')
4bba3716 2449
360e1ca5 2450 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2451 info_dict['id'], subtitles, automatic_captions)
a504ced0 2452
dd82ffea 2453 if info_dict.get('formats') is None:
2454 # There's only one format available
2455 formats = [info_dict]
2456 else:
2457 formats = info_dict['formats']
2458
e0493e90 2459 info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
88acdbc2 2460 if not self.params.get('allow_unplayable_formats'):
2461 formats = [f for f in formats if not f.get('has_drm')]
c0b6e5c7 2462 if info_dict['__has_drm'] and all(
2463 f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2464 self.report_warning(
2465 'This video is DRM protected and only images are available for download. '
2466 'Use --list-formats to see them')
88acdbc2 2467
319b6059 2468 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2469 if not get_from_start:
2470 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2471 if info_dict.get('is_live') and formats:
adbc4ec4 2472 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
319b6059 2473 if get_from_start and not formats:
2474 self.raise_no_formats(info_dict, msg='--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2475 'If you want to download from the current time, pass --no-live-from-start')
adbc4ec4 2476
db95dc13 2477 if not formats:
1151c407 2478 self.raise_no_formats(info_dict)
db95dc13 2479
73af5cc8 2480 def is_wellformed(f):
2481 url = f.get('url')
a5ac0c47 2482 if not url:
73af5cc8 2483 self.report_warning(
2484 '"url" field is missing or empty - skipping format, '
2485 'there is an error in extractor')
a5ac0c47 2486 return False
2487 if isinstance(url, bytes):
2488 sanitize_string_field(f, 'url')
2489 return True
73af5cc8 2490
2491 # Filter out malformed formats for better extraction robustness
2492 formats = list(filter(is_wellformed, formats))
2493
181c7053 2494 formats_dict = {}
2495
dd82ffea 2496 # We check that all the formats have the format and format_id fields
db95dc13 2497 for i, format in enumerate(formats):
c9969434 2498 sanitize_string_field(format, 'format_id')
2499 sanitize_numeric_fields(format)
dcf77cf1 2500 format['url'] = sanitize_url(format['url'])
e74e3b63 2501 if not format.get('format_id'):
8016c922 2502 format['format_id'] = compat_str(i)
e2effb08 2503 else:
2504 # Sanitize format_id from characters used in format selector expression
ec85ded8 2505 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053 2506 format_id = format['format_id']
2507 if format_id not in formats_dict:
2508 formats_dict[format_id] = []
2509 formats_dict[format_id].append(format)
2510
2511 # Make sure all formats have unique format_id
03b4de72 2512 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
181c7053 2513 for format_id, ambiguous_formats in formats_dict.items():
48ee10ee 2514 ambiguous_id = len(ambiguous_formats) > 1
2515 for i, format in enumerate(ambiguous_formats):
2516 if ambiguous_id:
181c7053 2517 format['format_id'] = '%s-%d' % (format_id, i)
48ee10ee 2518 if format.get('ext') is None:
2519 format['ext'] = determine_ext(format['url']).lower()
2520 # Ensure there is no conflict between id and ext in format selection
2521 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2522 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2523 format['format_id'] = 'f%s' % format['format_id']
181c7053 2524
2525 for i, format in enumerate(formats):
8c51aa65 2526 if format.get('format') is None:
6febd1c1 2527 format['format'] = '{id} - {res}{note}'.format(
8c51aa65 2528 id=format['format_id'],
2529 res=self.format_resolution(format),
b868936c 2530 note=format_field(format, 'format_note', ' (%s)'),
8c51aa65 2531 )
6f0be937 2532 if format.get('protocol') is None:
b5559424 2533 format['protocol'] = determine_protocol(format)
239df021 2534 if format.get('resolution') is None:
2535 format['resolution'] = self.format_resolution(format, default=None)
176f1866 2536 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2537 format['dynamic_range'] = 'SDR'
f2fe69c7 2538 if (info_dict.get('duration') and format.get('tbr')
2539 and not format.get('filesize') and not format.get('filesize_approx')):
2540 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2541
e5660ee6 2542 # Add HTTP headers, so that external programs can use them from the
2543 # json output
2544 full_format_info = info_dict.copy()
2545 full_format_info.update(format)
2546 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e 2547 # Remove private housekeeping stuff
2548 if '__x_forwarded_for_ip' in info_dict:
2549 del info_dict['__x_forwarded_for_ip']
dd82ffea 2550
9f1a1c36 2551 if self.params.get('check_formats') is True:
282f5709 2552 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
9f1a1c36 2553
88acdbc2 2554 if not formats or formats[0] is not info_dict:
b3d9ef88 2555 # only set the 'formats' field if the original info_dict lists them
2556 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 2557 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2558 # which can't be exported to json
b3d9ef88 2559 info_dict['formats'] = formats
4ec82a72 2560
2561 info_dict, _ = self.pre_process(info_dict)
2562
6db9c4d5 2563 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
09b49e1f 2564 return info_dict
2565
2566 self.post_extract(info_dict)
2567 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2568
093a1710 2569 # The pre-processors may have modified the formats
2570 formats = info_dict.get('formats', [info_dict])
2571
fa9f30b8 2572 list_only = self.params.get('simulate') is None and (
2573 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2574 interactive_format_selection = not list_only and self.format_selector == '-'
b7b04c78 2575 if self.params.get('list_thumbnails'):
2576 self.list_thumbnails(info_dict)
b7b04c78 2577 if self.params.get('listsubtitles'):
2578 if 'automatic_captions' in info_dict:
2579 self.list_subtitles(
2580 info_dict['id'], automatic_captions, 'automatic captions')
2581 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
fa9f30b8 2582 if self.params.get('listformats') or interactive_format_selection:
b69fd25c 2583 self.list_formats(info_dict)
169dbde9 2584 if list_only:
b7b04c78 2585 # Without this printing, -F --print-json will not work
169dbde9 2586 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
bfaae0a7 2587 return
2588
187986a8 2589 format_selector = self.format_selector
2590 if format_selector is None:
0017d9ad 2591 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2592 self.write_debug('Default format spec: %s' % req_format)
187986a8 2593 format_selector = self.build_format_selector(req_format)
317f7ab6 2594
fa9f30b8 2595 while True:
2596 if interactive_format_selection:
2597 req_format = input(
2598 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2599 try:
2600 format_selector = self.build_format_selector(req_format)
2601 except SyntaxError as err:
2602 self.report_error(err, tb=False, is_error=False)
2603 continue
2604
2605 # During format selection we may need access to the original format set
2606 # in order to calculate some metrics or do some processing.
2607 # For now we need to be able to guess whether the original formats provided
2608 # by the extractor are incomplete (i.e. whether the extractor provides only
2609 # video-only or audio-only formats) for proper format selection with
2610 # extractors that have such incomplete formats (see
2611 # https://github.com/ytdl-org/youtube-dl/pull/5556).
2612 # Since formats may be filtered during format selection and may no longer match
2613 # the original formats, the results may be incorrect. Thus the original formats
2614 # or pre-calculated metrics should be passed to the format selection routines
2615 # as well.
2616 # We therefore pass a context object containing all necessary additional data
2617 # instead of just the formats.
2618 # This fixes an incorrect format selection issue (see
2619 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2620 incomplete_formats = (
2621 # All formats are video-only or
2622 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2623 # all formats are audio-only
2624 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2625
2626 ctx = {
2627 'formats': formats,
2628 'incomplete_formats': incomplete_formats,
2629 }
2630
2631 formats_to_download = list(format_selector(ctx))
2632 if interactive_format_selection and not formats_to_download:
2633 self.report_error('Requested format is not available', tb=False, is_error=False)
2634 continue
2635 break
317f7ab6 2636
dd82ffea 2637 if not formats_to_download:
b7da73eb 2638 if not self.params.get('ignore_no_formats_error'):
c0b6e5c7 2639 raise ExtractorError(
2640 'Requested format is not available. Use --list-formats for a list of available formats',
2641 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
b62fa6d7 2642 self.report_warning('Requested format is not available')
2643 # Process what we can, even without any available formats.
2644 formats_to_download = [{}]
a13e6848 2645
b62fa6d7 2646 best_format = formats_to_download[-1]
2647 if download:
2648 if best_format:
2649 self.to_screen(
2650 f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2651 + ', '.join([f['format_id'] for f in formats_to_download]))
a13e6848 2652 max_downloads_reached = False
f46e2f9d 2653 for i, fmt in enumerate(formats_to_download):
09b49e1f 2654 formats_to_download[i] = new_info = self._copy_infodict(info_dict)
b7da73eb 2655 new_info.update(fmt)
a13e6848 2656 try:
2657 self.process_info(new_info)
2658 except MaxDownloadsReached:
2659 max_downloads_reached = True
f46e2f9d 2660 # Remove copied info
2661 for key, val in tuple(new_info.items()):
2662 if info_dict.get(key) == val:
2663 new_info.pop(key)
a13e6848 2664 if max_downloads_reached:
2665 break
ebed8b37 2666
9e907ebd 2667 write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
a13e6848 2668 assert write_archive.issubset({True, False, 'ignore'})
2669 if True in write_archive and False not in write_archive:
2670 self.record_download_archive(info_dict)
be72c624 2671
2672 info_dict['requested_downloads'] = formats_to_download
ed5835b4 2673 info_dict = self.run_all_pps('after_video', info_dict)
a13e6848 2674 if max_downloads_reached:
2675 raise MaxDownloadsReached()
ebed8b37 2676
49a57e70 2677 # We update the info dict with the selected best quality format (backwards compatibility)
be72c624 2678 info_dict.update(best_format)
dd82ffea
JMF
2679 return info_dict
2680
98c70d6f 2681 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2682 """Select the requested subtitles and their format"""
98c70d6f
JMF
2683 available_subs = {}
2684 if normal_subtitles and self.params.get('writesubtitles'):
2685 available_subs.update(normal_subtitles)
2686 if automatic_captions and self.params.get('writeautomaticsub'):
2687 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
2688 if lang not in available_subs:
2689 available_subs[lang] = cap_info
2690
4d171848
JMF
2691 if (not self.params.get('writesubtitles') and not
2692 self.params.get('writeautomaticsub') or not
2693 available_subs):
2694 return None
a504ced0 2695
c32b0aab 2696 all_sub_langs = available_subs.keys()
a504ced0 2697 if self.params.get('allsubtitles', False):
c32b0aab 2698 requested_langs = all_sub_langs
2699 elif self.params.get('subtitleslangs', False):
77c4a9ef 2700 # A list is used so that the order of languages will be the same as
2701 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
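# Entries prefixed with '-' are discarded and 'all' expands to every available
# language, e.g. ['all', '-live_chat'] requests everything except live chat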
2702 requested_langs = []
2703 for lang_re in self.params.get('subtitleslangs'):
77c4a9ef 2704 discard = lang_re[0] == '-'
c32b0aab 2705 if discard:
77c4a9ef 2706 lang_re = lang_re[1:]
3aa91540 2707 if lang_re == 'all':
2708 if discard:
2709 requested_langs = []
2710 else:
2711 requested_langs.extend(all_sub_langs)
2712 continue
77c4a9ef 2713 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
c32b0aab 2714 if discard:
2715 for lang in current_langs:
77c4a9ef 2716 while lang in requested_langs:
2717 requested_langs.remove(lang)
c32b0aab 2718 else:
77c4a9ef 2719 requested_langs.extend(current_langs)
2720 requested_langs = orderedSet(requested_langs)
c32b0aab 2721 elif 'en' in available_subs:
2722 requested_langs = ['en']
a504ced0 2723 else:
c32b0aab 2724 requested_langs = [list(all_sub_langs)[0]]
ad3dc496 2725 if requested_langs:
2726 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
a504ced0
JMF
2727
2728 formats_query = self.params.get('subtitlesformat', 'best')
2729 formats_preference = formats_query.split('/') if formats_query else []
2730 subs = {}
2731 for lang in requested_langs:
2732 formats = available_subs.get(lang)
2733 if formats is None:
2734 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
2735 continue
a504ced0
JMF
2736 for ext in formats_preference:
2737 if ext == 'best':
2738 f = formats[-1]
2739 break
2740 matches = list(filter(lambda f: f['ext'] == ext, formats))
2741 if matches:
2742 f = matches[-1]
2743 break
2744 else:
2745 f = formats[-1]
2746 self.report_warning(
2747 'No subtitle format found matching "%s" for language %s, '
2748 'using %s' % (formats_query, lang, f['ext']))
2749 subs[lang] = f
2750 return subs
2751
bb66c247 2752 def _forceprint(self, key, info_dict):
2753 if info_dict is None:
2754 return
2755 info_copy = info_dict.copy()
2756 info_copy['formats_table'] = self.render_formats_table(info_dict)
2757 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2758 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2759 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2760
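# format_tmpl('title=') -> 'title = %(title)r', format_tmpl('title') -> '%(title)s';
# anything else is treated as a full output template and returned unchanged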
2761 def format_tmpl(tmpl):
2762 mobj = re.match(r'\w+(=?)$', tmpl)
2763 if mobj and mobj.group(1):
2764 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2765 elif mobj:
2766 return f'%({tmpl})s'
2767 return tmpl
8130779d 2768
bb66c247 2769 for tmpl in self.params['forceprint'].get(key, []):
2770 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2771
2772 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2773 filename = self.evaluate_outtmpl(file_tmpl, info_dict)
2774 tmpl = format_tmpl(tmpl)
2775 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
8d93e69d 2776 if self._ensure_dir_exists(filename):
2777 with io.open(filename, 'a', encoding='utf-8') as f:
2778 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
ca30f449 2779
d06daf23 2780 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2781 def print_mandatory(field, actual_field=None):
2782 if actual_field is None:
2783 actual_field = field
d06daf23 2784 if (self.params.get('force%s' % field, False)
53c18592 2785 and (not incomplete or info_dict.get(actual_field) is not None)):
2786 self.to_stdout(info_dict[actual_field])
d06daf23
S
2787
2788 def print_optional(field):
2789 if (self.params.get('force%s' % field, False)
2790 and info_dict.get(field) is not None):
2791 self.to_stdout(info_dict[field])
2792
53c18592 2793 info_dict = info_dict.copy()
2794 if filename is not None:
2795 info_dict['filename'] = filename
2796 if info_dict.get('requested_formats') is not None:
2797 # For RTMP URLs, also include the playpath
2798 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
10331a26 2799 elif info_dict.get('url'):
53c18592 2800 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2801
bb66c247 2802 if (self.params.get('forcejson')
2803 or self.params['forceprint'].get('video')
2804 or self.params['print_to_file'].get('video')):
2b8a2973 2805 self.post_extract(info_dict)
bb66c247 2806 self._forceprint('video', info_dict)
53c18592 2807
d06daf23
S
2808 print_mandatory('title')
2809 print_mandatory('id')
53c18592 2810 print_mandatory('url', 'urls')
d06daf23
S
2811 print_optional('thumbnail')
2812 print_optional('description')
53c18592 2813 print_optional('filename')
b868936c 2814 if self.params.get('forceduration') and info_dict.get('duration') is not None:
d06daf23
S
2815 self.to_stdout(formatSeconds(info_dict['duration']))
2816 print_mandatory('format')
53c18592 2817
2b8a2973 2818 if self.params.get('forcejson'):
6e84b215 2819 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2820
e8e73840 2821 def dl(self, name, info, subtitle=False, test=False):
88acdbc2 2822 if not info.get('url'):
1151c407 2823 self.raise_no_formats(info, True)
e8e73840 2824
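# Test downloads use a minimal parameter set: quiet unless verbose, no .part
# files, no .ytdl resume data, and overwrites allowed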
2825 if test:
2826 verbose = self.params.get('verbose')
2827 params = {
2828 'test': True,
a169858f 2829 'quiet': self.params.get('quiet') or not verbose,
e8e73840 2830 'verbose': verbose,
2831 'noprogress': not verbose,
2832 'nopart': True,
2833 'skip_unavailable_fragments': False,
2834 'keep_fragments': False,
2835 'overwrites': True,
2836 '_no_ytdl_file': True,
2837 }
2838 else:
2839 params = self.params
96fccc10 2840 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
e8e73840 2841 if not test:
2842 for ph in self._progress_hooks:
2843 fd.add_progress_hook(ph)
42676437
M
2844 urls = '", "'.join(
2845 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2846 for f in info.get('requested_formats', []) or [info])
18e674b4 2847 self.write_debug('Invoking downloader on "%s"' % urls)
03b4de72 2848
adbc4ec4
THD
2849 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
2850 # But it may contain objects that are not deep-copyable
2851 new_info = self._copy_infodict(info)
e8e73840 2852 if new_info.get('http_headers') is None:
2853 new_info['http_headers'] = self._calc_headers(new_info)
2854 return fd.download(name, new_info, subtitle)
2855
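# Return the first existing path when overwrites are disabled; otherwise delete
# any existing copies and return None so that the caller re-downloads the file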
e04938ab 2856 def existing_file(self, filepaths, *, default_overwrite=True):
2857 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2858 if existing_files and not self.params.get('overwrites', default_overwrite):
2859 return existing_files[0]
2860
2861 for file in existing_files:
2862 self.report_file_delete(file)
2863 os.remove(file)
2864 return None
2865
8222d8de 2866 def process_info(self, info_dict):
09b49e1f 2867 """Process a single resolved IE result. (Modifies it in-place)"""
8222d8de
JMF
2868
2869 assert info_dict.get('_type', 'video') == 'video'
f46e2f9d 2870 original_infodict = info_dict
fd288278 2871
4513a41a 2872 if 'format' not in info_dict and 'ext' in info_dict:
8222d8de
JMF
2873 info_dict['format'] = info_dict['ext']
2874
09b49e1f 2875 # This is mostly just for backward compatibility of process_info
2876 # As a side-effect, this allows for format-specific filters
c77495e3 2877 if self._match_entry(info_dict) is not None:
9e907ebd 2878 info_dict['__write_download_archive'] = 'ignore'
8222d8de
JMF
2879 return
2880
09b49e1f 2881 # Does nothing under normal operation - for backward compatibility of process_info
277d6ff5 2882 self.post_extract(info_dict)
0c14d66a 2883 self._num_downloads += 1
8222d8de 2884
dcf64d43 2885 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2886 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2887 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2888 files_to_move = {}
8222d8de
JMF
2889
2890 # Forced printings
4513a41a 2891 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 2892
b7b04c78 2893 if self.params.get('simulate'):
9e907ebd 2894 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
8222d8de
JMF
2895 return
2896
de6000d9 2897 if full_filename is None:
8222d8de 2898 return
e92caff5 2899 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 2900 return
e92caff5 2901 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
2902 return
2903
80c03fa9 2904 if self._write_description('video', info_dict,
2905 self.prepare_filename(info_dict, 'description')) is None:
2906 return
2907
2908 sub_files = self._write_subtitles(info_dict, temp_filename)
2909 if sub_files is None:
2910 return
2911 files_to_move.update(dict(sub_files))
2912
2913 thumb_files = self._write_thumbnails(
2914 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2915 if thumb_files is None:
2916 return
2917 files_to_move.update(dict(thumb_files))
8222d8de 2918
80c03fa9 2919 infofn = self.prepare_filename(info_dict, 'infojson')
2920 _infojson_written = self._write_info_json('video', info_dict, infofn)
2921 if _infojson_written:
dac5df5a 2922 info_dict['infojson_filename'] = infofn
e75bb0d6 2923 # For backward compatibility, even though it was a private field
80c03fa9 2924 info_dict['__infojson_filename'] = infofn
2925 elif _infojson_written is None:
2926 return
2927
2928 # Note: Annotations are deprecated
2929 annofn = None
1fb07d10 2930 if self.params.get('writeannotations', False):
de6000d9 2931 annofn = self.prepare_filename(info_dict, 'annotation')
80c03fa9 2932 if annofn:
e92caff5 2933 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 2934 return
0c3d0f51 2935 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 2936 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
2937 elif not info_dict.get('annotations'):
2938 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
2939 else:
2940 try:
6febd1c1 2941 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
2942 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2943 annofile.write(info_dict['annotations'])
2944 except (KeyError, TypeError):
6febd1c1 2945 self.report_warning('There are no annotations to write.')
7b6fefc9 2946 except (OSError, IOError):
6febd1c1 2947 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 2948 return
1fb07d10 2949
732044af 2950 # Write internet shortcut files
08438d2c 2951 def _write_link_file(link_type):
60f3e995 2952 url = try_get(info_dict['webpage_url'], iri_to_uri)
2953 if not url:
2954 self.report_warning(
2955 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2956 return True
08438d2c 2957 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
0e6b018a
Z
2958 if not self._ensure_dir_exists(encodeFilename(linkfn)):
2959 return False
10e3742e 2960 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
08438d2c 2961 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2962 return True
2963 try:
2964 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2965 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2966 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
60f3e995 2967 template_vars = {'url': url}
08438d2c 2968 if link_type == 'desktop':
2969 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2970 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2971 except (OSError, IOError):
2972 self.report_error(f'Cannot write internet shortcut {linkfn}')
2973 return False
732044af 2974 return True
2975
08438d2c 2976 write_links = {
2977 'url': self.params.get('writeurllink'),
2978 'webloc': self.params.get('writewebloclink'),
2979 'desktop': self.params.get('writedesktoplink'),
2980 }
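# --write-link picks the shortcut type from the platform: .webloc on macOS,
# .desktop on Linux and .url everywhere else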
2981 if self.params.get('writelink'):
2982 link_type = ('webloc' if sys.platform == 'darwin'
2983 else 'desktop' if sys.platform.startswith('linux')
2984 else 'url')
2985 write_links[link_type] = True
2986
2987 if any(should_write and not _write_link_file(link_type)
2988 for link_type, should_write in write_links.items()):
2989 return
732044af 2990
f46e2f9d 2991 def replace_info_dict(new_info):
2992 nonlocal info_dict
2993 if new_info == info_dict:
2994 return
2995 info_dict.clear()
2996 info_dict.update(new_info)
2997
56d868db 2998 try:
f46e2f9d 2999 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3000 replace_info_dict(new_info)
56d868db 3001 except PostProcessingError as err:
3002 self.report_error('Preprocessing: %s' % str(err))
3003 return
3004
a13e6848 3005 if self.params.get('skip_download'):
56d868db 3006 info_dict['filepath'] = temp_filename
3007 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3008 info_dict['__files_to_move'] = files_to_move
f46e2f9d 3009 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
9e907ebd 3010 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
56d868db 3011 else:
3012 # Download
b868936c 3013 info_dict.setdefault('__postprocessors', [])
4340deca 3014 try:
0202b52a 3015
e04938ab 3016 def existing_video_file(*filepaths):
6b591b29 3017 ext = info_dict.get('ext')
e04938ab 3018 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3019 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3020 default_overwrite=False)
3021 if file:
3022 info_dict['ext'] = os.path.splitext(file)[1][1:]
3023 return file
0202b52a 3024
3025 success = True
4340deca 3026 if info_dict.get('requested_formats') is not None:
81cd954a
S
3027
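# Whether the requested video/audio formats can be merged into their common
# container as-is; if not, the code below falls back to merging into mkv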
3028 def compatible_formats(formats):
d03cfdce 3029 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3030 video_formats = [format for format in formats if format.get('vcodec') != 'none']
3031 audio_formats = [format for format in formats if format.get('acodec') != 'none']
3032 if len(video_formats) > 2 or len(audio_formats) > 2:
3033 return False
3034
81cd954a 3035 # Check extension
d03cfdce 3036 exts = set(format.get('ext') for format in formats)
3037 COMPATIBLE_EXTS = (
3038 set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
3039 set(('webm',)),
3040 )
3041 for ext_sets in COMPATIBLE_EXTS:
3042 if ext_sets.issuperset(exts):
3043 return True
81cd954a
S
3044 # TODO: Check acodec/vcodec
3045 return False
3046
3047 requested_formats = info_dict['requested_formats']
0202b52a 3048 old_ext = info_dict['ext']
4e3b637d 3049 if self.params.get('merge_output_format') is None:
3050 if not compatible_formats(requested_formats):
3051 info_dict['ext'] = 'mkv'
3052 self.report_warning(
3053 'Requested formats are incompatible for merge and will be merged into mkv')
3054 if (info_dict['ext'] == 'webm'
3055 and info_dict.get('thumbnails')
3056 # check with type instead of pp_key, __name__, or isinstance
3057 # since we don't want any custom PPs to trigger this
3058 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
3059 info_dict['ext'] = 'mkv'
3060 self.report_warning(
3061 'webm doesn\'t support embedding a thumbnail, mkv will be used')
124bc071 3062 new_ext = info_dict['ext']
0202b52a 3063
124bc071 3064 def correct_ext(filename, ext=new_ext):
96fccc10 3065 if filename == '-':
3066 return filename
0202b52a 3067 filename_real_ext = os.path.splitext(filename)[1][1:]
3068 filename_wo_ext = (
3069 os.path.splitext(filename)[0]
124bc071 3070 if filename_real_ext in (old_ext, new_ext)
0202b52a 3071 else filename)
124bc071 3072 return '%s.%s' % (filename_wo_ext, ext)
0202b52a 3073
38c6902b 3074 # Ensure filename always has a correct extension for successful merge
0202b52a 3075 full_filename = correct_ext(full_filename)
3076 temp_filename = correct_ext(temp_filename)
e04938ab 3077 dl_filename = existing_video_file(full_filename, temp_filename)
1ea24129 3078 info_dict['__real_download'] = False
18e674b4 3079
adbc4ec4
THD
3080 downloaded = []
3081 merger = FFmpegMergerPP(self)
3082
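# If a single downloader (e.g. FFmpegFD) can handle all requested formats at
# once, hand them over together; otherwise download each format separately and
# let FFmpegMergerPP combine them afterwards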
3083 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
dbf5416a 3084 if dl_filename is not None:
6c7274ec 3085 self.report_file_already_downloaded(dl_filename)
adbc4ec4
THD
3086 elif fd:
3087 for f in requested_formats if fd != FFmpegFD else []:
3088 f['filepath'] = fname = prepend_extension(
3089 correct_ext(temp_filename, info_dict['ext']),
3090 'f%s' % f['format_id'], info_dict['ext'])
3091 downloaded.append(fname)
dbf5416a 3092 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3093 success, real_download = self.dl(temp_filename, info_dict)
3094 info_dict['__real_download'] = real_download
18e674b4 3095 else:
18e674b4 3096 if self.params.get('allow_unplayable_formats'):
3097 self.report_warning(
3098 'You have requested merging of multiple formats '
3099 'while also allowing unplayable formats to be downloaded. '
3100 'The formats won\'t be merged to prevent data corruption.')
3101 elif not merger.available:
e8969bda 3102 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3103 if not self.params.get('ignoreerrors'):
3104 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3105 return
3106 self.report_warning(f'{msg}. The formats won\'t be merged')
18e674b4 3107
96fccc10 3108 if temp_filename == '-':
adbc4ec4 3109 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
96fccc10 3110 else 'but the formats are incompatible for simultaneous download' if merger.available
3111 else 'but ffmpeg is not installed')
3112 self.report_warning(
3113 f'You have requested downloading multiple formats to stdout {reason}. '
3114 'The formats will be streamed one after the other')
3115 fname = temp_filename
dbf5416a 3116 for f in requested_formats:
3117 new_info = dict(info_dict)
3118 del new_info['requested_formats']
3119 new_info.update(f)
96fccc10 3120 if temp_filename != '-':
124bc071 3121 fname = prepend_extension(
3122 correct_ext(temp_filename, new_info['ext']),
3123 'f%s' % f['format_id'], new_info['ext'])
96fccc10 3124 if not self._ensure_dir_exists(fname):
3125 return
a21e0ab1 3126 f['filepath'] = fname
96fccc10 3127 downloaded.append(fname)
dbf5416a 3128 partial_success, real_download = self.dl(fname, new_info)
3129 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3130 success = success and partial_success
adbc4ec4
THD
3131
3132 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3133 info_dict['__postprocessors'].append(merger)
3134 info_dict['__files_to_merge'] = downloaded
3135 # Even if there were no downloads, the merge only happens now
3136 info_dict['__real_download'] = True
3137 else:
3138 for file in downloaded:
3139 files_to_move[file] = None
4340deca
P
3140 else:
3141 # Just a single file
e04938ab 3142 dl_filename = existing_video_file(full_filename, temp_filename)
6c7274ec 3143 if dl_filename is None or dl_filename == temp_filename:
3144 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3145 # So we should try to resume the download
e8e73840 3146 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 3147 info_dict['__real_download'] = real_download
6c7274ec 3148 else:
3149 self.report_file_already_downloaded(dl_filename)
0202b52a 3150
0202b52a 3151 dl_filename = dl_filename or temp_filename
c571435f 3152 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 3153
3158150c 3154 except network_exceptions as err:
7960b056 3155 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca
P
3156 return
3157 except (OSError, IOError) as err:
3158 raise UnavailableVideoError(err)
3159 except (ContentTooShortError, ) as err:
3160 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
3161 return
8222d8de 3162
de6000d9 3163 if success and full_filename != '-':
f17f8651 3164
fd7cfb64 3165 def fixup():
3166 do_fixup = True
3167 fixup_policy = self.params.get('fixup')
3168 vid = info_dict['id']
3169
3170 if fixup_policy in ('ignore', 'never'):
3171 return
3172 elif fixup_policy == 'warn':
3173 do_fixup = False
f89b3e2d 3174 elif fixup_policy != 'force':
3175 assert fixup_policy in ('detect_or_warn', None)
3176 if not info_dict.get('__real_download'):
3177 do_fixup = False
fd7cfb64 3178
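# Queue an ffmpeg-based fixup postprocessor when the condition holds; if fixups
# are disabled or ffmpeg is unavailable, only a warning is emitted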
3179 def ffmpeg_fixup(cndn, msg, cls):
3180 if not cndn:
3181 return
3182 if not do_fixup:
3183 self.report_warning(f'{vid}: {msg}')
3184 return
3185 pp = cls(self)
3186 if pp.available:
3187 info_dict['__postprocessors'].append(pp)
3188 else:
3189 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3190
3191 stretched_ratio = info_dict.get('stretched_ratio')
3192 ffmpeg_fixup(
3193 stretched_ratio not in (1, None),
3194 f'Non-uniform pixel ratio {stretched_ratio}',
3195 FFmpegFixupStretchedPP)
3196
3197 ffmpeg_fixup(
3198 (info_dict.get('requested_formats') is None
3199 and info_dict.get('container') == 'm4a_dash'
3200 and info_dict.get('ext') == 'm4a'),
3201 'writing DASH m4a. Only some players support this container',
3202 FFmpegFixupM4aPP)
3203
993191c0 3204 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3205 downloader = downloader.__name__ if downloader else None
adbc4ec4
THD
3206
3207 if info_dict.get('requested_formats') is None: # Not necessary if doing merger
3208 ffmpeg_fixup(downloader == 'HlsFD',
3209 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3210 FFmpegFixupM3u8PP)
3211 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3212 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3213
e04b003e 3214 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3215 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 3216
3217 fixup()
8222d8de 3218 try:
f46e2f9d 3219 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
af819c21 3220 except PostProcessingError as err:
3221 self.report_error('Postprocessing: %s' % str(err))
8222d8de 3222 return
ab8e5e51
AM
3223 try:
3224 for ph in self._post_hooks:
23c1a667 3225 ph(info_dict['filepath'])
ab8e5e51
AM
3226 except Exception as err:
3227 self.report_error('post hooks: %s' % str(err))
3228 return
9e907ebd 3229 info_dict['__write_download_archive'] = True
2d30509f 3230
a13e6848 3231 if self.params.get('force_write_download_archive'):
9e907ebd 3232 info_dict['__write_download_archive'] = True
a13e6848 3233
3234 # Make sure the info_dict was modified in-place
f46e2f9d 3235 assert info_dict is original_infodict
a13e6848 3236
c3e6ffba 3237 max_downloads = self.params.get('max_downloads')
3238 if max_downloads is not None and self._num_downloads >= int(max_downloads):
3239 raise MaxDownloadsReached()
8222d8de 3240
aa9369a2 3241 def __download_wrapper(self, func):
3242 @functools.wraps(func)
3243 def wrapper(*args, **kwargs):
3244 try:
3245 res = func(*args, **kwargs)
3246 except UnavailableVideoError as e:
3247 self.report_error(e)
b222c271 3248 except MaxDownloadsReached as e:
aa9369a2 3249 self.to_screen(f'[info] {e}')
3250 raise
b222c271 3251 except DownloadCancelled as e:
3252 self.to_screen(f'[info] {e}')
3253 if not self.params.get('break_per_url'):
3254 raise
aa9369a2 3255 else:
3256 if self.params.get('dump_single_json', False):
3257 self.post_extract(res)
3258 self.to_stdout(json.dumps(self.sanitize_info(res)))
3259 return wrapper
3260
8222d8de
JMF
3261 def download(self, url_list):
3262 """Download a given list of URLs."""
aa9369a2 3263 url_list = variadic(url_list) # Passing a single URL is a common mistake
de6000d9 3264 outtmpl = self.outtmpl_dict['default']
3089bc74
S
3265 if (len(url_list) > 1
3266 and outtmpl != '-'
3267 and '%' not in outtmpl
3268 and self.params.get('max_downloads') != 1):
acd69589 3269 raise SameFileError(outtmpl)
8222d8de
JMF
3270
3271 for url in url_list:
aa9369a2 3272 self.__download_wrapper(self.extract_info)(
3273 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de
JMF
3274
3275 return self._download_retcode
3276
1dcc4c0c 3277 def download_with_info_file(self, info_filename):
31bd3925
JMF
3278 with contextlib.closing(fileinput.FileInput(
3279 [info_filename], mode='r',
3280 openhook=fileinput.hook_encoded('utf-8'))) as f:
3281 # FileInput doesn't have a read method, we can't call json.load
8012d892 3282 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
d4943898 3283 try:
aa9369a2 3284 self.__download_wrapper(self.process_ie_result)(info, download=True)
f2ebc5c7 3285 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
bf5f605e 3286 if not isinstance(e, EntryNotInPlaylist):
3287 self.to_stderr('\r')
d4943898
JMF
3288 webpage_url = info.get('webpage_url')
3289 if webpage_url is not None:
aa9369a2 3290 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
d4943898
JMF
3291 return self.download([webpage_url])
3292 else:
3293 raise
3294 return self._download_retcode
1dcc4c0c 3295
cb202fd2 3296 @staticmethod
8012d892 3297 def sanitize_info(info_dict, remove_private_keys=False):
3298 ''' Sanitize the infodict for converting to json '''
3ad56b42 3299 if info_dict is None:
3300 return info_dict
6e84b215 3301 info_dict.setdefault('epoch', int(time.time()))
6a5a30f9 3302 info_dict.setdefault('_type', 'video')
09b49e1f 3303
8012d892 3304 if remove_private_keys:
09b49e1f 3305 reject = lambda k, v: v is None or (k.startswith('_') and k != '_type') or k in {
f46e2f9d 3306 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3307 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
6e84b215 3308 }
ae8f99e6 3309 else:
09b49e1f 3310 reject = lambda k, v: False
adbc4ec4
THD
3311
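# Recursively drop rejected keys and coerce anything that is not
# JSON-serializable to its repr()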
3312 def filter_fn(obj):
3313 if isinstance(obj, dict):
3314 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3315 elif isinstance(obj, (list, tuple, set, LazyList)):
3316 return list(map(filter_fn, obj))
3317 elif obj is None or isinstance(obj, (str, int, float, bool)):
3318 return obj
3319 else:
3320 return repr(obj)
3321
5226731e 3322 return filter_fn(info_dict)
cb202fd2 3323
8012d892 3324 @staticmethod
3325 def filter_requested_info(info_dict, actually_filter=True):
3326 ''' Alias of sanitize_info for backward compatibility '''
3327 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3328
ed5835b4 3329 @staticmethod
3330 def post_extract(info_dict):
3331 def actual_post_extract(info_dict):
3332 if info_dict.get('_type') in ('playlist', 'multi_video'):
3333 for video_dict in info_dict.get('entries', {}):
3334 actual_post_extract(video_dict or {})
3335 return
3336
09b49e1f 3337 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3338 info_dict.update(post_extractor())
ed5835b4 3339
3340 actual_post_extract(info_dict or {})
3341
dcf64d43 3342 def run_pp(self, pp, infodict):
5bfa4862 3343 files_to_delete = []
dcf64d43 3344 if '__files_to_move' not in infodict:
3345 infodict['__files_to_move'] = {}
b1940459 3346 try:
3347 files_to_delete, infodict = pp.run(infodict)
3348 except PostProcessingError as e:
3349 # Must be True and not 'only_download'
3350 if self.params.get('ignoreerrors') is True:
3351 self.report_error(e)
3352 return infodict
3353 raise
3354
5bfa4862 3355 if not files_to_delete:
dcf64d43 3356 return infodict
5bfa4862 3357 if self.params.get('keepvideo', False):
3358 for f in files_to_delete:
dcf64d43 3359 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 3360 else:
3361 for old_filename in set(files_to_delete):
3362 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3363 try:
3364 os.remove(encodeFilename(old_filename))
3365 except (IOError, OSError):
3366 self.report_warning('Unable to remove downloaded original file')
dcf64d43 3367 if old_filename in infodict['__files_to_move']:
3368 del infodict['__files_to_move'][old_filename]
3369 return infodict
5bfa4862 3370
ed5835b4 3371 def run_all_pps(self, key, info, *, additional_pps=None):
bb66c247 3372 self._forceprint(key, info)
ed5835b4 3373 for pp in (additional_pps or []) + self._pps[key]:
dc5f409c 3374 info = self.run_pp(pp, info)
ed5835b4 3375 return info
277d6ff5 3376
56d868db 3377 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 3378 info = dict(ie_info)
56d868db 3379 info['__files_to_move'] = files_to_move or {}
ed5835b4 3380 info = self.run_all_pps(key, info)
56d868db 3381 return info, info.pop('__files_to_move', None)
5bfa4862 3382
f46e2f9d 3383 def post_process(self, filename, info, files_to_move=None):
8222d8de 3384 """Run all the postprocessors on the given file."""
8222d8de 3385 info['filepath'] = filename
dcf64d43 3386 info['__files_to_move'] = files_to_move or {}
ed5835b4 3387 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
dcf64d43 3388 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3389 del info['__files_to_move']
ed5835b4 3390 return self.run_all_pps('after_move', info)
c1c9a79c 3391
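# Archive IDs have the form '<extractor key, lowercased> <video id>',
# e.g. 'youtube dQw4w9WgXcQ'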
5db07df6 3392 def _make_archive_id(self, info_dict):
e9fef7ee
S
3393 video_id = info_dict.get('id')
3394 if not video_id:
3395 return
5db07df6
PH
3396 # Future-proof against any change in case
3397 # and for backwards compatibility with prior versions
e9fef7ee 3398 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3399 if extractor is None:
1211bb6d
S
3400 url = str_or_none(info_dict.get('url'))
3401 if not url:
3402 return
e9fef7ee 3403 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3404 for ie_key, ie in self._ies.items():
1211bb6d 3405 if ie.suitable(url):
8b7491c8 3406 extractor = ie_key
e9fef7ee
S
3407 break
3408 else:
3409 return
d0757229 3410 return '%s %s' % (extractor.lower(), video_id)
5db07df6
PH
3411
3412 def in_download_archive(self, info_dict):
3413 fn = self.params.get('download_archive')
3414 if fn is None:
3415 return False
3416
3417 vid_id = self._make_archive_id(info_dict)
e9fef7ee 3418 if not vid_id:
7012b23c 3419 return False # Incomplete video information
5db07df6 3420
a45e8619 3421 return vid_id in self.archive
c1c9a79c
PH
3422
3423 def record_download_archive(self, info_dict):
3424 fn = self.params.get('download_archive')
3425 if fn is None:
3426 return
5db07df6
PH
3427 vid_id = self._make_archive_id(info_dict)
3428 assert vid_id
a13e6848 3429 self.write_debug(f'Adding to archive: {vid_id}')
c1c9a79c 3430 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
6febd1c1 3431 archive_file.write(vid_id + '\n')
a45e8619 3432 self.archive.add(vid_id)
dd82ffea 3433
8c51aa65 3434 @staticmethod
8abeeb94 3435 def format_resolution(format, default='unknown'):
9359f3d4 3436 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
fb04e403 3437 return 'audio only'
f49d89ee
PH
3438 if format.get('resolution') is not None:
3439 return format['resolution']
35615307 3440 if format.get('width') and format.get('height'):
ff51ed58 3441 return '%dx%d' % (format['width'], format['height'])
35615307 3442 elif format.get('height'):
ff51ed58 3443 return '%sp' % format['height']
35615307 3444 elif format.get('width'):
ff51ed58 3445 return '%dx?' % format['width']
3446 return default
8c51aa65 3447
8130779d 3448 def _list_format_headers(self, *headers):
3449 if self.params.get('listformats_table', True) is not False:
3450 return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3451 return headers
3452
c57f7757
PH
3453 def _format_note(self, fdict):
3454 res = ''
3455 if fdict.get('ext') in ['f4f', 'f4m']:
f304da8a 3456 res += '(unsupported)'
32f90364
PH
3457 if fdict.get('language'):
3458 if res:
3459 res += ' '
f304da8a 3460 res += '[%s]' % fdict['language']
c57f7757 3461 if fdict.get('format_note') is not None:
f304da8a 3462 if res:
3463 res += ' '
3464 res += fdict['format_note']
c57f7757 3465 if fdict.get('tbr') is not None:
f304da8a 3466 if res:
3467 res += ', '
3468 res += '%4dk' % fdict['tbr']
c57f7757
PH
3469 if fdict.get('container') is not None:
3470 if res:
3471 res += ', '
3472 res += '%s container' % fdict['container']
3089bc74
S
3473 if (fdict.get('vcodec') is not None
3474 and fdict.get('vcodec') != 'none'):
c57f7757
PH
3475 if res:
3476 res += ', '
3477 res += fdict['vcodec']
91c7271a 3478 if fdict.get('vbr') is not None:
c57f7757
PH
3479 res += '@'
3480 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3481 res += 'video@'
3482 if fdict.get('vbr') is not None:
3483 res += '%4dk' % fdict['vbr']
fbb21cf5 3484 if fdict.get('fps') is not None:
5d583bdf
S
3485 if res:
3486 res += ', '
3487 res += '%sfps' % fdict['fps']
c57f7757
PH
3488 if fdict.get('acodec') is not None:
3489 if res:
3490 res += ', '
3491 if fdict['acodec'] == 'none':
3492 res += 'video only'
3493 else:
3494 res += '%-5s' % fdict['acodec']
3495 elif fdict.get('abr') is not None:
3496 if res:
3497 res += ', '
3498 res += 'audio'
3499 if fdict.get('abr') is not None:
3500 res += '@%3dk' % fdict['abr']
3501 if fdict.get('asr') is not None:
3502 res += ' (%5dHz)' % fdict['asr']
3503 if fdict.get('filesize') is not None:
3504 if res:
3505 res += ', '
3506 res += format_bytes(fdict['filesize'])
9732d77e
PH
3507 elif fdict.get('filesize_approx') is not None:
3508 if res:
3509 res += ', '
3510 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3511 return res
91c7271a 3512
8130779d 3513 def render_formats_table(self, info_dict):
b69fd25c 3514 if not info_dict.get('formats') and not info_dict.get('url'):
8130779d 3515 return None
b69fd25c 3516
94badb25 3517 formats = info_dict.get('formats', [info_dict])
8130779d 3518 if self.params.get('listformats_table', True) is False:
76d321f6 3519 table = [
3520 [
3521 format_field(f, 'format_id'),
3522 format_field(f, 'ext'),
3523 self.format_resolution(f),
8130779d 3524 self._format_note(f)
3525 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3526 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3527
3528 delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3529 table = [
3530 [
3531 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3532 format_field(f, 'ext'),
3533 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3534 format_field(f, 'fps', '\t%d'),
3535 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3536 delim,
3537 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3538 format_field(f, 'tbr', '\t%dk'),
3539 shorten_protocol_name(f.get('protocol', '')),
3540 delim,
3541 format_field(f, 'vcodec', default='unknown').replace(
3542 'none', 'images' if f.get('acodec') == 'none'
3543 else self._format_screen('audio only', self.Styles.SUPPRESS)),
3544 format_field(f, 'vbr', '\t%dk'),
3545 format_field(f, 'acodec', default='unknown').replace(
3546 'none', '' if f.get('vcodec') == 'none'
3547 else self._format_screen('video only', self.Styles.SUPPRESS)),
3548 format_field(f, 'abr', '\t%dk'),
3549 format_field(f, 'asr', '\t%dHz'),
3550 join_nonempty(
3551 self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3552 format_field(f, 'language', '[%s]'),
3553 join_nonempty(format_field(f, 'format_note'),
3554 format_field(f, 'container', ignore=(None, f.get('ext'))),
3555 delim=', '),
3556 delim=' '),
3557 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3558 header_line = self._list_format_headers(
3559 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3560 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3561
3562 return render_table(
3563 header_line, table, hide_empty=True,
3564 delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3565
3566 def render_thumbnails_table(self, info_dict):
88f23a18 3567 thumbnails = list(info_dict.get('thumbnails') or [])
cfb56d1a 3568 if not thumbnails:
8130779d 3569 return None
3570 return render_table(
ec11a9f4 3571 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
6970b600 3572 [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
2412044c 3573
8130779d 3574 def render_subtitles_table(self, video_id, subtitles):
2412044c 3575 def _row(lang, formats):
49c258e1 3576 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
2412044c 3577 if len(set(names)) == 1:
7aee40c1 3578 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3579 return [lang, ', '.join(names), ', '.join(exts)]
3580
8130779d 3581 if not subtitles:
3582 return None
3583 return render_table(
ec11a9f4 3584 self._list_format_headers('Language', 'Name', 'Formats'),
2412044c 3585 [_row(lang, formats) for lang, formats in subtitles.items()],
8130779d 3586 hide_empty=True)
3587
3588 def __list_table(self, video_id, name, func, *args):
3589 table = func(*args)
3590 if not table:
3591 self.to_screen(f'{video_id} has no {name}')
3592 return
3593 self.to_screen(f'[info] Available {name} for {video_id}:')
3594 self.to_stdout(table)
3595
3596 def list_formats(self, info_dict):
3597 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3598
3599 def list_thumbnails(self, info_dict):
3600 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3601
3602 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3603 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
a504ced0 3604
dca08720
PH
3605 def urlopen(self, req):
3606 """ Start an HTTP download """
82d8a8b6 3607 if isinstance(req, compat_basestring):
67dda517 3608 req = sanitized_Request(req)
19a41fc6 3609 return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
3610
3611 def print_debug_header(self):
3612 if not self.params.get('verbose'):
3613 return
49a57e70 3614
3615 def get_encoding(stream):
2a938746 3616 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
49a57e70 3617 if not supports_terminal_sequences(stream):
e3c7d495 3618 from .compat import WINDOWS_VT_MODE
3619 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
49a57e70 3620 return ret
3621
3622 encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3623 locale.getpreferredencoding(),
3624 sys.getfilesystemencoding(),
cf4f42cb 3625 get_encoding(self._out_files['screen']), get_encoding(self._out_files['error']),
49a57e70 3626 self.get_encoding())
883d4b1e 3627
3628 logger = self.params.get('logger')
3629 if logger:
3630 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3631 write_debug(encoding_str)
3632 else:
96565c7e 3633 write_string(f'[debug] {encoding_str}\n', encoding=None)
49a57e70 3634 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
734f90bb 3635
4c88ff87 3636 source = detect_variant()
36eaf303 3637 write_debug(join_nonempty(
3638 'yt-dlp version', __version__,
3639 f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3640 '' if source == 'unknown' else f'({source})',
3641 delim=' '))
6e21fdd2 3642 if not _LAZY_LOADER:
3643 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
49a57e70 3644 write_debug('Lazy loading extractors is forcibly disabled')
6e21fdd2 3645 else:
49a57e70 3646 write_debug('Lazy loading extractors is disabled')
3ae5e797 3647 if plugin_extractors or plugin_postprocessors:
49a57e70 3648 write_debug('Plugins: %s' % [
3ae5e797 3649 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3650 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
53ed7066 3651 if self.params.get('compat_opts'):
49a57e70 3652 write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
36eaf303 3653
3654 if source == 'source':
dca08720 3655 try:
36eaf303 3656 sp = Popen(
3657 ['git', 'rev-parse', '--short', 'HEAD'],
3658 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3659 cwd=os.path.dirname(os.path.abspath(__file__)))
3660 out, err = sp.communicate_or_kill()
3661 out = out.decode().strip()
3662 if re.match('[0-9a-f]+', out):
3663 write_debug('Git HEAD: %s' % out)
70a1165b 3664 except Exception:
36eaf303 3665 try:
3666 sys.exc_clear()
3667 except Exception:
3668 pass
b300cda4
S
3669
3670 def python_implementation():
3671 impl_name = platform.python_implementation()
3672 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3673 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3674 return impl_name
3675
49a57e70 3676 write_debug('Python version %s (%s %s) - %s' % (
e5813e53 3677 platform.python_version(),
3678 python_implementation(),
3679 platform.architecture()[0],
b300cda4 3680 platform_name()))
d28b5171 3681
8913ef74 3682 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3683 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3684 if ffmpeg_features:
a4211baf 3685 exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
8913ef74 3686
4c83c967 3687 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3688 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171 3689 exe_str = ', '.join(
2831b468 3690 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3691 ) or 'none'
49a57e70 3692 write_debug('exe versions: %s' % exe_str)
dca08720 3693
2831b468 3694 from .downloader.websocket import has_websockets
3695 from .postprocessor.embedthumbnail import has_mutagen
f59f5ef8 3696 from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
2831b468 3697
c586f9e8 3698 lib_str = join_nonempty(
4390d5ec 3699 compat_brotli and compat_brotli.__name__,
d5820461 3700 has_certifi and 'certifi',
edf65256 3701 compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
f59f5ef8 3702 SECRETSTORAGE_AVAILABLE and 'secretstorage',
2831b468 3703 has_mutagen and 'mutagen',
3704 SQLITE_AVAILABLE and 'sqlite',
c586f9e8 3705 has_websockets and 'websockets',
3706 delim=', ') or 'none'
49a57e70 3707 write_debug('Optional libraries: %s' % lib_str)
2831b468 3708
dca08720
PH
3709 proxy_map = {}
3710 for handler in self._opener.handlers:
3711 if hasattr(handler, 'proxies'):
3712 proxy_map.update(handler.proxies)
49a57e70 3713 write_debug(f'Proxy map: {proxy_map}')
dca08720 3714
49a57e70 3715 # Not implemented
3716 if False and self.params.get('call_home'):
58b1f00d 3717 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
49a57e70 3718 write_debug('Public IP address: %s' % ipaddr)
58b1f00d
PH
3719 latest_version = self.urlopen(
3720 'https://yt-dl.org/latest/version').read().decode('utf-8')
3721 if version_tuple(latest_version) > version_tuple(__version__):
3722 self.report_warning(
3723 'You are using an outdated version (newest version: %s)! '
3724 'See https://yt-dl.org/update if you need help updating.' %
3725 latest_version)
3726
e344693b 3727 def _setup_opener(self):
6ad14cab 3728 timeout_val = self.params.get('socket_timeout')
17bddf3e 3729 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
6ad14cab 3730
982ee69a 3731 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
dca08720
PH
3732 opts_cookiefile = self.params.get('cookiefile')
3733 opts_proxy = self.params.get('proxy')
3734
982ee69a 3735 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3736
6a3f4c3f 3737 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720
PH
3738 if opts_proxy is not None:
3739 if opts_proxy == '':
3740 proxies = {}
3741 else:
3742 proxies = {'http': opts_proxy, 'https': opts_proxy}
3743 else:
3744 proxies = compat_urllib_request.getproxies()
067aa17e 3745 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720
PH
3746 if 'http' in proxies and 'https' not in proxies:
3747 proxies['https'] = proxies['http']
91410c9b 3748 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2
PH
3749
3750 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d
PH
3751 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3752 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3753 redirect_handler = YoutubeDLRedirectHandler()
8b172c2e 3754 data_handler = compat_urllib_request_DataHandler()
6240b0a2
JMF
3755
3756 # When passing our own FileHandler instance, build_opener won't add the
3757 # default FileHandler, allowing us to disable the file protocol, which
3758 # can be used for malicious purposes (see
067aa17e 3759 # https://github.com/ytdl-org/youtube-dl/issues/8227)
6240b0a2
JMF
3760 file_handler = compat_urllib_request.FileHandler()
3761
3762 def file_open(*args, **kwargs):
7a5c1cfe 3763 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
6240b0a2
JMF
3764 file_handler.file_open = file_open
3765
3766 opener = compat_urllib_request.build_opener(
fca6dba8 3767 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3768
dca08720
PH
3769 # Delete the default user-agent header, which would otherwise apply in
3770 # cases where our custom HTTP handler doesn't come into play
067aa17e 3771 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720
PH
3772 opener.addheaders = []
3773 self._opener = opener
62fec3b2
PH
3774
3775 def encode(self, s):
3776 if isinstance(s, bytes):
3777 return s # Already encoded
3778
3779 try:
3780 return s.encode(self.get_encoding())
3781 except UnicodeEncodeError as err:
3782 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3783 raise
3784
3785 def get_encoding(self):
3786 encoding = self.params.get('encoding')
3787 if encoding is None:
3788 encoding = preferredencoding()
3789 return encoding
ec82d85a 3790
e08a85d8 3791 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
80c03fa9 3792 ''' Write infojson and return True = written, False = skip, None = error '''
e08a85d8 3793 if overwrite is None:
3794 overwrite = self.params.get('overwrites', True)
80c03fa9 3795 if not self.params.get('writeinfojson'):
3796 return False
3797 elif not infofn:
3798 self.write_debug(f'Skipping writing {label} infojson')
3799 return False
3800 elif not self._ensure_dir_exists(infofn):
3801 return None
e08a85d8 3802 elif not overwrite and os.path.exists(infofn):
80c03fa9 3803 self.to_screen(f'[info] {label.title()} metadata is already present')
3804 else:
3805 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3806 try:
3807 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3808 except (OSError, IOError):
3809 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3810 return None
3811 return True
3812
3813 def _write_description(self, label, ie_result, descfn):
3814 ''' Write description and return True = written, False = skip, None = error '''
3815 if not self.params.get('writedescription'):
3816 return False
3817 elif not descfn:
3818 self.write_debug(f'Skipping writing {label} description')
3819 return False
3820 elif not self._ensure_dir_exists(descfn):
3821 return None
3822 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3823 self.to_screen(f'[info] {label.title()} description is already present')
3824 elif ie_result.get('description') is None:
3825 self.report_warning(f'There\'s no {label} description to write')
3826 return False
3827 else:
3828 try:
3829 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3830 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3831 descfile.write(ie_result['description'])
3832 except (OSError, IOError):
3833 self.report_error(f'Cannot write {label} description file {descfn}')
3834 return None
3835 return True
3836
3837 def _write_subtitles(self, info_dict, filename):
3838 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3839 ret = []
3840 subtitles = info_dict.get('requested_subtitles')
3841 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3842 # Subtitle download errors are already reported by the relevant IE,
3843 # so this silently continues when used with an IE that doesn't support subtitles
3844 return ret
3845
3846 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3847 if not sub_filename_base:
3848 self.to_screen('[info] Skipping writing video subtitles')
3849 return ret
3850 for sub_lang, sub_info in subtitles.items():
3851 sub_format = sub_info['ext']
3852 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3853 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
e04938ab 3854 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3855 if existing_sub:
80c03fa9 3856 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
e04938ab 3857 sub_info['filepath'] = existing_sub
3858 ret.append((existing_sub, sub_filename_final))
80c03fa9 3859 continue
3860
3861 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3862 if sub_info.get('data') is not None:
3863 try:
3864 # Use newline='' to prevent conversion of newline characters
3865 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3866 with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3867 subfile.write(sub_info['data'])
3868 sub_info['filepath'] = sub_filename
3869 ret.append((sub_filename, sub_filename_final))
3870 continue
3871 except (OSError, IOError):
3872 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3873 return None
3874
3875 try:
3876 sub_copy = sub_info.copy()
3877 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3878 self.dl(sub_filename, sub_copy, subtitle=True)
3879 sub_info['filepath'] = sub_filename
3880 ret.append((sub_filename, sub_filename_final))
6020e05d 3881 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
c70c418d 3882 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
6020e05d 3883 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
c70c418d 3884 if not self.params.get('ignoreerrors'):
3885 self.report_error(msg)
3886 raise DownloadError(msg)
3887 self.report_warning(msg)
519804a9 3888 return ret
80c03fa9 3889
3890 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3891 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
6c4fd172 3892 write_all = self.params.get('write_all_thumbnails', False)
80c03fa9 3893 thumbnails, ret = [], []
6c4fd172 3894 if write_all or self.params.get('writethumbnail', False):
0202b52a 3895 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3896 multiple = write_all and len(thumbnails) > 1
ec82d85a 3897
80c03fa9 3898 if thumb_filename_base is None:
3899 thumb_filename_base = filename
3900 if thumbnails and not thumb_filename_base:
3901 self.write_debug(f'Skipping writing {label} thumbnail')
3902 return ret
3903
dd0228ce 3904 for idx, t in list(enumerate(thumbnails))[::-1]:
80c03fa9 3905 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
aa9369a2 3906 thumb_display_id = f'{label} thumbnail {t["id"]}'
80c03fa9 3907 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3908 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
ec82d85a 3909
e04938ab 3910 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3911 if existing_thumb:
aa9369a2 3912 self.to_screen('[info] %s is already present' % (
3913 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
e04938ab 3914 t['filepath'] = existing_thumb
3915 ret.append((existing_thumb, thumb_filename_final))
ec82d85a 3916 else:
80c03fa9 3917 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
ec82d85a 3918 try:
297e9952 3919 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
80c03fa9 3920 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
d3d89c32 3921 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3922 shutil.copyfileobj(uf, thumbf)
80c03fa9 3923 ret.append((thumb_filename, thumb_filename_final))
885cc0b7 3924 t['filepath'] = thumb_filename
3158150c 3925 except network_exceptions as err:
dd0228ce 3926 thumbnails.pop(idx)
80c03fa9 3927 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
6c4fd172 3928 if ret and not write_all:
3929 break
0202b52a 3930 return ret