cc52de43 1#!/usr/bin/env python3
26e63931 2import collections
31bd3925 3import contextlib
9d2ecdbc 4import datetime
c1c9a79c 5import errno
31bd3925 6import fileinput
b5ae35ee 7import functools
8222d8de 8import io
b82f815f 9import itertools
8694c600 10import json
62fec3b2 11import locale
083c9df9 12import operator
8222d8de 13import os
dca08720 14import platform
f8271158 15import random
8222d8de 16import re
17import shutil
dca08720 18import subprocess
8222d8de 19import sys
21cd8fae 20import tempfile
8222d8de 21import time
67134eab 22import tokenize
8222d8de 23import traceback
524e2e4f 24import unicodedata
f9934b96 25import urllib.request
961ea474 26from string import ascii_letters
27
f8271158 28from .cache import Cache
8c25f81b 29from .compat import (
003c69a8 30 compat_get_terminal_size,
e9c0cdd3 31 compat_os_name,
7d1eb38a 32 compat_shlex_quote,
ce02ed60 33 compat_str,
34 compat_urllib_error,
35 compat_urllib_request,
8c25f81b 36)
982ee69a 37from .cookies import load_cookies
f8271158 38from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
39from .downloader.rtmp import rtmpdump_version
40from .extractor import _LAZY_LOADER
41from .extractor import _PLUGIN_CLASSES as plugin_extractors
42from .extractor import gen_extractor_classes, get_info_extractor
43from .extractor.openload import PhantomJSwrapper
44from .minicurses import format_text
45from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
46from .postprocessor import (
47 EmbedThumbnailPP,
48 FFmpegFixupDuplicateMoovPP,
49 FFmpegFixupDurationPP,
50 FFmpegFixupM3u8PP,
51 FFmpegFixupM4aPP,
52 FFmpegFixupStretchedPP,
53 FFmpegFixupTimestampPP,
54 FFmpegMergerPP,
55 FFmpegPostProcessor,
56 MoveFilesAfterDownloadPP,
57 get_postprocessor,
58)
59from .update import detect_variant
8c25f81b 60from .utils import (
f8271158 61 DEFAULT_OUTTMPL,
62 LINK_TEMPLATES,
63 NO_DEFAULT,
1d485a1a 64 NUMBER_RE,
f8271158 65 OUTTMPL_TYPES,
66 POSTPROCESS_WHEN,
67 STR_FORMAT_RE_TMPL,
68 STR_FORMAT_TYPES,
69 ContentTooShortError,
70 DateRange,
71 DownloadCancelled,
72 DownloadError,
73 EntryNotInPlaylist,
74 ExistingVideoReached,
75 ExtractorError,
76 GeoRestrictedError,
77 HEADRequest,
78 InAdvancePagedList,
79 ISO3166Utils,
80 LazyList,
81 MaxDownloadsReached,
19a03940 82 Namespace,
f8271158 83 PagedList,
84 PerRequestProxyHandler,
85 Popen,
86 PostProcessingError,
87 ReExtractInfo,
88 RejectedVideoReached,
89 SameFileError,
90 UnavailableVideoError,
91 YoutubeDLCookieProcessor,
92 YoutubeDLHandler,
93 YoutubeDLRedirectHandler,
eedb7ba5 94 age_restricted,
95 args_to_str,
ce02ed60 96 date_from_str,
ce02ed60 97 determine_ext,
b5559424 98 determine_protocol,
c0384f22 99 encode_compat_str,
ce02ed60 100 encodeFilename,
a06916d9 101 error_to_compat_str,
590bc6f6 102 expand_path,
90137ca4 103 filter_dict,
e29663c6 104 float_or_none,
02dbf93f 105 format_bytes,
e0fd9573 106 format_decimal_suffix,
f8271158 107 format_field,
525ef922 108 formatSeconds,
0bb322b9 109 get_domain,
c9969434 110 int_or_none,
732044af 111 iri_to_uri,
34921b43 112 join_nonempty,
ce02ed60 113 locked_file,
0202b52a 114 make_dir,
dca08720 115 make_HTTPS_handler,
8b7539d2 116 merge_headers,
3158150c 117 network_exceptions,
ec11a9f4 118 number_of_digits,
cd6fc19e 119 orderedSet,
083c9df9 120 parse_filesize,
dca08720 121 platform_name,
ce02ed60 122 preferredencoding,
eedb7ba5 123 prepend_extension,
51fb4995 124 register_socks_protocols,
3efb96a6 125 remove_terminal_sequences,
cfb56d1a 126 render_table,
eedb7ba5 127 replace_extension,
ce02ed60 128 sanitize_filename,
1bb5c511 129 sanitize_path,
dcf77cf1 130 sanitize_url,
67dda517 131 sanitized_Request,
e5660ee6 132 std_headers,
1211bb6d 133 str_or_none,
e29663c6 134 strftime_or_none,
ce02ed60 135 subtitles_filename,
819e0531 136 supports_terminal_sequences,
f2ebc5c7 137 timetuple_from_msec,
732044af 138 to_high_limit_path,
324ad820 139 traverse_obj,
6033d980 140 try_get,
29eb5174 141 url_basename,
7d1eb38a 142 variadic,
58b1f00d 143 version_tuple,
53973b4d 144 windows_enable_vt_mode,
ce02ed60 145 write_json_file,
146 write_string,
4f026faf 147)
f8271158 148from .version import RELEASE_GIT_HEAD, __version__
8222d8de 149
e9c0cdd3 150if compat_os_name == 'nt':
151 import ctypes
152
2459b6e1 153
86e5f3ed 154class YoutubeDL:
8222d8de 155 """YoutubeDL class.
156
157 YoutubeDL objects are the ones responsible for downloading the
158 actual video file and writing it to disk if the user has requested
159 it, among some other tasks. In most cases there should be one per
160 program. Since the downloader, given a video URL, doesn't know how to
161 extract all the needed information (that is the task of InfoExtractors),
162 it has to pass the URL to one of them.
163
164 For this, YoutubeDL objects have a method that allows
165 InfoExtractors to be registered in a given order. When it is passed
166 a URL, the YoutubeDL object hands it to the first InfoExtractor it
167 finds that reports being able to handle it. The InfoExtractor extracts
168 all the information about the video or videos the URL refers to, and
169 YoutubeDL processes the extracted information, possibly using a File
170 Downloader to download the video.
171
172 YoutubeDL objects accept a lot of parameters. In order not to saturate
173 the object constructor with arguments, it receives a dictionary of
174 options instead. These options are available through the params
175 attribute for the InfoExtractors to use. The YoutubeDL also
176 registers itself as the downloader in charge of the InfoExtractors
177 that are added to it, so this is a "mutual registration".
178
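    A minimal embedding sketch (illustrative only; the URL and option values
    are placeholders, and any of the options documented below may be passed):

        from yt_dlp import YoutubeDL

        with YoutubeDL({'format': 'bestvideo+bestaudio/best'}) as ydl:
            # download() takes a list of URLs; extract_info() returns the info_dict
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
            info = ydl.extract_info(
                'https://www.youtube.com/watch?v=BaW_jenozKc', download=False)
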
179 Available options:
180
181 username: Username for authentication purposes.
182 password: Password for authentication purposes.
180940e0 183 videopassword: Password for accessing a video.
1da50aa3 184 ap_mso: Adobe Pass multiple-system operator identifier.
185 ap_username: Multiple-system operator account username.
186 ap_password: Multiple-system operator account password.
8222d8de 187 usenetrc: Use netrc for authentication instead.
188 verbose: Print additional info to stdout.
189 quiet: Do not print messages to stdout.
ad8915b7 190 no_warnings: Do not print out anything for warnings.
bb66c247 191 forceprint: A dict with keys WHEN mapped to a list of templates to
192 print to stdout. The allowed keys are video or any of the
193 items in utils.POSTPROCESS_WHEN.
ca30f449 194 For compatibility, a single list is also accepted
bb66c247 195 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
196 a list of tuples with (template, filename)
53c18592 197 forceurl: Force printing final URL. (Deprecated)
198 forcetitle: Force printing title. (Deprecated)
199 forceid: Force printing ID. (Deprecated)
200 forcethumbnail: Force printing thumbnail URL. (Deprecated)
201 forcedescription: Force printing description. (Deprecated)
202 forcefilename: Force printing final filename. (Deprecated)
203 forceduration: Force printing duration. (Deprecated)
8694c600 204 forcejson: Force printing info_dict as JSON.
63e0be34 205 dump_single_json: Force printing the info_dict of the whole playlist
206 (or video) as a single JSON line.
c25228e5 207 force_write_download_archive: Force writing download archive regardless
208 of 'skip_download' or 'simulate'.
b7b04c78 209 simulate: Do not download the video files. If unset (or None),
210 simulate only if listsubtitles, listformats or list_thumbnails is used
eb8a4433 211 format: Video format code. see "FORMAT SELECTION" for more details.
093a1710 212 You can also pass a function. The function takes 'ctx' as
213 argument and returns the formats to download.
214 See "build_format_selector" for an implementation
63ad4d43 215 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 216 ignore_no_formats_error: Ignore "No video formats" error. Useful for
217 extracting metadata even if the video is not actually
218 available for download (experimental)
0930b11f 219 format_sort: A list of fields by which to sort the video formats.
220 See "Sorting Formats" for more details.
c25228e5 221 format_sort_force: Force the given format_sort. see "Sorting Formats"
222 for more details.
08d30158 223 prefer_free_formats: Whether to prefer video formats with free containers
224 over non-free ones of same quality.
c25228e5 225 allow_multiple_video_streams: Allow multiple video streams to be merged
226 into a single file
227 allow_multiple_audio_streams: Allow multiple audio streams to be merged
228 into a single file
0ba692ac 229 check_formats Whether to test if the formats are downloadable.
9f1a1c36 230 Can be True (check all), False (check none),
231 'selected' (check selected formats),
0ba692ac 232 or None (check only if requested by extractor)
4524baf0 233 paths: Dictionary of output paths. The allowed keys are 'home'
234 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
de6000d9 235 outtmpl: Dictionary of templates for output names. Allowed keys
4524baf0 236 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
34488702 237 For compatibility with youtube-dl, a single string can also be used
a820dc72 238 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
239 restrictfilenames: Do not allow "&" and spaces in file names
240 trim_file_name: Limit length of filename (extension excluded)
4524baf0 241 windowsfilenames: Force the filenames to be windows compatible
b1940459 242 ignoreerrors: Do not stop on download/postprocessing errors.
243 Can be 'only_download' to ignore only download errors.
244 Default is 'only_download' for CLI, but False for API
26e2805c 245 skip_playlist_after_errors: Number of allowed failures until the rest of
246 the playlist is skipped
d22dec74 247 force_generic_extractor: Force downloader to use the generic extractor
0c3d0f51 248 overwrites: Overwrite all video and metadata files if True,
249 overwrite only non-video files if None
250 and don't overwrite any file if False
34488702 251 For compatibility with youtube-dl,
252 "nooverwrites" may also be used instead
8222d8de 253 playliststart: Playlist item to start at.
254 playlistend: Playlist item to end at.
c14e88f0 255 playlist_items: Specific indices of playlist to download.
ff815fe6 256 playlistreverse: Download playlist items in reverse order.
75822ca7 257 playlistrandom: Download playlist items in random order.
8222d8de 258 matchtitle: Download only matching titles.
259 rejecttitle: Reject downloads for matching titles.
8bf9319e 260 logger: Log messages to a logging.Logger instance.
8222d8de 261 logtostderr: Log messages to stderr instead of stdout.
819e0531 262 consoletitle: Display progress in console window's titlebar.
8222d8de 263 writedescription: Write the video description to a .description file
264 writeinfojson: Write the video description to a .info.json file
75d43ca0 265 clean_infojson: Remove private fields from the infojson
34488702 266 getcomments: Extract video comments. This will not be written to disk
06167fbb 267 unless writeinfojson is also given
1fb07d10 268 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 269 writethumbnail: Write the thumbnail image to a file
c25228e5 270 allow_playlist_files: Whether to write playlists' description, infojson etc
271 also to disk when using the 'write*' options
ec82d85a 272 write_all_thumbnails: Write all thumbnail formats to files
732044af 273 writelink: Write an internet shortcut file, depending on the
274 current platform (.url/.webloc/.desktop)
275 writeurllink: Write a Windows internet shortcut file (.url)
276 writewebloclink: Write a macOS internet shortcut file (.webloc)
277 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 278 writesubtitles: Write the video subtitles to a file
741dd8ea 279 writeautomaticsub: Write the automatically generated subtitles to a file
245524e6 280 allsubtitles: Deprecated - Use subtitleslangs = ['all']
c32b0aab 281 Downloads all the subtitles of the video
0b7f3118 282 (requires writesubtitles or writeautomaticsub)
8222d8de 283 listsubtitles: Lists all available subtitles for the video
a504ced0 284 subtitlesformat: The format code for subtitles
c32b0aab 285 subtitleslangs: List of languages of the subtitles to download (can be regex).
286 The list may contain "all" to refer to all the available
287 subtitles. The language can be prefixed with a "-" to
288 exclude it from the requested languages. Eg: ['all', '-live_chat']
8222d8de 289 keepvideo: Keep the video file after post-processing
290 daterange: A DateRange object, download only if the upload_date is in the range.
291 skip_download: Skip the actual download of the video file
c35f9e72 292 cachedir: Location of the cache files in the filesystem.
a0e07d31 293 False to disable filesystem cache.
47192f92 294 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899 295 age_limit: An integer representing the user's age in years.
296 Unsuitable videos for the given age are skipped.
5fe18bdb 297 min_views: An integer representing the minimum view count the video
298 must have in order to not be skipped.
299 Videos without view count information are always
300 downloaded. None for no limit.
301 max_views: An integer representing the maximum view count.
302 Videos that are more popular than that are not
303 downloaded.
304 Videos without view count information are always
305 downloaded. None for no limit.
306 download_archive: File name of a file where all downloads are recorded.
c1c9a79c 307 Videos already present in the file are not downloaded
308 again.
8a51f564 309 break_on_existing: Stop the download process after attempting to download a
310 file that is in the archive.
311 break_on_reject: Stop the download process when encountering a video that
312 has been filtered out.
b222c271 313 break_per_url: Whether break_on_reject and break_on_existing
314 should act on each input URL as opposed to for the entire queue
d76fa1f3 315 cookiefile: File name or text stream from where cookies should be read and dumped to
f59f5ef8 316 cookiesfrombrowser: A tuple containing the name of the browser, the profile
317 name/path from where cookies are loaded, and the name of the
318 keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
f81c62a6 319 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
320 support RFC 5746 secure renegotiation
f59f5ef8 321 nocheckcertificate: Do not verify SSL certificates
bb58c9ed 322 client_certificate: Path to client certificate file in PEM format. May include the private key
323 client_certificate_key: Path to private key file for client certificate
324 client_certificate_password: Password for client certificate private key, if encrypted.
325 If not provided and the key is encrypted, yt-dlp will ask interactively
7e8c0af0 326 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
327 At the moment, this is only supported by YouTube.
8b7539d2 328 http_headers: A dictionary of custom headers to be used for all requests
a1ee09e8 329 proxy: URL of the proxy server to use
38cce791 330 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 331 on geo-restricted sites.
e344693b 332 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b 333 bidi_workaround: Work around buggy terminals without bidirectional text
334 support, using fribidi
a0ddb8a2 335 debug_printtraffic:Print out sent and received HTTP traffic
91f071af 336 include_ads: Download ads as well (deprecated)
04b4d394 337 default_search: Prepend this string if an input url is not valid.
338 'auto' for elaborate guessing
62fec3b2 339 encoding: Use this encoding instead of the system-specified.
e8ee972c 340 extract_flat: Do not resolve URLs, return the immediate result.
057a5206 341 Pass in 'in_playlist' to only show this behavior for
342 playlist items.
f2ebc5c7 343 wait_for_video: If given, wait for scheduled streams to become available.
344 The value should be a tuple containing the range
345 (min_secs, max_secs) to wait between retries
4f026faf 346 postprocessors: A list of dictionaries, each with an entry
71b640cc 347 * key: The name of the postprocessor. See
7a5c1cfe 348 yt_dlp/postprocessor/__init__.py for a list.
bb66c247 349 * when: When to run the postprocessor. Allowed values are
350 the entries of utils.POSTPROCESS_WHEN
56d868db 351 Assumed to be 'post_process' if not given
b5ae35ee 352 post_hooks: Deprecated - Register a custom postprocessor instead
353 A list of functions that get called as the final step
ab8e5e51 354 for each video file, after all postprocessors have been
355 called. The filename will be passed as the only argument.
71b640cc 356 progress_hooks: A list of functions that get called on download
357 progress, with a dictionary with the entries
5cda4eda 358 * status: One of "downloading", "error", or "finished".
ee69b99a 359 Check this first and ignore unknown values.
3ba7740d 360 * info_dict: The extracted info_dict
71b640cc 361
5cda4eda 362 If status is one of "downloading", or "finished", the
ee69b99a 363 following properties may also be present:
364 * filename: The final filename (always present)
5cda4eda 365 * tmpfilename: The filename we're currently writing to
71b640cc 366 * downloaded_bytes: Bytes on disk
367 * total_bytes: Size of the whole file, None if unknown
5cda4eda 368 * total_bytes_estimate: Guess of the eventual file size,
369 None if unavailable.
370 * elapsed: The number of seconds since download started.
71b640cc 371 * eta: The estimated time in seconds, None if unknown
372 * speed: The download speed in bytes/second, None if
373 unknown
5cda4eda 374 * fragment_index: The counter of the currently
375 downloaded video fragment.
376 * fragment_count: The number of fragments (= individual
377 files that will be merged)
71b640cc 378
379 Progress hooks are guaranteed to be called at least once
380 (with status "finished") if the download is successful.
819e0531 381 postprocessor_hooks: A list of functions that get called on postprocessing
382 progress, with a dictionary with the entries
383 * status: One of "started", "processing", or "finished".
384 Check this first and ignore unknown values.
385 * postprocessor: Name of the postprocessor
386 * info_dict: The extracted info_dict
387
388 Progress hooks are guaranteed to be called at least twice
389 (with status "started" and "finished") if the processing is successful.
45598f15 390 merge_output_format: Extension to use when merging formats.
6b591b29 391 final_ext: Expected final extension; used to detect when the file was
59a7a13e 392 already downloaded and converted
6271f1ca 393 fixup: Automatically correct known faults of the file.
394 One of:
395 - "never": do nothing
396 - "warn": only emit a warning
397 - "detect_or_warn": check whether we can do anything
62cd676c 398 about it, warn otherwise (default)
504f20dd 399 source_address: Client-side IP address to bind to.
6ec6cb4e 400 call_home: Boolean, true iff we are allowed to contact the
7a5c1cfe 401 yt-dlp servers for debugging. (BROKEN)
1cf376f5 402 sleep_interval_requests: Number of seconds to sleep between requests
403 during extraction
7aa589a5 404 sleep_interval: Number of seconds to sleep before each download when
405 used alone or a lower bound of a range for randomized
406 sleep before each download (minimum possible number
407 of seconds to sleep) when used along with
408 max_sleep_interval.
409 max_sleep_interval:Upper bound of a range for randomized sleep before each
410 download (maximum possible number of seconds to sleep).
411 Must only be used along with sleep_interval.
412 Actual sleep time will be a random float from range
413 [sleep_interval; max_sleep_interval].
1cf376f5 414 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a 415 listformats: Print an overview of available video formats and exit.
416 list_thumbnails: Print a table of all thumbnails and exit.
0a41f331 417 match_filter: A function that gets called for every video with the signature
418 (info_dict, *, incomplete: bool) -> Optional[str]
419 For backward compatibility with youtube-dl, the signature
420 (info_dict) -> Optional[str] is also allowed.
421 - If it returns a message, the video is ignored.
422 - If it returns None, the video is downloaded.
423 - If it returns utils.NO_DEFAULT, the user is interactively
424 asked whether to download the video.
347de493 425 match_filter_func in utils.py is one example for this.
7e5db8c9 426 no_color: Do not emit color codes in output.
0a840f58 427 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 428 HTTP header
0a840f58 429 geo_bypass_country:
773f291d 430 Two-letter ISO 3166-2 country code that will be used for
431 explicit geographic restriction bypassing via faking
504f20dd 432 X-Forwarded-For HTTP header
5f95927a 433 geo_bypass_ip_block:
434 IP range in CIDR notation that will be used similarly to
504f20dd 435 geo_bypass_country
71b640cc 436
85729c51 437 The following options determine which downloader is picked:
52a8a1e1 438 external_downloader: A dictionary of protocol keys and the executable of the
439 external downloader to use for it. The allowed protocols
440 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
441 Set the value to 'native' to use the native downloader
442 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
443 or {'m3u8': 'ffmpeg'} instead.
444 Use the native HLS downloader instead of ffmpeg/avconv
bf09af3a 445 if True, otherwise use ffmpeg/avconv if False, otherwise
446 use downloader suggested by extractor if None.
53ed7066 447 compat_opts: Compatibility options. See "Differences in default behavior".
3acf6d38 448 The following options do not work when used through the API:
b5ae35ee 449 filename, abort-on-error, multistreams, no-live-chat, format-sort
dac5df5a 450 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
e4f02757 451 Refer to __init__.py for their implementation
819e0531 452 progress_template: Dictionary of templates for progress outputs.
453 Allowed keys are 'download', 'postprocess',
454 'download-title' (console title) and 'postprocess-title'.
455 The template is mapped on a dictionary with keys 'progress' and 'info'
23326151 456 retry_sleep_functions: Dictionary of functions that takes the number of attempts
457 as argument and returns the time to sleep in seconds.
458 Allowed keys are 'http', 'fragment', 'file_access'
fe7e0c98 459
8222d8de 460 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 461 the downloader (see yt_dlp/downloader/common.py):
51d9739f 462 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
205a0654 463 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
464 continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
59a7a13e 465 external_downloader_args, concurrent_fragment_downloads.
76b1bd67 466
467 The following options are used by the post processors:
d4a24f40 468 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
e4172ac9 469 otherwise prefer ffmpeg. (avconv support is deprecated)
c0b7d117 470 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
471 to the binary or its containing directory.
43820c03 472 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
34488702 473 and a list of additional command-line arguments for the
474 postprocessor/executable. The dict can also have "PP+EXE" keys
475 which are used when the given exe is used by the given PP.
476 Use 'default' as the name for arguments to be passed to all PP
477 For compatibility with youtube-dl, a single list of args
478 can also be used
e409895f 479
480 The following options are used by the extractors:
62bff2c1 481 extractor_retries: Number of times to retry for known errors
482 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 483 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 484 discontinuities such as ad breaks (default: False)
5d3a0e79 485 extractor_args: A dictionary of arguments to be passed to the extractors.
486 See "EXTRACTOR ARGUMENTS" for details.
487 Eg: {'youtube': {'skip': ['dash', 'hls']}}
88f23a18 488 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
5d3a0e79 489 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
490 If True (default), DASH manifests and related
62bff2c1 491 data will be downloaded and processed by extractor.
492 You can reduce network I/O by disabling it if you don't
493 care about DASH. (only for youtube)
5d3a0e79 494 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
495 If True (default), HLS manifests and related
62bff2c1 496 data will be downloaded and processed by extractor.
497 You can reduce network I/O by disabling it if you don't
498 care about HLS. (only for youtube)
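
    Purely as an illustration (the values below are examples, not defaults or a
    recommended setup), several of the options described above can be combined:

        def skip_short(info_dict, *, incomplete=False):
            # match_filter: return a message to skip, or None to download
            if not incomplete and (info_dict.get('duration') or 0) < 60:
                return 'Skipping video shorter than 60 seconds'

        def on_progress(d):
            if d['status'] == 'finished':
                print('Done downloading', d['filename'])

        params = {
            'format': 'bestvideo[height<=1080]+bestaudio/best',
            'outtmpl': {'default': '%(uploader)s/%(title)s.%(ext)s'},
            'paths': {'home': '~/Videos', 'temp': '/tmp/yt-dlp'},
            'match_filter': skip_short,
            'progress_hooks': [on_progress],
            # 'key' selects a postprocessor; 'when' defaults to 'post_process'
            'postprocessors': [{'key': 'FFmpegMetadata', 'when': 'post_process'}],
        }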
8222d8de 499 """
500
86e5f3ed 501 _NUMERIC_FIELDS = {
c9969434 502 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
e6f21b3d 503 'timestamp', 'release_timestamp',
c9969434 504 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
505 'average_rating', 'comment_count', 'age_limit',
506 'start_time', 'end_time',
507 'chapter_number', 'season_number', 'episode_number',
508 'track_number', 'disc_number', 'release_year',
86e5f3ed 509 }
c9969434 510
6db9c4d5 511 _format_fields = {
512 # NB: Keep in sync with the docstring of extractor/common.py
a44ca5a4 513 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
6db9c4d5 514 'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
515 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
516 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
517 'preference', 'language', 'language_preference', 'quality', 'source_preference',
518 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
519 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
520 }
48ee10ee 521 _format_selection_exts = {
522 'audio': {'m4a', 'mp3', 'ogg', 'aac'},
523 'video': {'mp4', 'flv', 'webm', '3gp'},
524 'storyboards': {'mhtml'},
525 }
526
3511266b 527 def __init__(self, params=None, auto_init=True):
883d4b1e 528 """Create a FileDownloader object with the given options.
529 @param auto_init Whether to load the default extractors and print header (if verbose).
49a57e70 530 Set to 'no_verbose_header' to not print the header
883d4b1e 531 """
e9f9a10f 532 if params is None:
533 params = {}
592b7485 534 self.params = params
8b7491c8 535 self._ies = {}
56c73665 536 self._ies_instances = {}
1e43a6f7 537 self._pps = {k: [] for k in POSTPROCESS_WHEN}
b35496d8 538 self._printed_messages = set()
1cf376f5 539 self._first_webpage_request = True
ab8e5e51 540 self._post_hooks = []
933605d7 541 self._progress_hooks = []
819e0531 542 self._postprocessor_hooks = []
8222d8de 543 self._download_retcode = 0
544 self._num_downloads = 0
9c906919 545 self._num_videos = 0
592b7485 546 self._playlist_level = 0
547 self._playlist_urls = set()
a0e07d31 548 self.cache = Cache(self)
34308b30 549
819e0531 550 windows_enable_vt_mode()
591bb9d3 551 stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
552 self._out_files = Namespace(
553 out=stdout,
554 error=sys.stderr,
555 screen=sys.stderr if self.params.get('quiet') else stdout,
556 console=None if compat_os_name == 'nt' else next(
cf4f42cb 557 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
591bb9d3 558 )
559 self._allow_colors = Namespace(**{
560 type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
7896214c 561 for type_, stream in self._out_files if type_ != 'console'
591bb9d3 562 })
819e0531 563
a61f4b28 564 if sys.version_info < (3, 6):
565 self.report_warning(
0181adef 566 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
a61f4b28 567
88acdbc2 568 if self.params.get('allow_unplayable_formats'):
569 self.report_warning(
ec11a9f4 570 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
819e0531 571 'This is a developer option intended for debugging. \n'
572 ' If you experience any issues while using this option, '
ec11a9f4 573 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
88acdbc2 574
be5df5ee 575 def check_deprecated(param, option, suggestion):
576 if self.params.get(param) is not None:
86e5f3ed 577 self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
be5df5ee 578 return True
579 return False
580
581 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
38cce791 582 if self.params.get('geo_verification_proxy') is None:
583 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
584
0d1bb027 585 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
586 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
53ed7066 587 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
0d1bb027 588
49a57e70 589 for msg in self.params.get('_warnings', []):
0d1bb027 590 self.report_warning(msg)
ee8dd27a 591 for msg in self.params.get('_deprecation_warnings', []):
592 self.deprecation_warning(msg)
0d1bb027 593
ec11a9f4 594 if 'list-formats' in self.params.get('compat_opts', []):
595 self.params['listformats_table'] = False
596
b5ae35ee 597 if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
b868936c 598 # nooverwrites was unnecessarily changed to overwrites
599 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
600 # This ensures compatibility with both keys
601 self.params['overwrites'] = not self.params['nooverwrites']
b5ae35ee 602 elif self.params.get('overwrites') is None:
603 self.params.pop('overwrites', None)
b868936c 604 else:
605 self.params['nooverwrites'] = not self.params['overwrites']
b9d973be 606
455a15e2 607 self.params.setdefault('forceprint', {})
608 self.params.setdefault('print_to_file', {})
bb66c247 609
610 # Compatibility with older syntax
ca30f449 611 if not isinstance(params['forceprint'], dict):
455a15e2 612 self.params['forceprint'] = {'video': params['forceprint']}
ca30f449 613
455a15e2 614 if self.params.get('bidi_workaround', False):
1c088fa8 615 try:
616 import pty
617 master, slave = pty.openpty()
003c69a8 618 width = compat_get_terminal_size().columns
591bb9d3 619 width_args = [] if width is None else ['-w', str(width)]
620 sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
5d681e96 621 try:
d3c93ec2 622 self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
5d681e96 623 except OSError:
d3c93ec2 624 self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
5d681e96 625 self._output_channel = os.fdopen(master, 'rb')
1c088fa8 626 except OSError as ose:
66e7ace1 627 if ose.errno == errno.ENOENT:
49a57e70 628 self.report_warning(
629 'Could not find fribidi executable, ignoring --bidi-workaround. '
630 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
1c088fa8 631 else:
632 raise
0783b09b 633
97ec5bc5 634 if auto_init:
635 if auto_init != 'no_verbose_header':
636 self.print_debug_header()
637 self.add_default_info_extractors()
638
3089bc74 639 if (sys.platform != 'win32'
640 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
455a15e2 641 and not self.params.get('restrictfilenames', False)):
e9137224 642 # Unicode filesystem API will throw errors (#1474, #13027)
34308b30 643 self.report_warning(
6febd1c1 644 'Assuming --restrict-filenames since file system encoding '
1b725173 645 'cannot encode all characters. '
6febd1c1 646 'Set the LC_ALL environment variable to fix this.')
4a98cdbf 647 self.params['restrictfilenames'] = True
34308b30 648
de6000d9 649 self.outtmpl_dict = self.parse_outtmpl()
486dd09e 650
187986a8 651 # Creating format selector here allows us to catch syntax errors before the extraction
652 self.format_selector = (
fa9f30b8 653 self.params.get('format') if self.params.get('format') in (None, '-')
093a1710 654 else self.params['format'] if callable(self.params['format'])
187986a8 655 else self.build_format_selector(self.params['format']))
656
8b7539d2 657 # Set http_headers defaults according to std_headers
658 self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
659
013b50b7 660 hooks = {
661 'post_hooks': self.add_post_hook,
662 'progress_hooks': self.add_progress_hook,
663 'postprocessor_hooks': self.add_postprocessor_hook,
664 }
665 for opt, fn in hooks.items():
666 for ph in self.params.get(opt, []):
667 fn(ph)
71b640cc 668
5bfc8bee 669 for pp_def_raw in self.params.get('postprocessors', []):
670 pp_def = dict(pp_def_raw)
671 when = pp_def.pop('when', 'post_process')
672 self.add_post_processor(
f9934b96 673 get_postprocessor(pp_def.pop('key'))(self, **pp_def),
5bfc8bee 674 when=when)
675
97ec5bc5 676 self._setup_opener()
51fb4995 677 register_socks_protocols()
678
ed39cac5 679 def preload_download_archive(fn):
680 """Preload the archive, if any is specified"""
681 if fn is None:
682 return False
49a57e70 683 self.write_debug(f'Loading archive file {fn!r}')
ed39cac5 684 try:
685 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
686 for line in archive_file:
687 self.archive.add(line.strip())
86e5f3ed 688 except OSError as ioe:
ed39cac5 689 if ioe.errno != errno.ENOENT:
690 raise
691 return False
692 return True
693
694 self.archive = set()
695 preload_download_archive(self.params.get('download_archive'))
696
7d4111ed 697 def warn_if_short_id(self, argv):
698 # short YouTube ID starting with dash?
699 idxs = [
700 i for i, a in enumerate(argv)
701 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
702 if idxs:
703 correct_argv = (
7a5c1cfe 704 ['yt-dlp']
3089bc74 705 + [a for i, a in enumerate(argv) if i not in idxs]
706 + ['--'] + [argv[i] for i in idxs]
7d4111ed 707 )
708 self.report_warning(
709 'Long argument string detected. '
49a57e70 710 'Use -- to separate parameters and URLs, like this:\n%s' %
7d4111ed 711 args_to_str(correct_argv))
712
8222d8de 713 def add_info_extractor(self, ie):
714 """Add an InfoExtractor object to the end of the list."""
8b7491c8 715 ie_key = ie.ie_key()
716 self._ies[ie_key] = ie
e52d7f85 717 if not isinstance(ie, type):
8b7491c8 718 self._ies_instances[ie_key] = ie
e52d7f85 719 ie.set_downloader(self)
8222d8de 720
8b7491c8 721 def _get_info_extractor_class(self, ie_key):
722 ie = self._ies.get(ie_key)
723 if ie is None:
724 ie = get_info_extractor(ie_key)
725 self.add_info_extractor(ie)
726 return ie
727
56c73665 728 def get_info_extractor(self, ie_key):
729 """
730 Get an instance of an IE with name ie_key, it will try to get one from
731 the _ies list, if there's no instance it will create a new one and add
732 it to the extractor list.
733 """
734 ie = self._ies_instances.get(ie_key)
735 if ie is None:
736 ie = get_info_extractor(ie_key)()
737 self.add_info_extractor(ie)
738 return ie
739
023fa8c4 740 def add_default_info_extractors(self):
741 """
742 Add the InfoExtractors returned by gen_extractors to the end of the list
743 """
e52d7f85 744 for ie in gen_extractor_classes():
023fa8c4 745 self.add_info_extractor(ie)
746
56d868db 747 def add_post_processor(self, pp, when='post_process'):
8222d8de 748 """Add a PostProcessor object to the end of the chain."""
5bfa4862 749 self._pps[when].append(pp)
8222d8de 750 pp.set_downloader(self)
751
ab8e5e51 752 def add_post_hook(self, ph):
753 """Add the post hook"""
754 self._post_hooks.append(ph)
755
933605d7 756 def add_progress_hook(self, ph):
819e0531 757 """Add the download progress hook"""
933605d7 758 self._progress_hooks.append(ph)
8ab470f1 759
819e0531 760 def add_postprocessor_hook(self, ph):
761 """Add the postprocessing progress hook"""
762 self._postprocessor_hooks.append(ph)
5bfc8bee 763 for pps in self._pps.values():
764 for pp in pps:
765 pp.add_progress_hook(ph)
819e0531 766
1c088fa8 767 def _bidi_workaround(self, message):
5d681e96 768 if not hasattr(self, '_output_channel'):
1c088fa8 769 return message
770
5d681e96 771 assert hasattr(self, '_output_process')
11b85ce6 772 assert isinstance(message, compat_str)
6febd1c1 773 line_count = message.count('\n') + 1
0f06bcd7 774 self._output_process.stdin.write((message + '\n').encode())
5d681e96 775 self._output_process.stdin.flush()
0f06bcd7 776 res = ''.join(self._output_channel.readline().decode()
9e1a5b84 777 for _ in range(line_count))
6febd1c1 778 return res[:-len('\n')]
1c088fa8 779
b35496d8 780 def _write_string(self, message, out=None, only_once=False):
781 if only_once:
782 if message in self._printed_messages:
783 return
784 self._printed_messages.add(message)
785 write_string(message, out=out, encoding=self.params.get('encoding'))
734f90bb 786
cf4f42cb 787 def to_stdout(self, message, skip_eol=False, quiet=None):
0760b0a7 788 """Print message to stdout"""
cf4f42cb 789 if quiet is not None:
ae6a1b95 790 self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
cf4f42cb 791 self._write_string(
792 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
591bb9d3 793 self._out_files.out)
cf4f42cb 794
795 def to_screen(self, message, skip_eol=False, quiet=None):
796 """Print message to screen if not in quiet mode"""
8bf9319e 797 if self.params.get('logger'):
43afe285 798 self.params['logger'].debug(message)
cf4f42cb 799 return
800 if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
801 return
802 self._write_string(
803 '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
591bb9d3 804 self._out_files.screen)
8222d8de 805
b35496d8 806 def to_stderr(self, message, only_once=False):
0760b0a7 807 """Print message to stderr"""
11b85ce6 808 assert isinstance(message, compat_str)
8bf9319e 809 if self.params.get('logger'):
43afe285 810 self.params['logger'].error(message)
811 else:
5792c950 812 self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
cf4f42cb 813
814 def _send_console_code(self, code):
591bb9d3 815 if compat_os_name == 'nt' or not self._out_files.console:
cf4f42cb 816 return
591bb9d3 817 self._write_string(code, self._out_files.console)
8222d8de 818
1e5b9a95 819 def to_console_title(self, message):
820 if not self.params.get('consoletitle', False):
821 return
3efb96a6 822 message = remove_terminal_sequences(message)
4bede0d8 823 if compat_os_name == 'nt':
824 if ctypes.windll.kernel32.GetConsoleWindow():
825 # c_wchar_p() might not be necessary if `message` is
826 # already of type unicode()
827 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
cf4f42cb 828 else:
829 self._send_console_code(f'\033]0;{message}\007')
1e5b9a95 830
bdde425c 831 def save_console_title(self):
cf4f42cb 832 if not self.params.get('consoletitle') or self.params.get('simulate'):
bdde425c 833 return
592b7485 834 self._send_console_code('\033[22;0t') # Save the title on stack
bdde425c 835
836 def restore_console_title(self):
cf4f42cb 837 if not self.params.get('consoletitle') or self.params.get('simulate'):
bdde425c 838 return
592b7485 839 self._send_console_code('\033[23;0t') # Restore the title from stack
bdde425c 840
841 def __enter__(self):
842 self.save_console_title()
843 return self
844
845 def __exit__(self, *args):
846 self.restore_console_title()
f89197d7 847
dca08720 848 if self.params.get('cookiefile') is not None:
1bab3437 849 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
bdde425c 850
fa9f30b8 851 def trouble(self, message=None, tb=None, is_error=True):
8222d8de 852 """Determine action to take when a download problem appears.
853
854 Depending on if the downloader has been configured to ignore
855 download errors or not, this method may throw an exception or
856 not when errors are found, after printing the message.
857
fa9f30b8 858 @param tb If given, is additional traceback information
859 @param is_error Whether to raise error according to ignorerrors
8222d8de 860 """
861 if message is not None:
862 self.to_stderr(message)
863 if self.params.get('verbose'):
864 if tb is None:
865 if sys.exc_info()[0]: # if .trouble has been called from an except block
6febd1c1 866 tb = ''
8222d8de 867 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
6febd1c1 868 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
c0384f22 869 tb += encode_compat_str(traceback.format_exc())
8222d8de 870 else:
871 tb_data = traceback.format_list(traceback.extract_stack())
6febd1c1 872 tb = ''.join(tb_data)
c19bc311 873 if tb:
874 self.to_stderr(tb)
fa9f30b8 875 if not is_error:
876 return
b1940459 877 if not self.params.get('ignoreerrors'):
8222d8de 878 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
879 exc_info = sys.exc_info()[1].exc_info
880 else:
881 exc_info = sys.exc_info()
882 raise DownloadError(message, exc_info)
883 self._download_retcode = 1
884
19a03940 885 Styles = Namespace(
886 HEADERS='yellow',
887 EMPHASIS='light blue',
492272fe 888 FILENAME='green',
19a03940 889 ID='green',
890 DELIM='blue',
891 ERROR='red',
892 WARNING='yellow',
893 SUPPRESS='light black',
894 )
ec11a9f4 895
7578d77d 896 def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
e5a998f3 897 text = str(text)
ec11a9f4 898 if test_encoding:
899 original_text = text
5c104538 900 # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
901 encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
ec11a9f4 902 text = text.encode(encoding, 'ignore').decode(encoding)
903 if fallback is not None and text != original_text:
904 text = fallback
7578d77d 905 return format_text(text, f) if allow_colors else text if fallback is None else fallback
ec11a9f4 906
591bb9d3 907 def _format_out(self, *args, **kwargs):
908 return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
909
ec11a9f4 910 def _format_screen(self, *args, **kwargs):
591bb9d3 911 return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
ec11a9f4 912
913 def _format_err(self, *args, **kwargs):
591bb9d3 914 return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
819e0531 915
c84aeac6 916 def report_warning(self, message, only_once=False):
8222d8de 917 '''
918 Print the message to stderr, it will be prefixed with 'WARNING:'
919 If stderr is a tty file the 'WARNING:' will be colored
920 '''
6d07ce01 921 if self.params.get('logger') is not None:
922 self.params['logger'].warning(message)
8222d8de 923 else:
ad8915b7 924 if self.params.get('no_warnings'):
925 return
ec11a9f4 926 self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
8222d8de 927
ee8dd27a 928 def deprecation_warning(self, message):
929 if self.params.get('logger') is not None:
a44ca5a4 930 self.params['logger'].warning(f'DeprecationWarning: {message}')
ee8dd27a 931 else:
932 self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
933
fa9f30b8 934 def report_error(self, message, *args, **kwargs):
8222d8de 935 '''
936 Do the same as trouble, but prefixes the message with 'ERROR:', colored
937 in red if stderr is a tty file.
938 '''
fa9f30b8 939 self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
8222d8de 940
b35496d8 941 def write_debug(self, message, only_once=False):
0760b0a7 942 '''Log debug message or Print message to stderr'''
943 if not self.params.get('verbose', False):
944 return
945 message = '[debug] %s' % message
946 if self.params.get('logger'):
947 self.params['logger'].debug(message)
948 else:
b35496d8 949 self.to_stderr(message, only_once)
0760b0a7 950
8222d8de 951 def report_file_already_downloaded(self, file_name):
952 """Report file has already been fully downloaded."""
953 try:
6febd1c1 954 self.to_screen('[download] %s has already been downloaded' % file_name)
ce02ed60 955 except UnicodeEncodeError:
6febd1c1 956 self.to_screen('[download] The file has already been downloaded')
8222d8de 957
0c3d0f51 958 def report_file_delete(self, file_name):
959 """Report that existing file will be deleted."""
960 try:
c25228e5 961 self.to_screen('Deleting existing file %s' % file_name)
0c3d0f51 962 except UnicodeEncodeError:
c25228e5 963 self.to_screen('Deleting existing file')
0c3d0f51 964
319b6059 965 def raise_no_formats(self, info, forced=False, *, msg=None):
0a5a191a 966 has_drm = info.get('_has_drm')
319b6059 967 ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
968 msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
969 if forced or not ignored:
1151c407 970 raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
319b6059 971 expected=has_drm or ignored or expected)
88acdbc2 972 else:
973 self.report_warning(msg)
974
de6000d9 975 def parse_outtmpl(self):
976 outtmpl_dict = self.params.get('outtmpl', {})
977 if not isinstance(outtmpl_dict, dict):
978 outtmpl_dict = {'default': outtmpl_dict}
71ce444a 979 # Remove spaces in the default template
980 if self.params.get('restrictfilenames'):
981 sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
982 else:
983 sanitize = lambda x: x
de6000d9 984 outtmpl_dict.update({
71ce444a 985 k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
80c03fa9 986 if outtmpl_dict.get(k) is None})
86e5f3ed 987 for _, val in outtmpl_dict.items():
de6000d9 988 if isinstance(val, bytes):
86e5f3ed 989 self.report_warning('Parameter outtmpl is bytes, but should be a unicode string')
de6000d9 990 return outtmpl_dict
991
21cd8fae 992 def get_output_path(self, dir_type='', filename=None):
993 paths = self.params.get('paths', {})
994 assert isinstance(paths, dict)
995 path = os.path.join(
996 expand_path(paths.get('home', '').strip()),
997 expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
998 filename or '')
21cd8fae 999 return sanitize_path(path, force=self.params.get('windowsfilenames'))
1000
76a264ac 1001 @staticmethod
901130bb 1002 def _outtmpl_expandpath(outtmpl):
1003 # expand_path translates '%%' into '%' and '$$' into '$'
1004 # correspondingly that is not what we want since we need to keep
1005 # '%%' intact for template dict substitution step. Working around
1006 # with boundary-alike separator hack.
1007 sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
86e5f3ed 1008 outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')
901130bb 1009
1010 # outtmpl should be expand_path'ed before template dict substitution
1011 # because meta fields may contain env variables we don't want to
1012 # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
1013 # title "Hello $PATH", we don't want `$PATH` to be expanded.
1014 return expand_path(outtmpl).replace(sep, '')
1015
1016 @staticmethod
1017 def escape_outtmpl(outtmpl):
1018 ''' Escape any remaining strings like %s, %abc% etc. '''
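        # Illustrative (assumed behavior, not an exhaustive spec): a stray '%'
        # that is not part of a '%(field)X' template gets doubled so the later
        # '%'-substitution leaves it untouched, e.g.
        #   escape_outtmpl('50% off - %(title)s')  ->  '50%% off - %(title)s'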
1019 return re.sub(
1020 STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
1021 lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
1022 outtmpl)
1023
1024 @classmethod
1025 def validate_outtmpl(cls, outtmpl):
76a264ac 1026 ''' @return None or Exception object '''
7d1eb38a 1027 outtmpl = re.sub(
37893bb0 1028 STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
7d1eb38a 1029 lambda mobj: f'{mobj.group(0)[:-1]}s',
1030 cls._outtmpl_expandpath(outtmpl))
76a264ac 1031 try:
7d1eb38a 1032 cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
76a264ac 1033 return None
1034 except ValueError as err:
1035 return err
1036
03b4de72 1037 @staticmethod
1038 def _copy_infodict(info_dict):
1039 info_dict = dict(info_dict)
09b49e1f 1040 info_dict.pop('__postprocessors', None)
03b4de72 1041 return info_dict
1042
e0fd9573 1043 def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1044 """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1045 @param sanitize Whether to sanitize the output as a filename.
1046 For backward compatibility, a function can also be passed
1047 """
1048
6e84b215 1049 info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
143db31d 1050
03b4de72 1051 info_dict = self._copy_infodict(info_dict)
752cda38 1052 info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
53c18592 1053 formatSeconds(info_dict['duration'], '-' if sanitize else ':')
143db31d 1054 if info_dict.get('duration', None) is not None
1055 else None)
1d485a1a 1056 info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
9c906919 1057 info_dict['video_autonumber'] = self._num_videos
752cda38 1058 if info_dict.get('resolution') is None:
1059 info_dict['resolution'] = self.format_resolution(info_dict, default=None)
143db31d 1060
e6f21b3d 1061 # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
143db31d 1062 # of %(field)s to %(field)0Nd for backward compatibility
1063 field_size_compat_map = {
0a5a191a 1064 'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
ec11a9f4 1065 'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
752cda38 1066 'autonumber': self.params.get('autonumber_size') or 5,
143db31d 1067 }
752cda38 1068
385a27fa 1069 TMPL_DICT = {}
37893bb0 1070 EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
385a27fa 1071 MATH_FUNCTIONS = {
1072 '+': float.__add__,
1073 '-': float.__sub__,
1074 }
e625be0d 1075 # Field is of the form key1.key2...
1076 # where keys (except first) can be string, int or slice
2b8a2973 1077 FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
1d485a1a 1078 MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
385a27fa 1079 MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1d485a1a 1080 INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
e625be0d 1081 (?P<negate>-)?
1d485a1a 1082 (?P<fields>{FIELD_RE})
1083 (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
e625be0d 1084 (?:>(?P<strf_format>.+?))?
34baa9fd 1085 (?P<remaining>
1086 (?P<alternate>(?<!\\),[^|&)]+)?
1087 (?:&(?P<replacement>.*?))?
1088 (?:\|(?P<default>.*?))?
1d485a1a 1089 )$''')
752cda38 1090
2b8a2973 1091 def _traverse_infodict(k):
1092 k = k.split('.')
1093 if k[0] == '':
1094 k.pop(0)
1095 return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
76a264ac 1096
752cda38 1097 def get_value(mdict):
1098 # Object traversal
2b8a2973 1099 value = _traverse_infodict(mdict['fields'])
752cda38 1100 # Negative
1101 if mdict['negate']:
1102 value = float_or_none(value)
1103 if value is not None:
1104 value *= -1
1105 # Do maths
385a27fa 1106 offset_key = mdict['maths']
1107 if offset_key:
752cda38 1108 value = float_or_none(value)
1109 operator = None
385a27fa 1110 while offset_key:
1111 item = re.match(
1112 MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1113 offset_key).group(0)
1114 offset_key = offset_key[len(item):]
1115 if operator is None:
752cda38 1116 operator = MATH_FUNCTIONS[item]
385a27fa 1117 continue
1118 item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1119 offset = float_or_none(item)
1120 if offset is None:
2b8a2973 1121 offset = float_or_none(_traverse_infodict(item))
385a27fa 1122 try:
1123 value = operator(value, multiplier * offset)
1124 except (TypeError, ZeroDivisionError):
1125 return None
1126 operator = None
752cda38 1127 # Datetime formatting
1128 if mdict['strf_format']:
7c37ff97 1129 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
752cda38 1130
1131 return value
1132
b868936c 1133 na = self.params.get('outtmpl_na_placeholder', 'NA')
1134
e0fd9573 1135 def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
5c3895ff 1136 return sanitize_filename(str(value), restricted=restricted, is_id=(
1137 bool(re.search(r'(^|[_.])id(\.|$)', key))
1138 if 'filename-sanitization' in self.params.get('compat_opts', [])
1139 else NO_DEFAULT))
e0fd9573 1140
1141 sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1142 sanitize = bool(sanitize)
1143
6e84b215 1144 def _dumpjson_default(obj):
1145 if isinstance(obj, (set, LazyList)):
1146 return list(obj)
adbc4ec4 1147 return repr(obj)
6e84b215 1148
752cda38 1149 def create_key(outer_mobj):
1150 if not outer_mobj.group('has_key'):
b836dc94 1151 return outer_mobj.group(0)
752cda38 1152 key = outer_mobj.group('key')
752cda38 1153 mobj = re.match(INTERNAL_FORMAT_RE, key)
e0fd9573 1154 initial_field = mobj.group('fields') if mobj else ''
e978789f 1155 value, replacement, default = None, None, na
7c37ff97 1156 while mobj:
e625be0d 1157 mobj = mobj.groupdict()
7c37ff97 1158 default = mobj['default'] if mobj['default'] is not None else default
752cda38 1159 value = get_value(mobj)
e978789f 1160 replacement = mobj['replacement']
7c37ff97 1161 if value is None and mobj['alternate']:
34baa9fd 1162 mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
7c37ff97 1163 else:
1164 break
752cda38 1165
b868936c 1166 fmt = outer_mobj.group('format')
752cda38 1167 if fmt == 's' and value is not None and key in field_size_compat_map.keys():
86e5f3ed 1168 fmt = f'0{field_size_compat_map[key]:d}d'
752cda38 1169
e978789f 1170 value = default if value is None else value if replacement is None else replacement
752cda38 1171
4476d2c7 1172 flags = outer_mobj.group('conversion') or ''
7d1eb38a 1173 str_fmt = f'{fmt[:-1]}s'
524e2e4f 1174 if fmt[-1] == 'l': # list
4476d2c7 1175 delim = '\n' if '#' in flags else ', '
9e907ebd 1176 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
524e2e4f 1177 elif fmt[-1] == 'j': # json
4476d2c7 1178 value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
524e2e4f 1179 elif fmt[-1] == 'q': # quoted
4476d2c7 1180 value = map(str, variadic(value) if '#' in flags else [value])
1181 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
524e2e4f 1182 elif fmt[-1] == 'B': # bytes
0f06bcd7 1183 value = f'%{str_fmt}'.encode() % str(value).encode()
f5aa5cfb 1184 value, fmt = value.decode('utf-8', 'ignore'), 's'
524e2e4f 1185 elif fmt[-1] == 'U': # unicode normalized
524e2e4f 1186 value, fmt = unicodedata.normalize(
1187 # "+" = compatibility equivalence, "#" = NFD
4476d2c7 1188 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
524e2e4f 1189 value), str_fmt
e0fd9573 1190 elif fmt[-1] == 'D': # decimal suffix
abbeeebc 1191 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1192 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1193 factor=1024 if '#' in flags else 1000)
37893bb0 1194 elif fmt[-1] == 'S': # filename sanitization
e0fd9573 1195 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
7d1eb38a 1196 elif fmt[-1] == 'c':
524e2e4f 1197 if value:
1198 value = str(value)[0]
76a264ac 1199 else:
524e2e4f 1200 fmt = str_fmt
76a264ac 1201 elif fmt[-1] not in 'rs': # numeric
a439a3a4 1202 value = float_or_none(value)
752cda38 1203 if value is None:
1204 value, fmt = default, 's'
901130bb 1205
752cda38 1206 if sanitize:
1207 if fmt[-1] == 'r':
1208 # If value is an object, sanitize might convert it to a string
1209 # So we convert it to repr first
7d1eb38a 1210 value, fmt = repr(value), str_fmt
639f1cea 1211 if fmt[-1] in 'csr':
e0fd9573 1212 value = sanitizer(initial_field, value)
901130bb 1213
b868936c 1214 key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
385a27fa 1215 TMPL_DICT[key] = value
b868936c 1216 return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
752cda38 1217
385a27fa 1218 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1219
819e0531 1220 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1221 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1222 return self.escape_outtmpl(outtmpl) % info_dict
1223
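# Illustrative sketch of the conversion suffixes handled in create_key above,
# exercised through evaluate_outtmpl (field values are placeholders; exact
# output also depends on the runtime params):
#
#   ydl = YoutubeDL({})
#   info = {'id': 'abc', 'title': 'Café: a "demo"', 'tags': ['news', 'live'], 'filesize': 10_000_000}
#   ydl.evaluate_outtmpl('%(tags)l', info)       # 'news, live'  -- list joined with ', '
#   ydl.evaluate_outtmpl('%(tags)#l', info)      # newline-joined variant ('#' flag)
#   ydl.evaluate_outtmpl('%(title)q', info)      # shell-quoted for use in commands
#   ydl.evaluate_outtmpl('%(filesize)D', info)   # '10M' -- decimal suffix (factor 1000)
#   ydl.evaluate_outtmpl('%(title)+U', info)     # NFKC-normalized unicode ('+' flag)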
5127e92a 1224 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1225 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1226 if outtmpl is None:
1227 outtmpl = self.outtmpl_dict.get(tmpl_type or 'default', self.outtmpl_dict['default'])
8222d8de 1228 try:
5127e92a 1229 outtmpl = self._outtmpl_expandpath(outtmpl)
e0fd9573 1230 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
6a0546e3 1231 if not filename:
1232 return None
15da37c7 1233
5127e92a 1234 if tmpl_type in ('', 'temp'):
6a0546e3 1235 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1236 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1237 filename = replace_extension(filename, ext, final_ext)
5127e92a 1238 elif tmpl_type:
6a0546e3 1239 force_ext = OUTTMPL_TYPES[tmpl_type]
1240 if force_ext:
1241 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
de6000d9 1242
bdc3fd2f
U
1243 # https://github.com/blackjack4494/youtube-dlc/issues/85
1244 trim_file_name = self.params.get('trim_file_name', False)
1245 if trim_file_name:
5c22c63d 1246 no_ext, *ext = filename.rsplit('.', 2)
1247 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
bdc3fd2f 1248
0202b52a 1249 return filename
8222d8de 1250 except ValueError as err:
6febd1c1 1251 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
1252 return None
1253
5127e92a 1254 def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1255 """Generate the output filename"""
1256 if outtmpl:
1257 assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1258 dir_type = None
1259 filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
80c03fa9 1260 if not filename and dir_type not in ('', 'temp'):
1261 return ''
de6000d9 1262
c84aeac6 1263 if warn:
21cd8fae 1264 if not self.params.get('paths'):
de6000d9 1265 pass
1266 elif filename == '-':
c84aeac6 1267 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
de6000d9 1268 elif os.path.isabs(filename):
c84aeac6 1269 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
de6000d9 1270 if filename == '-' or not filename:
1271 return filename
1272
21cd8fae 1273 return self.get_output_path(dir_type, filename)
0202b52a 1274
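# Rough usage sketch (the template and info values are placeholders): the
# filename is rendered via evaluate_outtmpl() and then combined with the
# configured --paths through get_output_path():
#
#   ydl = YoutubeDL({'outtmpl': '%(title)s [%(id)s].%(ext)s'})
#   ydl.prepare_filename({'id': 'abc', 'title': 'demo', 'ext': 'mp4'})
#   # -> 'demo [abc].mp4' (subject to sanitization and the 'paths' setting)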
120fe513 1275 def _match_entry(self, info_dict, incomplete=False, silent=False):
ecdec191 1276 """ Returns None if the file should be downloaded """
8222d8de 1277
c77495e3 1278 video_title = info_dict.get('title', info_dict.get('id', 'video'))
1279
8b0d7497 1280 def check_filter():
8b0d7497 1281 if 'title' in info_dict:
1282 # This can happen when we're just evaluating the playlist
1283 title = info_dict['title']
1284 matchtitle = self.params.get('matchtitle', False)
1285 if matchtitle:
1286 if not re.search(matchtitle, title, re.IGNORECASE):
1287 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1288 rejecttitle = self.params.get('rejecttitle', False)
1289 if rejecttitle:
1290 if re.search(rejecttitle, title, re.IGNORECASE):
1291 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1292 date = info_dict.get('upload_date')
1293 if date is not None:
1294 dateRange = self.params.get('daterange', DateRange())
1295 if date not in dateRange:
86e5f3ed 1296 return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
8b0d7497 1297 view_count = info_dict.get('view_count')
1298 if view_count is not None:
1299 min_views = self.params.get('min_views')
1300 if min_views is not None and view_count < min_views:
1301 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1302 max_views = self.params.get('max_views')
1303 if max_views is not None and view_count > max_views:
1304 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1305 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1306 return 'Skipping "%s" because it is age restricted' % video_title
8b0d7497 1307
8f18aca8 1308 match_filter = self.params.get('match_filter')
1309 if match_filter is not None:
1310 try:
1311 ret = match_filter(info_dict, incomplete=incomplete)
1312 except TypeError:
1313 # For backward compatibility
1314 ret = None if incomplete else match_filter(info_dict)
492272fe 1315 if ret is NO_DEFAULT:
1316 while True:
1317 filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1318 reply = input(self._format_screen(
1319 f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1320 if reply in {'y', ''}:
1321 return None
1322 elif reply == 'n':
1323 return f'Skipping {video_title}'
492272fe 1324 elif ret is not None:
8f18aca8 1325 return ret
8b0d7497 1326 return None
1327
c77495e3 1328 if self.in_download_archive(info_dict):
1329 reason = '%s has already been recorded in the archive' % video_title
1330 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1331 else:
1332 reason = check_filter()
1333 break_opt, break_err = 'break_on_reject', RejectedVideoReached
8b0d7497 1334 if reason is not None:
120fe513 1335 if not silent:
1336 self.to_screen('[download] ' + reason)
c77495e3 1337 if self.params.get(break_opt, False):
1338 raise break_err()
8b0d7497 1339 return reason
fe7e0c98 1340
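# Minimal sketch of a 'match_filter' callable as consumed above (the function
# name and the duration check are illustrative): return None to download,
# a string to skip with that reason, or NO_DEFAULT to trigger the interactive
# prompt.
#
#   def longer_than_an_hour(info_dict, *, incomplete=False):
#       if incomplete:  # still resolving the playlist; don't reject yet
#           return None
#       if (info_dict.get('duration') or 0) > 3600:
#           return 'Skipping %s: longer than an hour' % info_dict.get('id')
#       return None
#
#   ydl = YoutubeDL({'match_filter': longer_than_an_hour})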
b6c45014
JMF
1341 @staticmethod
1342 def add_extra_info(info_dict, extra_info):
1343 '''Set the keys from extra_info in info dict if they are missing'''
1344 for key, value in extra_info.items():
1345 info_dict.setdefault(key, value)
1346
409e1828 1347 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
61aa5ba3 1348 process=True, force_generic_extractor=False):
41d1cca3 1349 """
1350 Extract and return the information dictionary of the URL.
1351
1352 Arguments:
1353 url -- URL to extract
1354
1355 Keyword arguments:
1356 download -- whether to download videos during extraction
1357 ie_key -- extractor key hint
1358 extra_info -- dictionary containing the extra values to add to each result
1359 process -- whether to resolve all unresolved references (URLs, playlist items),
1360 must be True for download to work.
1361 force_generic_extractor -- force using the generic extractor
1362 """
fe7e0c98 1363
409e1828 1364 if extra_info is None:
1365 extra_info = {}
1366
61aa5ba3 1367 if not ie_key and force_generic_extractor:
d22dec74
S
1368 ie_key = 'Generic'
1369
8222d8de 1370 if ie_key:
8b7491c8 1371 ies = {ie_key: self._get_info_extractor_class(ie_key)}
8222d8de
JMF
1372 else:
1373 ies = self._ies
1374
8b7491c8 1375 for ie_key, ie in ies.items():
8222d8de
JMF
1376 if not ie.suitable(url):
1377 continue
1378
1379 if not ie.working():
6febd1c1
PH
1380 self.report_warning('The program functionality for this site has been marked as broken, '
1381 'and will probably not work.')
8222d8de 1382
1151c407 1383 temp_id = ie.get_temp_id(url)
a0566bbf 1384 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
5e5be0c0 1385 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1386 if self.params.get('break_on_existing', False):
1387 raise ExistingVideoReached()
a0566bbf 1388 break
8b7491c8 1389 return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
a0566bbf 1390 else:
1391 self.report_error('no suitable InfoExtractor for URL %s' % url)
1392
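# Minimal usage sketch mirroring the docstring above (URL and options are
# placeholders):
#
#   ydl = YoutubeDL({'quiet': True})
#   info = ydl.extract_info('https://example.com/watch?v=xyz', download=False)
#   # 'info' is the fully resolved info dict (a playlist dict for playlist URLs)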
8e5fecc8 1393 def __handle_extraction_exceptions(func):
b5ae35ee 1394 @functools.wraps(func)
a0566bbf 1395 def wrapper(self, *args, **kwargs):
6da22e7d 1396 while True:
1397 try:
1398 return func(self, *args, **kwargs)
1399 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
8222d8de 1400 raise
6da22e7d 1401 except ReExtractInfo as e:
1402 if e.expected:
1403 self.to_screen(f'{e}; Re-extracting data')
1404 else:
1405 self.to_stderr('\r')
1406 self.report_warning(f'{e}; Re-extracting data')
1407 continue
1408 except GeoRestrictedError as e:
1409 msg = e.msg
1410 if e.countries:
1411 msg += '\nThis video is available in %s.' % ', '.join(
1412 map(ISO3166Utils.short2full, e.countries))
1413 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1414 self.report_error(msg)
1415 except ExtractorError as e: # An error we somewhat expected
1416 self.report_error(str(e), e.format_traceback())
1417 except Exception as e:
1418 if self.params.get('ignoreerrors'):
1419 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1420 else:
1421 raise
1422 break
a0566bbf 1423 return wrapper
1424
f2ebc5c7 1425 def _wait_for_video(self, ie_result):
1426 if (not self.params.get('wait_for_video')
1427 or ie_result.get('_type', 'video') != 'video'
1428 or ie_result.get('formats') or ie_result.get('url')):
1429 return
1430
1431 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1432 last_msg = ''
1433
1434 def progress(msg):
1435 nonlocal last_msg
1436 self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1437 last_msg = msg
1438
1439 min_wait, max_wait = self.params.get('wait_for_video')
1440 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1441 if diff is None and ie_result.get('live_status') == 'is_upcoming':
16c620bc 1442 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
f2ebc5c7 1443 self.report_warning('Release time of video is not known')
1444 elif (diff or 0) <= 0:
1445 self.report_warning('Video should already be available according to extracted info')
38d79fd1 1446 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
f2ebc5c7 1447 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1448
1449 wait_till = time.time() + diff
1450 try:
1451 while True:
1452 diff = wait_till - time.time()
1453 if diff <= 0:
1454 progress('')
1455 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1456 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1457 time.sleep(1)
1458 except KeyboardInterrupt:
1459 progress('')
1460 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1461 except BaseException as e:
1462 if not isinstance(e, ReExtractInfo):
1463 self.to_screen('')
1464 raise
1465
a0566bbf 1466 @__handle_extraction_exceptions
58f197b7 1467 def __extract_info(self, url, ie, download, extra_info, process):
a0566bbf 1468 ie_result = ie.extract(url)
1469 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1470 return
1471 if isinstance(ie_result, list):
1472 # Backwards compatibility: old IE result format
1473 ie_result = {
1474 '_type': 'compat_list',
1475 'entries': ie_result,
1476 }
e37d0efb 1477 if extra_info.get('original_url'):
1478 ie_result.setdefault('original_url', extra_info['original_url'])
a0566bbf 1479 self.add_default_extra_info(ie_result, ie, url)
1480 if process:
f2ebc5c7 1481 self._wait_for_video(ie_result)
a0566bbf 1482 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1483 else:
a0566bbf 1484 return ie_result
fe7e0c98 1485
ea38e55f 1486 def add_default_extra_info(self, ie_result, ie, url):
6033d980 1487 if url is not None:
1488 self.add_extra_info(ie_result, {
1489 'webpage_url': url,
1490 'original_url': url,
57ebfca3 1491 })
1492 webpage_url = ie_result.get('webpage_url')
1493 if webpage_url:
1494 self.add_extra_info(ie_result, {
1495 'webpage_url_basename': url_basename(webpage_url),
1496 'webpage_url_domain': get_domain(webpage_url),
6033d980 1497 })
1498 if ie is not None:
1499 self.add_extra_info(ie_result, {
1500 'extractor': ie.IE_NAME,
1501 'extractor_key': ie.ie_key(),
1502 })
ea38e55f 1503
58adec46 1504 def process_ie_result(self, ie_result, download=True, extra_info=None):
8222d8de
JMF
1505 """
1506 Take the result of the ie (may be modified) and resolve all unresolved
1507 references (URLs, playlist items).
1508
1509 It will also download the videos if 'download' is True.
1510 Returns the resolved ie_result.
1511 """
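# Typical (simplified) shapes of ie_result by '_type', as handled by the
# branches below:
#   {'_type': 'video', 'id': ..., 'title': ..., 'formats': [...]}   -> process_video_result
#   {'_type': 'url', 'url': ..., 'ie_key': ...}                     -> re-extracted via extract_info
#   {'_type': 'url_transparent', 'url': ..., ...}                   -> inner result merged with this metadata
#   {'_type': 'playlist'/'multi_video', 'entries': [...], ...}      -> __process_playlist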
58adec46 1512 if extra_info is None:
1513 extra_info = {}
e8ee972c
PH
1514 result_type = ie_result.get('_type', 'video')
1515
057a5206 1516 if result_type in ('url', 'url_transparent'):
134c6ea8 1517 ie_result['url'] = sanitize_url(ie_result['url'])
e37d0efb 1518 if ie_result.get('original_url'):
1519 extra_info.setdefault('original_url', ie_result['original_url'])
1520
057a5206 1521 extract_flat = self.params.get('extract_flat', False)
3089bc74
S
1522 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1523 or extract_flat is True):
ecb54191 1524 info_copy = ie_result.copy()
6033d980 1525 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
360167b9 1526 if ie and not ie_result.get('id'):
4614bc22 1527 info_copy['id'] = ie.get_temp_id(ie_result['url'])
6033d980 1528 self.add_default_extra_info(info_copy, ie, ie_result['url'])
4614bc22 1529 self.add_extra_info(info_copy, extra_info)
b5475f11 1530 info_copy, _ = self.pre_process(info_copy)
ecb54191 1531 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
4614bc22 1532 if self.params.get('force_write_download_archive', False):
1533 self.record_download_archive(info_copy)
e8ee972c
PH
1534 return ie_result
1535
8222d8de 1536 if result_type == 'video':
b6c45014 1537 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1538 ie_result = self.process_video_result(ie_result, download=download)
28b0eb0f 1539 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1540 if additional_urls:
e9f4ccd1 1541 # TODO: Improve MetadataParserPP to allow setting a list
9c2b75b5 1542 if isinstance(additional_urls, compat_str):
1543 additional_urls = [additional_urls]
1544 self.to_screen(
1545 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1546 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1547 ie_result['additional_entries'] = [
1548 self.extract_info(
b69fd25c 1549 url, download, extra_info=extra_info,
9c2b75b5 1550 force_generic_extractor=self.params.get('force_generic_extractor'))
1551 for url in additional_urls
1552 ]
1553 return ie_result
8222d8de
JMF
1554 elif result_type == 'url':
1555 # We have to add extra_info to the results because it may be
1556 # contained in a playlist
07cce701 1557 return self.extract_info(
1558 ie_result['url'], download,
1559 ie_key=ie_result.get('ie_key'),
1560 extra_info=extra_info)
7fc3fa05
PH
1561 elif result_type == 'url_transparent':
1562 # Use the information from the embedding page
1563 info = self.extract_info(
1564 ie_result['url'], ie_key=ie_result.get('ie_key'),
1565 extra_info=extra_info, download=False, process=False)
1566
1640eb09
S
1567 # extract_info may return None when ignoreerrors is enabled and
1568 # extraction failed with an error, don't crash and return early
1569 # in this case
1570 if not info:
1571 return info
1572
412c617d 1573 new_result = info.copy()
90137ca4 1574 new_result.update(filter_dict(ie_result, lambda k, v: (
1575 v is not None and k not in {'_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'})))
7fc3fa05 1576
0563f7ac
S
1577 # Extracted info may not be a video result (i.e.
1578 # info.get('_type', 'video') != 'video') but rather a url or
1579 # url_transparent. In such cases outer metadata (from ie_result)
1580 # should be propagated to inner one (info). For this to happen
1581 # _type of info should be overridden with url_transparent. This
067aa17e 1582 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac
S
1583 if new_result.get('_type') == 'url':
1584 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
1585
1586 return self.process_ie_result(
1587 new_result, download=download, extra_info=extra_info)
40fcba5e 1588 elif result_type in ('playlist', 'multi_video'):
30a074c2 1589 # Protect from infinite recursion due to recursively nested playlists
1590 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1591 webpage_url = ie_result['webpage_url']
1592 if webpage_url in self._playlist_urls:
7e85e872 1593 self.to_screen(
30a074c2 1594 '[download] Skipping already downloaded playlist: %s'
1595 % (ie_result.get('title') or ie_result.get('id')))
1596 return
7e85e872 1597
30a074c2 1598 self._playlist_level += 1
1599 self._playlist_urls.add(webpage_url)
03f83004 1600 self._fill_common_fields(ie_result, False)
bc516a3f 1601 self._sanitize_thumbnails(ie_result)
30a074c2 1602 try:
1603 return self.__process_playlist(ie_result, download)
1604 finally:
1605 self._playlist_level -= 1
1606 if not self._playlist_level:
1607 self._playlist_urls.clear()
8222d8de 1608 elif result_type == 'compat_list':
c9bf4114
PH
1609 self.report_warning(
1610 'Extractor %s returned a compat_list result. '
1611 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1612
8222d8de 1613 def _fixup(r):
b868936c 1614 self.add_extra_info(r, {
1615 'extractor': ie_result['extractor'],
1616 'webpage_url': ie_result['webpage_url'],
1617 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1618 'webpage_url_domain': get_domain(ie_result['webpage_url']),
b868936c 1619 'extractor_key': ie_result['extractor_key'],
1620 })
8222d8de
JMF
1621 return r
1622 ie_result['entries'] = [
b6c45014 1623 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1624 for r in ie_result['entries']
1625 ]
1626 return ie_result
1627 else:
1628 raise Exception('Invalid result type: %s' % result_type)
1629
e92caff5 1630 def _ensure_dir_exists(self, path):
1631 return make_dir(path, self.report_error)
1632
3b603dbd 1633 @staticmethod
1634 def _playlist_infodict(ie_result, **kwargs):
1635 return {
1636 **ie_result,
1637 'playlist': ie_result.get('title') or ie_result.get('id'),
1638 'playlist_id': ie_result.get('id'),
1639 'playlist_title': ie_result.get('title'),
1640 'playlist_uploader': ie_result.get('uploader'),
1641 'playlist_uploader_id': ie_result.get('uploader_id'),
1642 'playlist_index': 0,
1643 **kwargs,
1644 }
1645
30a074c2 1646 def __process_playlist(self, ie_result, download):
1647 # We process each entry in the playlist
1648 playlist = ie_result.get('title') or ie_result.get('id')
1649 self.to_screen('[download] Downloading playlist: %s' % playlist)
1650
498f5606 1651 if 'entries' not in ie_result:
aa9369a2 1652 raise EntryNotInPlaylist('There are no entries')
7c7f7161 1653
1654 MissingEntry = object()
498f5606 1655 incomplete_entries = bool(ie_result.get('requested_entries'))
1656 if incomplete_entries:
bf5f605e 1657 def fill_missing_entries(entries, indices):
7c7f7161 1658 ret = [MissingEntry] * max(indices)
bf5f605e 1659 for i, entry in zip(indices, entries):
498f5606 1660 ret[i - 1] = entry
1661 return ret
1662 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
02fd60d3 1663
30a074c2 1664 playlist_results = []
1665
56a8fb4f 1666 playliststart = self.params.get('playliststart', 1)
30a074c2 1667 playlistend = self.params.get('playlistend')
1668 # For backwards compatibility, interpret -1 as whole list
1669 if playlistend == -1:
1670 playlistend = None
1671
1672 playlistitems_str = self.params.get('playlist_items')
1673 playlistitems = None
1674 if playlistitems_str is not None:
1675 def iter_playlistitems(format):
1676 for string_segment in format.split(','):
1677 if '-' in string_segment:
1678 start, end = string_segment.split('-')
1679 for item in range(int(start), int(end) + 1):
1680 yield int(item)
1681 else:
1682 yield int(string_segment)
1683 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
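# Example of the parsing above (illustrative): '--playlist-items 1,3-5,3'
# yields the ordered, de-duplicated items [1, 3, 4, 5].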
1684
1685 ie_entries = ie_result['entries']
8e5fecc8 1686 if isinstance(ie_entries, list):
ed8d87f9 1687 playlist_count = len(ie_entries)
f0d785d3 1688 msg = f'Collected {playlist_count} videos; downloading %d of them'
1689 ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count
1690
8e5fecc8 1691 def get_entry(i):
1692 return ie_entries[i - 1]
1693 else:
f0d785d3 1694 msg = 'Downloading %d videos'
c586f9e8 1695 if not isinstance(ie_entries, (PagedList, LazyList)):
8e5fecc8 1696 ie_entries = LazyList(ie_entries)
d37707bd 1697 elif isinstance(ie_entries, InAdvancePagedList):
1698 if ie_entries._pagesize == 1:
1699 playlist_count = ie_entries._pagecount
8e5fecc8 1700
1701 def get_entry(i):
1702 return YoutubeDL.__handle_extraction_exceptions(
1703 lambda self, i: ie_entries[i - 1]
1704 )(self, i)
50fed816 1705
f0d785d3 1706 entries, broken = [], False
ff1c7fc9 1707 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1708 for i in items:
1709 if i == 0:
1710 continue
56a8fb4f 1711 if playlistitems is None and playlistend is not None and playlistend < i:
1712 break
1713 entry = None
1714 try:
50fed816 1715 entry = get_entry(i)
7c7f7161 1716 if entry is MissingEntry:
498f5606 1717 raise EntryNotInPlaylist()
56a8fb4f 1718 except (IndexError, EntryNotInPlaylist):
1719 if incomplete_entries:
aa9369a2 1720 raise EntryNotInPlaylist(f'Entry {i} cannot be found')
56a8fb4f 1721 elif not playlistitems:
1722 break
1723 entries.append(entry)
120fe513 1724 try:
1725 if entry is not None:
e5a998f3 1726 # TODO: Add auto-generated fields
120fe513 1727 self._match_entry(entry, incomplete=True, silent=True)
1728 except (ExistingVideoReached, RejectedVideoReached):
f0d785d3 1729 broken = True
120fe513 1730 break
56a8fb4f 1731 ie_result['entries'] = entries
30a074c2 1732
56a8fb4f 1733 # Save playlist_index before re-ordering
1734 entries = [
9e598870 1735 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
56a8fb4f 1736 for i, entry in enumerate(entries, 1)
1737 if entry is not None]
1738 n_entries = len(entries)
498f5606 1739
f0d785d3 1740 if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
1741 ie_result['playlist_count'] = n_entries
1742
e08a85d8 1743 if not playlistitems and (playliststart != 1 or playlistend):
56a8fb4f 1744 playlistitems = list(range(playliststart, playliststart + n_entries))
498f5606 1745 ie_result['requested_entries'] = playlistitems
1746
e08a85d8 1747 _infojson_written = False
0bfc53d0 1748 write_playlist_files = self.params.get('allow_playlist_files', True)
1749 if write_playlist_files and self.params.get('list_thumbnails'):
1750 self.list_thumbnails(ie_result)
1751 if write_playlist_files and not self.params.get('simulate'):
3b603dbd 1752 ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
e08a85d8 1753 _infojson_written = self._write_info_json(
1754 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1755 if _infojson_written is None:
80c03fa9 1756 return
1757 if self._write_description('playlist', ie_result,
1758 self.prepare_filename(ie_copy, 'pl_description')) is None:
1759 return
681de68e 1760 # TODO: This should be passed to ThumbnailsConvertor if necessary
80c03fa9 1761 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
30a074c2 1762
1763 if self.params.get('playlistreverse', False):
1764 entries = entries[::-1]
30a074c2 1765 if self.params.get('playlistrandom', False):
1766 random.shuffle(entries)
1767
1768 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1769
86e5f3ed 1770 self.to_screen(f'[{ie_result["extractor"]}] playlist {playlist}: {msg % n_entries}')
26e2805c 1771 failures = 0
1772 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
71729754 1773 for i, entry_tuple in enumerate(entries, 1):
1774 playlist_index, entry = entry_tuple
81139999 1775 if 'playlist-index' in self.params.get('compat_opts', []):
1776 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
19a03940 1777 self.to_screen('[download] Downloading video %s of %s' % (
1778 self._format_screen(i, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
30a074c2 1779 # This __x_forwarded_for_ip thing is a bit ugly but requires
1780 # minimal changes
1781 if x_forwarded_for:
1782 entry['__x_forwarded_for_ip'] = x_forwarded_for
1783 extra = {
1784 'n_entries': n_entries,
0a5a191a 1785 '__last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
f0d785d3 1786 'playlist_count': ie_result.get('playlist_count'),
71729754 1787 'playlist_index': playlist_index,
1788 'playlist_autonumber': i,
30a074c2 1789 'playlist': playlist,
1790 'playlist_id': ie_result.get('id'),
1791 'playlist_title': ie_result.get('title'),
1792 'playlist_uploader': ie_result.get('uploader'),
1793 'playlist_uploader_id': ie_result.get('uploader_id'),
30a074c2 1794 'extractor': ie_result['extractor'],
1795 'webpage_url': ie_result['webpage_url'],
1796 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1797 'webpage_url_domain': get_domain(ie_result['webpage_url']),
30a074c2 1798 'extractor_key': ie_result['extractor_key'],
1799 }
1800
1801 if self._match_entry(entry, incomplete=True) is not None:
1802 continue
1803
1804 entry_result = self.__process_iterable_entry(entry, download, extra)
26e2805c 1805 if not entry_result:
1806 failures += 1
1807 if failures >= max_failures:
1808 self.report_error(
1809 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1810 break
30a074c2 1811 playlist_results.append(entry_result)
1812 ie_result['entries'] = playlist_results
e08a85d8 1813
1814 # Write the updated info to json
cb96c5be 1815 if _infojson_written is True and self._write_info_json(
e08a85d8 1816 'updated playlist', ie_result,
1817 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1818 return
ca30f449 1819
ed5835b4 1820 ie_result = self.run_all_pps('playlist', ie_result)
1821 self.to_screen(f'[download] Finished downloading playlist: {playlist}')
30a074c2 1822 return ie_result
1823
a0566bbf 1824 @__handle_extraction_exceptions
1825 def __process_iterable_entry(self, entry, download, extra_info):
1826 return self.process_ie_result(
1827 entry, download=download, extra_info=extra_info)
1828
67134eab
JMF
1829 def _build_format_filter(self, filter_spec):
1830 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1831
1832 OPERATORS = {
1833 '<': operator.lt,
1834 '<=': operator.le,
1835 '>': operator.gt,
1836 '>=': operator.ge,
1837 '=': operator.eq,
1838 '!=': operator.ne,
1839 }
67134eab 1840 operator_rex = re.compile(r'''(?x)\s*
187986a8 1841 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1842 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1843 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1844 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1845 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1846 if m:
1847 try:
1848 comparison_value = int(m.group('value'))
1849 except ValueError:
1850 comparison_value = parse_filesize(m.group('value'))
1851 if comparison_value is None:
1852 comparison_value = parse_filesize(m.group('value') + 'B')
1853 if comparison_value is None:
1854 raise ValueError(
1855 'Invalid value %r in format specification %r' % (
67134eab 1856 m.group('value'), filter_spec))
9ddb6925
S
1857 op = OPERATORS[m.group('op')]
1858
083c9df9 1859 if not m:
9ddb6925
S
1860 STR_OPERATORS = {
1861 '=': operator.eq,
10d33b34
YCH
1862 '^=': lambda attr, value: attr.startswith(value),
1863 '$=': lambda attr, value: attr.endswith(value),
1864 '*=': lambda attr, value: value in attr,
1ce9a3cb 1865 '~=': lambda attr, value: value.search(attr) is not None
9ddb6925 1866 }
187986a8 1867 str_operator_rex = re.compile(r'''(?x)\s*
1868 (?P<key>[a-zA-Z0-9._-]+)\s*
1ce9a3cb
LF
1869 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1870 (?P<quote>["'])?
1871 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1872 (?(quote)(?P=quote))\s*
9ddb6925 1873 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1874 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925 1875 if m:
1ce9a3cb
LF
1876 if m.group('op') == '~=':
1877 comparison_value = re.compile(m.group('value'))
1878 else:
1879 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2cc779f4
S
1880 str_op = STR_OPERATORS[m.group('op')]
1881 if m.group('negation'):
e118a879 1882 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1883 else:
1884 op = str_op
083c9df9 1885
9ddb6925 1886 if not m:
187986a8 1887 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1888
1889 def _filter(f):
1890 actual_value = f.get(m.group('key'))
1891 if actual_value is None:
1892 return m.group('none_inclusive')
1893 return op(actual_value, comparison_value)
67134eab
JMF
1894 return _filter
1895
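# Illustrative filter_spec strings accepted above (these are the bracketed
# parts of a format selector such as -f 'bv[height<=720]+ba'):
#   'height<=480'       -- numeric comparison on a known numeric field
#   'filesize<100M'     -- value parsed with parse_filesize
#   'height<=?720'      -- '?' after the operator also keeps formats lacking the field
#   'vcodec^=avc1'      -- string operator: vcodec starts with 'avc1'
#   'format_id!*=dash'  -- negated string containment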
9f1a1c36 1896 def _check_formats(self, formats):
1897 for f in formats:
1898 self.to_screen('[info] Testing format %s' % f['format_id'])
75689fe5 1899 path = self.get_output_path('temp')
1900 if not self._ensure_dir_exists(f'{path}/'):
1901 continue
1902 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
9f1a1c36 1903 temp_file.close()
1904 try:
1905 success, _ = self.dl(temp_file.name, f, test=True)
1906 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1907 success = False
1908 finally:
1909 if os.path.exists(temp_file.name):
1910 try:
1911 os.remove(temp_file.name)
1912 except OSError:
1913 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1914 if success:
1915 yield f
1916 else:
1917 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1918
0017d9ad 1919 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1920
af0f7428
S
1921 def can_merge():
1922 merger = FFmpegMergerPP(self)
1923 return merger.available and merger.can_merge()
1924
91ebc640 1925 prefer_best = (
b7b04c78 1926 not self.params.get('simulate')
91ebc640 1927 and download
1928 and (
1929 not can_merge()
21633673 1930 or info_dict.get('is_live') and not self.params.get('live_from_start')
de6000d9 1931 or self.outtmpl_dict['default'] == '-'))
53ed7066 1932 compat = (
1933 prefer_best
1934 or self.params.get('allow_multiple_audio_streams', False)
1935 or 'format-spec' in self.params.get('compat_opts', []))
91ebc640 1936
1937 return (
53ed7066 1938 'best/bestvideo+bestaudio' if prefer_best
1939 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1940 else 'bestvideo+bestaudio/best')
0017d9ad 1941
67134eab
JMF
1942 def build_format_selector(self, format_spec):
1943 def syntax_error(note, start):
1944 message = (
1945 'Invalid format specification: '
86e5f3ed 1946 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
67134eab
JMF
1947 return SyntaxError(message)
1948
1949 PICKFIRST = 'PICKFIRST'
1950 MERGE = 'MERGE'
1951 SINGLE = 'SINGLE'
0130afb7 1952 GROUP = 'GROUP'
67134eab
JMF
1953 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1954
91ebc640 1955 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1956 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 1957
9f1a1c36 1958 check_formats = self.params.get('check_formats') == 'selected'
e8e73840 1959
67134eab
JMF
1960 def _parse_filter(tokens):
1961 filter_parts = []
1962 for type, string, start, _, _ in tokens:
1963 if type == tokenize.OP and string == ']':
1964 return ''.join(filter_parts)
1965 else:
1966 filter_parts.append(string)
1967
232541df 1968 def _remove_unused_ops(tokens):
17cc1534 1969 # Remove operators that we don't use and join them with the surrounding strings
232541df
JMF
1970 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1971 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1972 last_string, last_start, last_end, last_line = None, None, None, None
1973 for type, string, start, end, line in tokens:
1974 if type == tokenize.OP and string == '[':
1975 if last_string:
1976 yield tokenize.NAME, last_string, last_start, last_end, last_line
1977 last_string = None
1978 yield type, string, start, end, line
1979 # everything inside brackets will be handled by _parse_filter
1980 for type, string, start, end, line in tokens:
1981 yield type, string, start, end, line
1982 if type == tokenize.OP and string == ']':
1983 break
1984 elif type == tokenize.OP and string in ALLOWED_OPS:
1985 if last_string:
1986 yield tokenize.NAME, last_string, last_start, last_end, last_line
1987 last_string = None
1988 yield type, string, start, end, line
1989 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1990 if not last_string:
1991 last_string = string
1992 last_start = start
1993 last_end = end
1994 else:
1995 last_string += string
1996 if last_string:
1997 yield tokenize.NAME, last_string, last_start, last_end, last_line
1998
cf2ac6df 1999 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
2000 selectors = []
2001 current_selector = None
2002 for type, string, start, _, _ in tokens:
2003 # ENCODING is only defined in python 3.x
2004 if type == getattr(tokenize, 'ENCODING', None):
2005 continue
2006 elif type in [tokenize.NAME, tokenize.NUMBER]:
2007 current_selector = FormatSelector(SINGLE, string, [])
2008 elif type == tokenize.OP:
cf2ac6df
JMF
2009 if string == ')':
2010 if not inside_group:
2011 # ')' will be handled by the parentheses group
2012 tokens.restore_last_token()
67134eab 2013 break
cf2ac6df 2014 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
2015 tokens.restore_last_token()
2016 break
cf2ac6df
JMF
2017 elif inside_choice and string == ',':
2018 tokens.restore_last_token()
2019 break
2020 elif string == ',':
0a31a350
JMF
2021 if not current_selector:
2022 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
2023 selectors.append(current_selector)
2024 current_selector = None
2025 elif string == '/':
d96d604e
JMF
2026 if not current_selector:
2027 raise syntax_error('"/" must follow a format selector', start)
67134eab 2028 first_choice = current_selector
cf2ac6df 2029 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 2030 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
2031 elif string == '[':
2032 if not current_selector:
2033 current_selector = FormatSelector(SINGLE, 'best', [])
2034 format_filter = _parse_filter(tokens)
2035 current_selector.filters.append(format_filter)
0130afb7
JMF
2036 elif string == '(':
2037 if current_selector:
2038 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
2039 group = _parse_format_selection(tokens, inside_group=True)
2040 current_selector = FormatSelector(GROUP, group, [])
67134eab 2041 elif string == '+':
d03cfdce 2042 if not current_selector:
2043 raise syntax_error('Unexpected "+"', start)
2044 selector_1 = current_selector
2045 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2046 if not selector_2:
2047 raise syntax_error('Expected a selector', start)
2048 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab 2049 else:
86e5f3ed 2050 raise syntax_error(f'Operator not recognized: "{string}"', start)
67134eab
JMF
2051 elif type == tokenize.ENDMARKER:
2052 break
2053 if current_selector:
2054 selectors.append(current_selector)
2055 return selectors
2056
f8d4ad9a 2057 def _merge(formats_pair):
2058 format_1, format_2 = formats_pair
2059
2060 formats_info = []
2061 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2062 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2063
2064 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
551f9388 2065 get_no_more = {'video': False, 'audio': False}
f8d4ad9a 2066 for (i, fmt_info) in enumerate(formats_info):
551f9388 2067 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2068 formats_info.pop(i)
2069 continue
2070 for aud_vid in ['audio', 'video']:
f8d4ad9a 2071 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2072 if get_no_more[aud_vid]:
2073 formats_info.pop(i)
f5510afe 2074 break
f8d4ad9a 2075 get_no_more[aud_vid] = True
2076
2077 if len(formats_info) == 1:
2078 return formats_info[0]
2079
2080 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2081 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2082
2083 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2084 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2085
2086 output_ext = self.params.get('merge_output_format')
2087 if not output_ext:
2088 if the_only_video:
2089 output_ext = the_only_video['ext']
2090 elif the_only_audio and not video_fmts:
2091 output_ext = the_only_audio['ext']
2092 else:
2093 output_ext = 'mkv'
2094
975a0d0d 2095 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2096
f8d4ad9a 2097 new_dict = {
2098 'requested_formats': formats_info,
975a0d0d 2099 'format': '+'.join(filtered('format')),
2100 'format_id': '+'.join(filtered('format_id')),
f8d4ad9a 2101 'ext': output_ext,
975a0d0d 2102 'protocol': '+'.join(map(determine_protocol, formats_info)),
093a1710 2103 'language': '+'.join(orderedSet(filtered('language'))) or None,
2104 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2105 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
975a0d0d 2106 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
f8d4ad9a 2107 }
2108
2109 if the_only_video:
2110 new_dict.update({
2111 'width': the_only_video.get('width'),
2112 'height': the_only_video.get('height'),
2113 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2114 'fps': the_only_video.get('fps'),
49a57e70 2115 'dynamic_range': the_only_video.get('dynamic_range'),
f8d4ad9a 2116 'vcodec': the_only_video.get('vcodec'),
2117 'vbr': the_only_video.get('vbr'),
2118 'stretched_ratio': the_only_video.get('stretched_ratio'),
2119 })
2120
2121 if the_only_audio:
2122 new_dict.update({
2123 'acodec': the_only_audio.get('acodec'),
2124 'abr': the_only_audio.get('abr'),
975a0d0d 2125 'asr': the_only_audio.get('asr'),
f8d4ad9a 2126 })
2127
2128 return new_dict
2129
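# Illustrative outcome (format ids assumed): merging a video-only format
# '137' with an audio-only format '140' yields a synthetic entry with
# 'format_id': '137+140', 'requested_formats' holding both originals, and
# 'ext' taken from the video format (falling back to 'mkv' when no single
# output container is implied).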
e8e73840 2130 def _check_formats(formats):
981052c9 2131 if not check_formats:
2132 yield from formats
b5ac45b1 2133 return
9f1a1c36 2134 yield from self._check_formats(formats)
e8e73840 2135
67134eab 2136 def _build_selector_function(selector):
909d24dd 2137 if isinstance(selector, list): # ,
67134eab
JMF
2138 fs = [_build_selector_function(s) for s in selector]
2139
317f7ab6 2140 def selector_function(ctx):
67134eab 2141 for f in fs:
981052c9 2142 yield from f(ctx)
67134eab 2143 return selector_function
909d24dd 2144
2145 elif selector.type == GROUP: # ()
0130afb7 2146 selector_function = _build_selector_function(selector.selector)
909d24dd 2147
2148 elif selector.type == PICKFIRST: # /
67134eab
JMF
2149 fs = [_build_selector_function(s) for s in selector.selector]
2150
317f7ab6 2151 def selector_function(ctx):
67134eab 2152 for f in fs:
317f7ab6 2153 picked_formats = list(f(ctx))
67134eab
JMF
2154 if picked_formats:
2155 return picked_formats
2156 return []
67134eab 2157
981052c9 2158 elif selector.type == MERGE: # +
2159 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2160
2161 def selector_function(ctx):
adbc4ec4 2162 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
981052c9 2163 yield _merge(pair)
2164
909d24dd 2165 elif selector.type == SINGLE: # atom
598d185d 2166 format_spec = selector.selector or 'best'
909d24dd 2167
f8d4ad9a 2168 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 2169 if format_spec == 'all':
2170 def selector_function(ctx):
9222c381 2171 yield from _check_formats(ctx['formats'][::-1])
f8d4ad9a 2172 elif format_spec == 'mergeall':
2173 def selector_function(ctx):
316f2650 2174 formats = list(_check_formats(
2175 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
e01d6aa4 2176 if not formats:
2177 return
921b76ca 2178 merged_format = formats[-1]
2179 for f in formats[-2::-1]:
f8d4ad9a 2180 merged_format = _merge((merged_format, f))
2181 yield merged_format
909d24dd 2182
2183 else:
85e801a9 2184 format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
eff63539 2185 mobj = re.match(
2186 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2187 format_spec)
2188 if mobj is not None:
2189 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 2190 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 2191 format_type = (mobj.group('type') or [None])[0]
2192 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2193 format_modified = mobj.group('mod') is not None
909d24dd 2194
2195 format_fallback = not format_type and not format_modified # for b, w
8326b00a 2196 _filter_f = (
eff63539 2197 (lambda f: f.get('%scodec' % format_type) != 'none')
2198 if format_type and format_modified # bv*, ba*, wv*, wa*
2199 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2200 if format_type # bv, ba, wv, wa
2201 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2202 if not format_modified # b, w
8326b00a 2203 else lambda f: True) # b*, w*
2204 filter_f = lambda f: _filter_f(f) and (
2205 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
67134eab 2206 else:
48ee10ee 2207 if format_spec in self._format_selection_exts['audio']:
b11c04a8 2208 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
48ee10ee 2209 elif format_spec in self._format_selection_exts['video']:
b11c04a8 2210 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
85e801a9 2211 separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
48ee10ee 2212 elif format_spec in self._format_selection_exts['storyboards']:
b11c04a8 2213 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2214 else:
b5ae35ee 2215 filter_f = lambda f: f.get('format_id') == format_spec # id
909d24dd 2216
2217 def selector_function(ctx):
2218 formats = list(ctx['formats'])
909d24dd 2219 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
85e801a9 2220 if not matches:
2221 if format_fallback and ctx['incomplete_formats']:
2222 # for extractors with incomplete formats (audio only (soundcloud)
2223 # or video only (imgur)) best/worst will fall back to
2224 # best/worst {video,audio}-only format
2225 matches = formats
2226 elif separate_fallback and not ctx['has_merged_format']:
2227 # for compatibility with youtube-dl when there is no pre-merged format
2228 matches = list(filter(separate_fallback, formats))
981052c9 2229 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2230 try:
e8e73840 2231 yield matches[format_idx - 1]
4abea8ca 2232 except LazyList.IndexError:
981052c9 2233 return
083c9df9 2234
67134eab 2235 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 2236
317f7ab6 2237 def final_selector(ctx):
adbc4ec4 2238 ctx_copy = dict(ctx)
67134eab 2239 for _filter in filters:
317f7ab6
S
2240 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2241 return selector_function(ctx_copy)
67134eab 2242 return final_selector
083c9df9 2243
0f06bcd7 2244 stream = io.BytesIO(format_spec.encode())
0130afb7 2245 try:
f9934b96 2246 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
0130afb7
JMF
2247 except tokenize.TokenError:
2248 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2249
86e5f3ed 2250 class TokenIterator:
0130afb7
JMF
2251 def __init__(self, tokens):
2252 self.tokens = tokens
2253 self.counter = 0
2254
2255 def __iter__(self):
2256 return self
2257
2258 def __next__(self):
2259 if self.counter >= len(self.tokens):
2260 raise StopIteration()
2261 value = self.tokens[self.counter]
2262 self.counter += 1
2263 return value
2264
2265 next = __next__
2266
2267 def restore_last_token(self):
2268 self.counter -= 1
2269
2270 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 2271 return _build_selector_function(parsed_selector)
a9c58ad9 2272
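# Illustrative format_spec strings for the parser above (',' downloads each
# selector, '/' falls back, '+' merges, '()' groups, '[...]' applies filters):
#
#   selector = self.build_format_selector('bv*+ba/b')
#   selector = self.build_format_selector('bestvideo[height<=720]+bestaudio/best[height<=720]')
#   selector = self.build_format_selector('(mp4,webm)[height<480]')
#   formats_to_download = list(selector({
#       'formats': formats, 'has_merged_format': False, 'incomplete_formats': False}))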
e5660ee6 2273 def _calc_headers(self, info_dict):
8b7539d2 2274 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
e5660ee6
JMF
2275
2276 cookies = self._calc_cookies(info_dict)
2277 if cookies:
2278 res['Cookie'] = cookies
2279
0016b84e
S
2280 if 'X-Forwarded-For' not in res:
2281 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2282 if x_forwarded_for_ip:
2283 res['X-Forwarded-For'] = x_forwarded_for_ip
2284
e5660ee6
JMF
2285 return res
2286
2287 def _calc_cookies(self, info_dict):
5c2266df 2288 pr = sanitized_Request(info_dict['url'])
e5660ee6 2289 self.cookiejar.add_cookie_header(pr)
662435f7 2290 return pr.get_header('Cookie')
e5660ee6 2291
9f1a1c36 2292 def _sort_thumbnails(self, thumbnails):
2293 thumbnails.sort(key=lambda t: (
2294 t.get('preference') if t.get('preference') is not None else -1,
2295 t.get('width') if t.get('width') is not None else -1,
2296 t.get('height') if t.get('height') is not None else -1,
2297 t.get('id') if t.get('id') is not None else '',
2298 t.get('url')))
2299
b0249bca 2300 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2301 thumbnails = info_dict.get('thumbnails')
2302 if thumbnails is None:
2303 thumbnail = info_dict.get('thumbnail')
2304 if thumbnail:
2305 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
9f1a1c36 2306 if not thumbnails:
2307 return
2308
2309 def check_thumbnails(thumbnails):
2310 for t in thumbnails:
2311 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2312 try:
2313 self.urlopen(HEADRequest(t['url']))
2314 except network_exceptions as err:
2315 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2316 continue
2317 yield t
2318
2319 self._sort_thumbnails(thumbnails)
2320 for i, t in enumerate(thumbnails):
2321 if t.get('id') is None:
2322 t['id'] = '%d' % i
2323 if t.get('width') and t.get('height'):
2324 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2325 t['url'] = sanitize_url(t['url'])
2326
2327 if self.params.get('check_formats') is True:
282f5709 2328 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
9f1a1c36 2329 else:
2330 info_dict['thumbnails'] = thumbnails
bc516a3f 2331
03f83004
LNO
2332 def _fill_common_fields(self, info_dict, is_video=True):
2333 # TODO: move sanitization here
2334 if is_video:
2335 # playlists are allowed to lack "title"
d4736fdb 2336 title = info_dict.get('title', NO_DEFAULT)
2337 if title is NO_DEFAULT:
03f83004
LNO
2338 raise ExtractorError('Missing "title" field in extractor result',
2339 video_id=info_dict['id'], ie=info_dict['extractor'])
d4736fdb 2340 info_dict['fulltitle'] = title
2341 if not title:
2342 if title == '':
2343 self.write_debug('Extractor gave empty title. Creating a generic title')
2344 else:
2345 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
1d485a1a 2346 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
03f83004
LNO
2347
2348 if info_dict.get('duration') is not None:
2349 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2350
2351 for ts_key, date_key in (
2352 ('timestamp', 'upload_date'),
2353 ('release_timestamp', 'release_date'),
2354 ('modified_timestamp', 'modified_date'),
2355 ):
2356 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2357 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2358 # see http://bugs.python.org/issue1646728)
19a03940 2359 with contextlib.suppress(ValueError, OverflowError, OSError):
03f83004
LNO
2360 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2361 info_dict[date_key] = upload_date.strftime('%Y%m%d')
03f83004
LNO
2362
2363 live_keys = ('is_live', 'was_live')
2364 live_status = info_dict.get('live_status')
2365 if live_status is None:
2366 for key in live_keys:
2367 if info_dict.get(key) is False:
2368 continue
2369 if info_dict.get(key):
2370 live_status = key
2371 break
2372 if all(info_dict.get(key) is False for key in live_keys):
2373 live_status = 'not_live'
2374 if live_status:
2375 info_dict['live_status'] = live_status
2376 for key in live_keys:
2377 if info_dict.get(key) is None:
2378 info_dict[key] = (live_status == key)
2379
2380 # Auto generate title fields corresponding to the *_number fields when missing
2381 # in order to always have clean titles. This is very common for TV series.
2382 for field in ('chapter', 'season', 'episode'):
2383 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2384 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2385
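# Illustrative behaviour of the helper above (values assumed): an entry with
# 'episode_number': 3 and no 'episode' gains 'episode': 'Episode 3', and a
# 'timestamp' of 1577836800 produces 'upload_date': '20200101'.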
dd82ffea
JMF
2386 def process_video_result(self, info_dict, download=True):
2387 assert info_dict.get('_type', 'video') == 'video'
9c906919 2388 self._num_videos += 1
dd82ffea 2389
bec1fad2 2390 if 'id' not in info_dict:
fc08bdd6 2391 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2392 elif not info_dict.get('id'):
2393 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
455a15e2 2394
c9969434
S
2395 def report_force_conversion(field, field_not, conversion):
2396 self.report_warning(
2397 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2398 % (field, field_not, conversion))
2399
2400 def sanitize_string_field(info, string_field):
2401 field = info.get(string_field)
2402 if field is None or isinstance(field, compat_str):
2403 return
2404 report_force_conversion(string_field, 'a string', 'string')
2405 info[string_field] = compat_str(field)
2406
2407 def sanitize_numeric_fields(info):
2408 for numeric_field in self._NUMERIC_FIELDS:
2409 field = info.get(numeric_field)
f9934b96 2410 if field is None or isinstance(field, (int, float)):
c9969434
S
2411 continue
2412 report_force_conversion(numeric_field, 'numeric', 'int')
2413 info[numeric_field] = int_or_none(field)
2414
2415 sanitize_string_field(info_dict, 'id')
2416 sanitize_numeric_fields(info_dict)
4c3f8c3f 2417 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
50e93e03 2418 self.report_warning('"duration" field is negative, there is an error in the extractor')
be6217b2 2419
dd82ffea
JMF
2420 if 'playlist' not in info_dict:
2421 # It isn't part of a playlist
2422 info_dict['playlist'] = None
2423 info_dict['playlist_index'] = None
2424
bc516a3f 2425 self._sanitize_thumbnails(info_dict)
d5519808 2426
536a55da 2427 thumbnail = info_dict.get('thumbnail')
bc516a3f 2428 thumbnails = info_dict.get('thumbnails')
536a55da
S
2429 if thumbnail:
2430 info_dict['thumbnail'] = sanitize_url(thumbnail)
2431 elif thumbnails:
d5519808
PH
2432 info_dict['thumbnail'] = thumbnails[-1]['url']
2433
ae30b840 2434 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2435 info_dict['display_id'] = info_dict['id']
2436
03f83004 2437 self._fill_common_fields(info_dict)
33d2fc2f 2438
05108a49
S
2439 for cc_kind in ('subtitles', 'automatic_captions'):
2440 cc = info_dict.get(cc_kind)
2441 if cc:
2442 for _, subtitle in cc.items():
2443 for subtitle_format in subtitle:
2444 if subtitle_format.get('url'):
2445 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2446 if subtitle_format.get('ext') is None:
2447 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2448
2449 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2450 subtitles = info_dict.get('subtitles')
4bba3716 2451
360e1ca5 2452 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2453 info_dict['id'], subtitles, automatic_captions)
a504ced0 2454
dd82ffea
JMF
2455 if info_dict.get('formats') is None:
2456 # There's only one format available
2457 formats = [info_dict]
2458 else:
2459 formats = info_dict['formats']
2460
0a5a191a 2461 # or None ensures --clean-infojson removes it
2462 info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
88acdbc2 2463 if not self.params.get('allow_unplayable_formats'):
2464 formats = [f for f in formats if not f.get('has_drm')]
0a5a191a 2465 if info_dict['_has_drm'] and all(
c0b6e5c7 2466 f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2467 self.report_warning(
2468 'This video is DRM protected and only images are available for download. '
2469 'Use --list-formats to see them')
88acdbc2 2470
319b6059 2471 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2472 if not get_from_start:
2473 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2474 if info_dict.get('is_live') and formats:
adbc4ec4 2475 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
319b6059 2476 if get_from_start and not formats:
a44ca5a4 2477 self.raise_no_formats(info_dict, msg=(
2478 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2479 'If you want to download from the current time, use --no-live-from-start'))
adbc4ec4 2480
db95dc13 2481 if not formats:
1151c407 2482 self.raise_no_formats(info_dict)
db95dc13 2483
73af5cc8
S
2484 def is_wellformed(f):
2485 url = f.get('url')
a5ac0c47 2486 if not url:
73af5cc8
S
2487 self.report_warning(
2488 '"url" field is missing or empty - skipping format, '
2489 'there is an error in the extractor')
a5ac0c47
S
2490 return False
2491 if isinstance(url, bytes):
2492 sanitize_string_field(f, 'url')
2493 return True
73af5cc8
S
2494
2495 # Filter out malformed formats for better extraction robustness
2496 formats = list(filter(is_wellformed, formats))
2497
181c7053
S
2498 formats_dict = {}
2499
dd82ffea 2500 # We check that all the formats have the format and format_id fields
db95dc13 2501 for i, format in enumerate(formats):
c9969434
S
2502 sanitize_string_field(format, 'format_id')
2503 sanitize_numeric_fields(format)
dcf77cf1 2504 format['url'] = sanitize_url(format['url'])
e74e3b63 2505 if not format.get('format_id'):
8016c922 2506 format['format_id'] = compat_str(i)
e2effb08
S
2507 else:
2508 # Sanitize format_id from characters used in format selector expression
ec85ded8 2509 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2510 format_id = format['format_id']
2511 if format_id not in formats_dict:
2512 formats_dict[format_id] = []
2513 formats_dict[format_id].append(format)
2514
2515 # Make sure all formats have unique format_id
03b4de72 2516 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
181c7053 2517 for format_id, ambiguous_formats in formats_dict.items():
48ee10ee 2518 ambiguous_id = len(ambiguous_formats) > 1
2519 for i, format in enumerate(ambiguous_formats):
2520 if ambiguous_id:
181c7053 2521 format['format_id'] = '%s-%d' % (format_id, i)
48ee10ee 2522 if format.get('ext') is None:
2523 format['ext'] = determine_ext(format['url']).lower()
2524 # Ensure there is no conflict between id and ext in format selection
2525 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2526 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2527 format['format_id'] = 'f%s' % format['format_id']
181c7053
S
2528
2529 for i, format in enumerate(formats):
8c51aa65 2530 if format.get('format') is None:
6febd1c1 2531 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2532 id=format['format_id'],
2533 res=self.format_resolution(format),
b868936c 2534 note=format_field(format, 'format_note', ' (%s)'),
8c51aa65 2535 )
6f0be937 2536 if format.get('protocol') is None:
b5559424 2537 format['protocol'] = determine_protocol(format)
239df021 2538 if format.get('resolution') is None:
2539 format['resolution'] = self.format_resolution(format, default=None)
176f1866 2540 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2541 format['dynamic_range'] = 'SDR'
f2fe69c7 2542 if (info_dict.get('duration') and format.get('tbr')
2543 and not format.get('filesize') and not format.get('filesize_approx')):
2544 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2545
e5660ee6
JMF
2546 # Add HTTP headers, so that external programs can use them from the
2547 # json output
2548 full_format_info = info_dict.copy()
2549 full_format_info.update(format)
2550 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2551 # Remove private housekeeping stuff
2552 if '__x_forwarded_for_ip' in info_dict:
2553 del info_dict['__x_forwarded_for_ip']
dd82ffea 2554
9f1a1c36 2555 if self.params.get('check_formats') is True:
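# Check formats lazily, starting from the best (last) one, while preserving the original order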
282f5709 2556 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
9f1a1c36 2557
88acdbc2 2558 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
2559 # Only set the 'formats' field if the original info_dict lists them;
2560 # otherwise we end up with a circular reference: the first (and only)
f89197d7 2561 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2562 # which can't be exported to json
b3d9ef88 2563 info_dict['formats'] = formats
4ec82a72 2564
2565 info_dict, _ = self.pre_process(info_dict)
2566
6db9c4d5 2567 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
09b49e1f 2568 return info_dict
2569
2570 self.post_extract(info_dict)
2571 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2572
093a1710 2573 # The pre-processors may have modified the formats
2574 formats = info_dict.get('formats', [info_dict])
2575
fa9f30b8 2576 list_only = self.params.get('simulate') is None and (
2577 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
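# A format selector of '-' means the user is prompted interactively for each video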
2578 interactive_format_selection = not list_only and self.format_selector == '-'
b7b04c78 2579 if self.params.get('list_thumbnails'):
2580 self.list_thumbnails(info_dict)
b7b04c78 2581 if self.params.get('listsubtitles'):
2582 if 'automatic_captions' in info_dict:
2583 self.list_subtitles(
2584 info_dict['id'], automatic_captions, 'automatic captions')
2585 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
fa9f30b8 2586 if self.params.get('listformats') or interactive_format_selection:
b69fd25c 2587 self.list_formats(info_dict)
169dbde9 2588 if list_only:
b7b04c78 2589 # Without this printing, -F --print-json will not work
169dbde9 2590 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
bfaae0a7 2591 return
2592
187986a8 2593 format_selector = self.format_selector
2594 if format_selector is None:
0017d9ad 2595 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2596 self.write_debug('Default format spec: %s' % req_format)
187986a8 2597 format_selector = self.build_format_selector(req_format)
317f7ab6 2598
fa9f30b8 2599 while True:
2600 if interactive_format_selection:
2601 req_format = input(
2602 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2603 try:
2604 format_selector = self.build_format_selector(req_format)
2605 except SyntaxError as err:
2606 self.report_error(err, tb=False, is_error=False)
2607 continue
2608
85e801a9 2609 formats_to_download = list(format_selector({
fa9f30b8 2610 'formats': formats,
85e801a9 2611 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2612 'incomplete_formats': (
2613 # All formats are video-only or
2614 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2615 # all formats are audio-only
2616 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2617 }))
fa9f30b8 2618 if interactive_format_selection and not formats_to_download:
2619 self.report_error('Requested format is not available', tb=False, is_error=False)
2620 continue
2621 break
317f7ab6 2622
dd82ffea 2623 if not formats_to_download:
b7da73eb 2624 if not self.params.get('ignore_no_formats_error'):
c0b6e5c7 2625 raise ExtractorError(
2626 'Requested format is not available. Use --list-formats for a list of available formats',
2627 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
b62fa6d7 2628 self.report_warning('Requested format is not available')
2629 # Process what we can, even without any available formats.
2630 formats_to_download = [{}]
a13e6848 2631
b62fa6d7 2632 best_format = formats_to_download[-1]
2633 if download:
2634 if best_format:
2635 self.to_screen(
2636 f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2637 + ', '.join([f['format_id'] for f in formats_to_download]))
a13e6848 2638 max_downloads_reached = False
f46e2f9d 2639 for i, fmt in enumerate(formats_to_download):
09b49e1f 2640 formats_to_download[i] = new_info = self._copy_infodict(info_dict)
b7da73eb 2641 new_info.update(fmt)
a13e6848 2642 try:
2643 self.process_info(new_info)
2644 except MaxDownloadsReached:
2645 max_downloads_reached = True
f46e2f9d 2646 # Remove copied info
2647 for key, val in tuple(new_info.items()):
2648 if info_dict.get(key) == val:
2649 new_info.pop(key)
a13e6848 2650 if max_downloads_reached:
2651 break
ebed8b37 2652
86e5f3ed 2653 write_archive = {f.get('__write_download_archive', False) for f in formats_to_download}
a13e6848 2654 assert write_archive.issubset({True, False, 'ignore'})
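# Record the archive entry only if at least one copy asked for it and none refused; 'ignore' entries do not block recording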
2655 if True in write_archive and False not in write_archive:
2656 self.record_download_archive(info_dict)
be72c624 2657
2658 info_dict['requested_downloads'] = formats_to_download
ed5835b4 2659 info_dict = self.run_all_pps('after_video', info_dict)
a13e6848 2660 if max_downloads_reached:
2661 raise MaxDownloadsReached()
ebed8b37 2662
49a57e70 2663 # We update the info dict with the selected best quality format (backwards compatibility)
be72c624 2664 info_dict.update(best_format)
dd82ffea
JMF
2665 return info_dict
2666
98c70d6f 2667 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2668 """Select the requested subtitles and their format"""
d8a58ddc 2669 available_subs, normal_sub_langs = {}, []
98c70d6f
JMF
2670 if normal_subtitles and self.params.get('writesubtitles'):
2671 available_subs.update(normal_subtitles)
d8a58ddc 2672 normal_sub_langs = tuple(normal_subtitles.keys())
98c70d6f
JMF
2673 if automatic_captions and self.params.get('writeautomaticsub'):
2674 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
2675 if lang not in available_subs:
2676 available_subs[lang] = cap_info
2677
4d171848
JMF
2678 if (not available_subs
2679 or (not self.params.get('writesubtitles')
2680 and not self.params.get('writeautomaticsub'))):
2681 return None
a504ced0 2682
d8a58ddc 2683 all_sub_langs = tuple(available_subs.keys())
a504ced0 2684 if self.params.get('allsubtitles', False):
c32b0aab 2685 requested_langs = all_sub_langs
2686 elif self.params.get('subtitleslangs', False):
77c4a9ef 2687 # A list is used so that the order of languages will be the same as
2688 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2689 requested_langs = []
2690 for lang_re in self.params.get('subtitleslangs'):
77c4a9ef 2691 discard = lang_re[0] == '-'
c32b0aab 2692 if discard:
77c4a9ef 2693 lang_re = lang_re[1:]
3aa91540 2694 if lang_re == 'all':
2695 if discard:
2696 requested_langs = []
2697 else:
2698 requested_langs.extend(all_sub_langs)
2699 continue
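# Each remaining entry is treated as a regex that must match the entire language code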
77c4a9ef 2700 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
c32b0aab 2701 if discard:
2702 for lang in current_langs:
77c4a9ef 2703 while lang in requested_langs:
2704 requested_langs.remove(lang)
c32b0aab 2705 else:
77c4a9ef 2706 requested_langs.extend(current_langs)
2707 requested_langs = orderedSet(requested_langs)
d8a58ddc 2708 elif normal_sub_langs:
2709 requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
a504ced0 2710 else:
d8a58ddc 2711 requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
ad3dc496 2712 if requested_langs:
2713 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
a504ced0
JMF
2714
2715 formats_query = self.params.get('subtitlesformat', 'best')
2716 formats_preference = formats_query.split('/') if formats_query else []
2717 subs = {}
2718 for lang in requested_langs:
2719 formats = available_subs.get(lang)
2720 if formats is None:
86e5f3ed 2721 self.report_warning(f'{lang} subtitles not available for {video_id}')
a504ced0 2722 continue
a504ced0
JMF
2723 for ext in formats_preference:
2724 if ext == 'best':
2725 f = formats[-1]
2726 break
2727 matches = list(filter(lambda f: f['ext'] == ext, formats))
2728 if matches:
2729 f = matches[-1]
2730 break
2731 else:
2732 f = formats[-1]
2733 self.report_warning(
2734 'No subtitle format found matching "%s" for language %s, '
2735 'using %s' % (formats_query, lang, f['ext']))
2736 subs[lang] = f
2737 return subs
2738
bb66c247 2739 def _forceprint(self, key, info_dict):
2740 if info_dict is None:
2741 return
2742 info_copy = info_dict.copy()
2743 info_copy['formats_table'] = self.render_formats_table(info_dict)
2744 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2745 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2746 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2747
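# A bare field name expands to '%(field)s'; a trailing '=' (e.g. 'title=') prints 'title = <repr of value>'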
2748 def format_tmpl(tmpl):
2749 mobj = re.match(r'\w+(=?)$', tmpl)
2750 if mobj and mobj.group(1):
2751 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2752 elif mobj:
2753 return f'%({tmpl})s'
2754 return tmpl
8130779d 2755
bb66c247 2756 for tmpl in self.params['forceprint'].get(key, []):
2757 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2758
2759 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
5127e92a 2760 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
bb66c247 2761 tmpl = format_tmpl(tmpl)
2762 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
8d93e69d 2763 if self._ensure_dir_exists(filename):
86e5f3ed 2764 with open(filename, 'a', encoding='utf-8') as f:
8d93e69d 2765 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
ca30f449 2766
d06daf23 2767 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2768 def print_mandatory(field, actual_field=None):
2769 if actual_field is None:
2770 actual_field = field
d06daf23 2771 if (self.params.get('force%s' % field, False)
53c18592 2772 and (not incomplete or info_dict.get(actual_field) is not None)):
2773 self.to_stdout(info_dict[actual_field])
d06daf23
S
2774
2775 def print_optional(field):
2776 if (self.params.get('force%s' % field, False)
2777 and info_dict.get(field) is not None):
2778 self.to_stdout(info_dict[field])
2779
53c18592 2780 info_dict = info_dict.copy()
2781 if filename is not None:
2782 info_dict['filename'] = filename
2783 if info_dict.get('requested_formats') is not None:
2784 # For RTMP URLs, also include the playpath
2785 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
10331a26 2786 elif info_dict.get('url'):
53c18592 2787 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2788
bb66c247 2789 if (self.params.get('forcejson')
2790 or self.params['forceprint'].get('video')
2791 or self.params['print_to_file'].get('video')):
2b8a2973 2792 self.post_extract(info_dict)
bb66c247 2793 self._forceprint('video', info_dict)
53c18592 2794
d06daf23
S
2795 print_mandatory('title')
2796 print_mandatory('id')
53c18592 2797 print_mandatory('url', 'urls')
d06daf23
S
2798 print_optional('thumbnail')
2799 print_optional('description')
53c18592 2800 print_optional('filename')
b868936c 2801 if self.params.get('forceduration') and info_dict.get('duration') is not None:
d06daf23
S
2802 self.to_stdout(formatSeconds(info_dict['duration']))
2803 print_mandatory('format')
53c18592 2804
2b8a2973 2805 if self.params.get('forcejson'):
6e84b215 2806 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2807
e8e73840 2808 def dl(self, name, info, subtitle=False, test=False):
88acdbc2 2809 if not info.get('url'):
1151c407 2810 self.raise_no_formats(info, True)
e8e73840 2811
2812 if test:
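# Test mode (used when probing formats, e.g. by _check_formats) downloads only a small portion, quietly and without .part/.ytdl files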
2813 verbose = self.params.get('verbose')
2814 params = {
2815 'test': True,
a169858f 2816 'quiet': self.params.get('quiet') or not verbose,
e8e73840 2817 'verbose': verbose,
2818 'noprogress': not verbose,
2819 'nopart': True,
2820 'skip_unavailable_fragments': False,
2821 'keep_fragments': False,
2822 'overwrites': True,
2823 '_no_ytdl_file': True,
2824 }
2825 else:
2826 params = self.params
96fccc10 2827 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
e8e73840 2828 if not test:
2829 for ph in self._progress_hooks:
2830 fd.add_progress_hook(ph)
42676437
M
2831 urls = '", "'.join(
2832 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2833 for f in info.get('requested_formats', []) or [info])
3a408f9d 2834 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
03b4de72 2835
adbc4ec4
THD
2836 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
2837 # But it may contain objects that are not deep-copyable
2838 new_info = self._copy_infodict(info)
e8e73840 2839 if new_info.get('http_headers') is None:
2840 new_info['http_headers'] = self._calc_headers(new_info)
2841 return fd.download(name, new_info, subtitle)
2842
e04938ab 2843 def existing_file(self, filepaths, *, default_overwrite=True):
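# If overwrites are disabled, reuse the first existing file; otherwise delete all existing copies so a fresh download happens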
2844 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2845 if existing_files and not self.params.get('overwrites', default_overwrite):
2846 return existing_files[0]
2847
2848 for file in existing_files:
2849 self.report_file_delete(file)
2850 os.remove(file)
2851 return None
2852
8222d8de 2853 def process_info(self, info_dict):
09b49e1f 2854 """Process a single resolved IE result. (Modifies it in-place)"""
8222d8de
JMF
2855
2856 assert info_dict.get('_type', 'video') == 'video'
f46e2f9d 2857 original_infodict = info_dict
fd288278 2858
4513a41a 2859 if 'format' not in info_dict and 'ext' in info_dict:
8222d8de
JMF
2860 info_dict['format'] = info_dict['ext']
2861
09b49e1f 2862 # This is mostly just for backward compatibility of process_info
2863 # As a side-effect, this allows for format-specific filters
c77495e3 2864 if self._match_entry(info_dict) is not None:
9e907ebd 2865 info_dict['__write_download_archive'] = 'ignore'
8222d8de
JMF
2866 return
2867
09b49e1f 2868 # Does nothing under normal operation - for backward compatibility of process_info
277d6ff5 2869 self.post_extract(info_dict)
0c14d66a 2870 self._num_downloads += 1
8222d8de 2871
dcf64d43 2872 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2873 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2874 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2875 files_to_move = {}
8222d8de
JMF
2876
2877 # Forced printings
4513a41a 2878 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 2879
ca6d59d2 2880 def check_max_downloads():
2881 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
2882 raise MaxDownloadsReached()
2883
b7b04c78 2884 if self.params.get('simulate'):
9e907ebd 2885 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
ca6d59d2 2886 check_max_downloads()
8222d8de
JMF
2887 return
2888
de6000d9 2889 if full_filename is None:
8222d8de 2890 return
e92caff5 2891 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 2892 return
e92caff5 2893 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
2894 return
2895
80c03fa9 2896 if self._write_description('video', info_dict,
2897 self.prepare_filename(info_dict, 'description')) is None:
2898 return
2899
2900 sub_files = self._write_subtitles(info_dict, temp_filename)
2901 if sub_files is None:
2902 return
2903 files_to_move.update(dict(sub_files))
2904
2905 thumb_files = self._write_thumbnails(
2906 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2907 if thumb_files is None:
2908 return
2909 files_to_move.update(dict(thumb_files))
8222d8de 2910
80c03fa9 2911 infofn = self.prepare_filename(info_dict, 'infojson')
2912 _infojson_written = self._write_info_json('video', info_dict, infofn)
2913 if _infojson_written:
dac5df5a 2914 info_dict['infojson_filename'] = infofn
e75bb0d6 2915 # For backward compatibility, even though it was a private field
80c03fa9 2916 info_dict['__infojson_filename'] = infofn
2917 elif _infojson_written is None:
2918 return
2919
2920 # Note: Annotations are deprecated
2921 annofn = None
1fb07d10 2922 if self.params.get('writeannotations', False):
de6000d9 2923 annofn = self.prepare_filename(info_dict, 'annotation')
80c03fa9 2924 if annofn:
e92caff5 2925 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 2926 return
0c3d0f51 2927 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 2928 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
2929 elif not info_dict.get('annotations'):
2930 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
2931 else:
2932 try:
6febd1c1 2933 self.to_screen('[info] Writing video annotations to: ' + annofn)
86e5f3ed 2934 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
7b6fefc9
PH
2935 annofile.write(info_dict['annotations'])
2936 except (KeyError, TypeError):
6febd1c1 2937 self.report_warning('There are no annotations to write.')
86e5f3ed 2938 except OSError:
6febd1c1 2939 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 2940 return
1fb07d10 2941
732044af 2942 # Write internet shortcut files
08438d2c 2943 def _write_link_file(link_type):
60f3e995 2944 url = try_get(info_dict['webpage_url'], iri_to_uri)
2945 if not url:
2946 self.report_warning(
2947 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2948 return True
08438d2c 2949 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
0e6b018a
Z
2950 if not self._ensure_dir_exists(encodeFilename(linkfn)):
2951 return False
10e3742e 2952 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
08438d2c 2953 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2954 return True
2955 try:
2956 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
86e5f3ed 2957 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2958 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
60f3e995 2959 template_vars = {'url': url}
08438d2c 2960 if link_type == 'desktop':
2961 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2962 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
86e5f3ed 2963 except OSError:
08438d2c 2964 self.report_error(f'Cannot write internet shortcut {linkfn}')
2965 return False
732044af 2966 return True
2967
08438d2c 2968 write_links = {
2969 'url': self.params.get('writeurllink'),
2970 'webloc': self.params.get('writewebloclink'),
2971 'desktop': self.params.get('writedesktoplink'),
2972 }
2973 if self.params.get('writelink'):
2974 link_type = ('webloc' if sys.platform == 'darwin'
2975 else 'desktop' if sys.platform.startswith('linux')
2976 else 'url')
2977 write_links[link_type] = True
2978
2979 if any(should_write and not _write_link_file(link_type)
2980 for link_type, should_write in write_links.items()):
2981 return
732044af 2982
f46e2f9d 2983 def replace_info_dict(new_info):
2984 nonlocal info_dict
2985 if new_info == info_dict:
2986 return
2987 info_dict.clear()
2988 info_dict.update(new_info)
2989
56d868db 2990 try:
f46e2f9d 2991 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2992 replace_info_dict(new_info)
56d868db 2993 except PostProcessingError as err:
2994 self.report_error('Preprocessing: %s' % str(err))
2995 return
2996
a13e6848 2997 if self.params.get('skip_download'):
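# With skip_download, only the already-written files (e.g. subtitles, thumbnails) are moved into their final location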
56d868db 2998 info_dict['filepath'] = temp_filename
2999 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3000 info_dict['__files_to_move'] = files_to_move
f46e2f9d 3001 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
9e907ebd 3002 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
56d868db 3003 else:
3004 # Download
b868936c 3005 info_dict.setdefault('__postprocessors', [])
4340deca 3006 try:
0202b52a 3007
e04938ab 3008 def existing_video_file(*filepaths):
6b591b29 3009 ext = info_dict.get('ext')
e04938ab 3010 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3011 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3012 default_overwrite=False)
3013 if file:
3014 info_dict['ext'] = os.path.splitext(file)[1][1:]
3015 return file
0202b52a 3016
3017 success = True
4340deca 3018 if info_dict.get('requested_formats') is not None:
81cd954a
S
3019
3020 def compatible_formats(formats):
d03cfdce 3021 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3022 video_formats = [format for format in formats if format.get('vcodec') != 'none']
3023 audio_formats = [format for format in formats if format.get('acodec') != 'none']
3024 if len(video_formats) > 2 or len(audio_formats) > 2:
3025 return False
3026
81cd954a 3027 # Check extension
86e5f3ed 3028 exts = {format.get('ext') for format in formats}
d03cfdce 3029 COMPATIBLE_EXTS = (
86e5f3ed 3030 {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
3031 {'webm'},
d03cfdce 3032 )
3033 for ext_sets in COMPATIBLE_EXTS:
3034 if ext_sets.issuperset(exts):
3035 return True
81cd954a
S
3036 # TODO: Check acodec/vcodec
3037 return False
3038
3039 requested_formats = info_dict['requested_formats']
0202b52a 3040 old_ext = info_dict['ext']
4e3b637d 3041 if self.params.get('merge_output_format') is None:
3042 if not compatible_formats(requested_formats):
3043 info_dict['ext'] = 'mkv'
3044 self.report_warning(
3045 'Requested formats are incompatible for merge and will be merged into mkv')
3046 if (info_dict['ext'] == 'webm'
3047 and info_dict.get('thumbnails')
3048 # check with type instead of pp_key, __name__, or isinstance
2849 # since we don't want any custom PPs to trigger this
3050 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
3051 info_dict['ext'] = 'mkv'
3052 self.report_warning(
3053 'webm doesn\'t support embedding a thumbnail, mkv will be used')
124bc071 3054 new_ext = info_dict['ext']
0202b52a 3055
124bc071 3056 def correct_ext(filename, ext=new_ext):
96fccc10 3057 if filename == '-':
3058 return filename
0202b52a 3059 filename_real_ext = os.path.splitext(filename)[1][1:]
3060 filename_wo_ext = (
3061 os.path.splitext(filename)[0]
124bc071 3062 if filename_real_ext in (old_ext, new_ext)
0202b52a 3063 else filename)
86e5f3ed 3064 return f'{filename_wo_ext}.{ext}'
0202b52a 3065
38c6902b 3066 # Ensure filename always has a correct extension for successful merge
0202b52a 3067 full_filename = correct_ext(full_filename)
3068 temp_filename = correct_ext(temp_filename)
e04938ab 3069 dl_filename = existing_video_file(full_filename, temp_filename)
1ea24129 3070 info_dict['__real_download'] = False
18e674b4 3071
adbc4ec4
THD
3072 downloaded = []
3073 merger = FFmpegMergerPP(self)
3074
3075 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
dbf5416a 3076 if dl_filename is not None:
6c7274ec 3077 self.report_file_already_downloaded(dl_filename)
adbc4ec4
THD
3078 elif fd:
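# A single downloader handles all requested formats; unless it is FFmpegFD (which muxes directly), each format gets its own output file to be merged afterwards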
3079 for f in requested_formats if fd != FFmpegFD else []:
3080 f['filepath'] = fname = prepend_extension(
3081 correct_ext(temp_filename, info_dict['ext']),
3082 'f%s' % f['format_id'], info_dict['ext'])
3083 downloaded.append(fname)
dbf5416a 3084 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3085 success, real_download = self.dl(temp_filename, info_dict)
3086 info_dict['__real_download'] = real_download
18e674b4 3087 else:
18e674b4 3088 if self.params.get('allow_unplayable_formats'):
3089 self.report_warning(
3090 'You have requested merging of multiple formats '
3091 'while also allowing unplayable formats to be downloaded. '
3092 'The formats won\'t be merged to prevent data corruption.')
3093 elif not merger.available:
e8969bda 3094 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3095 if not self.params.get('ignoreerrors'):
3096 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3097 return
3098 self.report_warning(f'{msg}. The formats won\'t be merged')
18e674b4 3099
96fccc10 3100 if temp_filename == '-':
adbc4ec4 3101 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
96fccc10 3102 else 'but the formats are incompatible for simultaneous download' if merger.available
3103 else 'but ffmpeg is not installed')
3104 self.report_warning(
3105 f'You have requested downloading multiple formats to stdout {reason}. '
3106 'The formats will be streamed one after the other')
3107 fname = temp_filename
dbf5416a 3108 for f in requested_formats:
3109 new_info = dict(info_dict)
3110 del new_info['requested_formats']
3111 new_info.update(f)
96fccc10 3112 if temp_filename != '-':
124bc071 3113 fname = prepend_extension(
3114 correct_ext(temp_filename, new_info['ext']),
3115 'f%s' % f['format_id'], new_info['ext'])
96fccc10 3116 if not self._ensure_dir_exists(fname):
3117 return
a21e0ab1 3118 f['filepath'] = fname
96fccc10 3119 downloaded.append(fname)
dbf5416a 3120 partial_success, real_download = self.dl(fname, new_info)
3121 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3122 success = success and partial_success
adbc4ec4
THD
3123
3124 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3125 info_dict['__postprocessors'].append(merger)
3126 info_dict['__files_to_merge'] = downloaded
3127 # Even if there were no new downloads, the merge itself is only happening now
3128 info_dict['__real_download'] = True
3129 else:
3130 for file in downloaded:
3131 files_to_move[file] = None
4340deca
P
3132 else:
3133 # Just a single file
e04938ab 3134 dl_filename = existing_video_file(full_filename, temp_filename)
6c7274ec 3135 if dl_filename is None or dl_filename == temp_filename:
3136 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3137 # So we should try to resume the download
e8e73840 3138 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 3139 info_dict['__real_download'] = real_download
6c7274ec 3140 else:
3141 self.report_file_already_downloaded(dl_filename)
0202b52a 3142
0202b52a 3143 dl_filename = dl_filename or temp_filename
c571435f 3144 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 3145
3158150c 3146 except network_exceptions as err:
7960b056 3147 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca 3148 return
86e5f3ed 3149 except OSError as err:
4340deca
P
3150 raise UnavailableVideoError(err)
3151 except (ContentTooShortError, ) as err:
86e5f3ed 3152 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
4340deca 3153 return
8222d8de 3154
de6000d9 3155 if success and full_filename != '-':
f17f8651 3156
fd7cfb64 3157 def fixup():
3158 do_fixup = True
3159 fixup_policy = self.params.get('fixup')
3160 vid = info_dict['id']
3161
3162 if fixup_policy in ('ignore', 'never'):
3163 return
3164 elif fixup_policy == 'warn':
3fe75fdc 3165 do_fixup = 'warn'
f89b3e2d 3166 elif fixup_policy != 'force':
3167 assert fixup_policy in ('detect_or_warn', None)
3168 if not info_dict.get('__real_download'):
3169 do_fixup = False
fd7cfb64 3170
3171 def ffmpeg_fixup(cndn, msg, cls):
3fe75fdc 3172 if not (do_fixup and cndn):
fd7cfb64 3173 return
3fe75fdc 3174 elif do_fixup == 'warn':
fd7cfb64 3175 self.report_warning(f'{vid}: {msg}')
3176 return
3177 pp = cls(self)
3178 if pp.available:
3179 info_dict['__postprocessors'].append(pp)
3180 else:
3181 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3182
3183 stretched_ratio = info_dict.get('stretched_ratio')
3184 ffmpeg_fixup(
3185 stretched_ratio not in (1, None),
3186 f'Non-uniform pixel ratio {stretched_ratio}',
3187 FFmpegFixupStretchedPP)
3188
3189 ffmpeg_fixup(
3190 (info_dict.get('requested_formats') is None
3191 and info_dict.get('container') == 'm4a_dash'
3192 and info_dict.get('ext') == 'm4a'),
3193 'writing DASH m4a. Only some players support this container',
3194 FFmpegFixupM4aPP)
3195
993191c0 3196 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
24146491 3197 downloader = downloader.FD_NAME if downloader else None
adbc4ec4
THD
3198
3199 if info_dict.get('requested_formats') is None: # Not necessary if doing merger
24146491 3200 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
494f5230 3201 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
adbc4ec4
THD
3202 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3203 FFmpegFixupM3u8PP)
3204 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3205 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3206
24146491 3207 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3208 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 3209
3210 fixup()
8222d8de 3211 try:
f46e2f9d 3212 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
af819c21 3213 except PostProcessingError as err:
3214 self.report_error('Postprocessing: %s' % str(err))
8222d8de 3215 return
ab8e5e51
AM
3216 try:
3217 for ph in self._post_hooks:
23c1a667 3218 ph(info_dict['filepath'])
ab8e5e51
AM
3219 except Exception as err:
3220 self.report_error('post hooks: %s' % str(err))
3221 return
9e907ebd 3222 info_dict['__write_download_archive'] = True
2d30509f 3223
a13e6848 3224 if self.params.get('force_write_download_archive'):
9e907ebd 3225 info_dict['__write_download_archive'] = True
a13e6848 3226
3227 # Make sure the info_dict was modified in-place
f46e2f9d 3228 assert info_dict is original_infodict
ca6d59d2 3229 check_max_downloads()
8222d8de 3230
aa9369a2 3231 def __download_wrapper(self, func):
3232 @functools.wraps(func)
3233 def wrapper(*args, **kwargs):
3234 try:
3235 res = func(*args, **kwargs)
3236 except UnavailableVideoError as e:
3237 self.report_error(e)
b222c271 3238 except DownloadCancelled as e:
3239 self.to_screen(f'[info] {e}')
3240 if not self.params.get('break_per_url'):
3241 raise
aa9369a2 3242 else:
3243 if self.params.get('dump_single_json', False):
3244 self.post_extract(res)
3245 self.to_stdout(json.dumps(self.sanitize_info(res)))
3246 return wrapper
3247
8222d8de
JMF
3248 def download(self, url_list):
3249 """Download a given list of URLs."""
aa9369a2 3250 url_list = variadic(url_list) # Passing a single URL is a common mistake
de6000d9 3251 outtmpl = self.outtmpl_dict['default']
3089bc74
S
3252 if (len(url_list) > 1
3253 and outtmpl != '-'
3254 and '%' not in outtmpl
3255 and self.params.get('max_downloads') != 1):
acd69589 3256 raise SameFileError(outtmpl)
8222d8de
JMF
3257
3258 for url in url_list:
aa9369a2 3259 self.__download_wrapper(self.extract_info)(
3260 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de
JMF
3261
3262 return self._download_retcode
3263
1dcc4c0c 3264 def download_with_info_file(self, info_filename):
31bd3925
JMF
3265 with contextlib.closing(fileinput.FileInput(
3266 [info_filename], mode='r',
3267 openhook=fileinput.hook_encoded('utf-8'))) as f:
3268 # FileInput doesn't have a read method, so we can't call json.load
8012d892 3269 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
d4943898 3270 try:
aa9369a2 3271 self.__download_wrapper(self.process_ie_result)(info, download=True)
f2ebc5c7 3272 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
bf5f605e 3273 if not isinstance(e, EntryNotInPlaylist):
3274 self.to_stderr('\r')
d4943898
JMF
3275 webpage_url = info.get('webpage_url')
3276 if webpage_url is not None:
aa9369a2 3277 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
d4943898
JMF
3278 return self.download([webpage_url])
3279 else:
3280 raise
3281 return self._download_retcode
1dcc4c0c 3282
cb202fd2 3283 @staticmethod
8012d892 3284 def sanitize_info(info_dict, remove_private_keys=False):
3285 ''' Sanitize the infodict for converting to json '''
3ad56b42 3286 if info_dict is None:
3287 return info_dict
6e84b215 3288 info_dict.setdefault('epoch', int(time.time()))
6a5a30f9 3289 info_dict.setdefault('_type', 'video')
09b49e1f 3290
8012d892 3291 if remove_private_keys:
0a5a191a 3292 reject = lambda k, v: v is None or k.startswith('__') or k in {
f46e2f9d 3293 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
0a5a191a 3294 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
6e84b215 3295 }
ae8f99e6 3296 else:
09b49e1f 3297 reject = lambda k, v: False
adbc4ec4
THD
3298
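# Recursively keep only JSON-serializable values, falling back to repr() for anything else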
3299 def filter_fn(obj):
3300 if isinstance(obj, dict):
3301 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3302 elif isinstance(obj, (list, tuple, set, LazyList)):
3303 return list(map(filter_fn, obj))
3304 elif obj is None or isinstance(obj, (str, int, float, bool)):
3305 return obj
3306 else:
3307 return repr(obj)
3308
5226731e 3309 return filter_fn(info_dict)
cb202fd2 3310
8012d892 3311 @staticmethod
3312 def filter_requested_info(info_dict, actually_filter=True):
3313 ''' Alias of sanitize_info for backward compatibility '''
3314 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3315
43d7f5a5 3316 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3317 for filename in set(filter(None, files_to_delete)):
3318 if msg:
3319 self.to_screen(msg % filename)
3320 try:
3321 os.remove(filename)
3322 except OSError:
3323 self.report_warning(f'Unable to delete file {filename}')
3324 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3325 del info['__files_to_move'][filename]
3326
ed5835b4 3327 @staticmethod
3328 def post_extract(info_dict):
3329 def actual_post_extract(info_dict):
3330 if info_dict.get('_type') in ('playlist', 'multi_video'):
3331 for video_dict in info_dict.get('entries', {}):
3332 actual_post_extract(video_dict or {})
3333 return
3334
09b49e1f 3335 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3336 info_dict.update(post_extractor())
ed5835b4 3337
3338 actual_post_extract(info_dict or {})
3339
dcf64d43 3340 def run_pp(self, pp, infodict):
5bfa4862 3341 files_to_delete = []
dcf64d43 3342 if '__files_to_move' not in infodict:
3343 infodict['__files_to_move'] = {}
b1940459 3344 try:
3345 files_to_delete, infodict = pp.run(infodict)
3346 except PostProcessingError as e:
3347 # Must be True and not 'only_download'
3348 if self.params.get('ignoreerrors') is True:
3349 self.report_error(e)
3350 return infodict
3351 raise
3352
5bfa4862 3353 if not files_to_delete:
dcf64d43 3354 return infodict
5bfa4862 3355 if self.params.get('keepvideo', False):
3356 for f in files_to_delete:
dcf64d43 3357 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 3358 else:
43d7f5a5 3359 self._delete_downloaded_files(
3360 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
dcf64d43 3361 return infodict
5bfa4862 3362
ed5835b4 3363 def run_all_pps(self, key, info, *, additional_pps=None):
bb66c247 3364 self._forceprint(key, info)
ed5835b4 3365 for pp in (additional_pps or []) + self._pps[key]:
dc5f409c 3366 info = self.run_pp(pp, info)
ed5835b4 3367 return info
277d6ff5 3368
56d868db 3369 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 3370 info = dict(ie_info)
56d868db 3371 info['__files_to_move'] = files_to_move or {}
ed5835b4 3372 info = self.run_all_pps(key, info)
56d868db 3373 return info, info.pop('__files_to_move', None)
5bfa4862 3374
f46e2f9d 3375 def post_process(self, filename, info, files_to_move=None):
8222d8de 3376 """Run all the postprocessors on the given file."""
8222d8de 3377 info['filepath'] = filename
dcf64d43 3378 info['__files_to_move'] = files_to_move or {}
ed5835b4 3379 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
dcf64d43 3380 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3381 del info['__files_to_move']
ed5835b4 3382 return self.run_all_pps('after_move', info)
c1c9a79c 3383
5db07df6 3384 def _make_archive_id(self, info_dict):
e9fef7ee
S
3385 video_id = info_dict.get('id')
3386 if not video_id:
3387 return
5db07df6
PH
3388 # Future-proof against any change in case
3389 # and for backwards compatibility with prior versions
e9fef7ee 3390 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3391 if extractor is None:
1211bb6d
S
3392 url = str_or_none(info_dict.get('url'))
3393 if not url:
3394 return
e9fef7ee 3395 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3396 for ie_key, ie in self._ies.items():
1211bb6d 3397 if ie.suitable(url):
8b7491c8 3398 extractor = ie_key
e9fef7ee
S
3399 break
3400 else:
3401 return
86e5f3ed 3402 return f'{extractor.lower()} {video_id}'
5db07df6
PH
3403
3404 def in_download_archive(self, info_dict):
3405 fn = self.params.get('download_archive')
3406 if fn is None:
3407 return False
3408
3409 vid_id = self._make_archive_id(info_dict)
e9fef7ee 3410 if not vid_id:
7012b23c 3411 return False # Incomplete video information
5db07df6 3412
a45e8619 3413 return vid_id in self.archive
c1c9a79c
PH
3414
3415 def record_download_archive(self, info_dict):
3416 fn = self.params.get('download_archive')
3417 if fn is None:
3418 return
5db07df6
PH
3419 vid_id = self._make_archive_id(info_dict)
3420 assert vid_id
a13e6848 3421 self.write_debug(f'Adding to archive: {vid_id}')
c1c9a79c 3422 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
6febd1c1 3423 archive_file.write(vid_id + '\n')
a45e8619 3424 self.archive.add(vid_id)
dd82ffea 3425
8c51aa65 3426 @staticmethod
8abeeb94 3427 def format_resolution(format, default='unknown'):
9359f3d4 3428 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
fb04e403 3429 return 'audio only'
f49d89ee
PH
3430 if format.get('resolution') is not None:
3431 return format['resolution']
35615307 3432 if format.get('width') and format.get('height'):
ff51ed58 3433 return '%dx%d' % (format['width'], format['height'])
35615307 3434 elif format.get('height'):
ff51ed58 3435 return '%sp' % format['height']
35615307 3436 elif format.get('width'):
ff51ed58 3437 return '%dx?' % format['width']
3438 return default
8c51aa65 3439
8130779d 3440 def _list_format_headers(self, *headers):
3441 if self.params.get('listformats_table', True) is not False:
591bb9d3 3442 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
8130779d 3443 return headers
3444
c57f7757
PH
3445 def _format_note(self, fdict):
3446 res = ''
3447 if fdict.get('ext') in ['f4f', 'f4m']:
f304da8a 3448 res += '(unsupported)'
32f90364
PH
3449 if fdict.get('language'):
3450 if res:
3451 res += ' '
f304da8a 3452 res += '[%s]' % fdict['language']
c57f7757 3453 if fdict.get('format_note') is not None:
f304da8a 3454 if res:
3455 res += ' '
3456 res += fdict['format_note']
c57f7757 3457 if fdict.get('tbr') is not None:
f304da8a 3458 if res:
3459 res += ', '
3460 res += '%4dk' % fdict['tbr']
c57f7757
PH
3461 if fdict.get('container') is not None:
3462 if res:
3463 res += ', '
3464 res += '%s container' % fdict['container']
3089bc74
S
3465 if (fdict.get('vcodec') is not None
3466 and fdict.get('vcodec') != 'none'):
c57f7757
PH
3467 if res:
3468 res += ', '
3469 res += fdict['vcodec']
91c7271a 3470 if fdict.get('vbr') is not None:
c57f7757
PH
3471 res += '@'
3472 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3473 res += 'video@'
3474 if fdict.get('vbr') is not None:
3475 res += '%4dk' % fdict['vbr']
fbb21cf5 3476 if fdict.get('fps') is not None:
5d583bdf
S
3477 if res:
3478 res += ', '
3479 res += '%sfps' % fdict['fps']
c57f7757
PH
3480 if fdict.get('acodec') is not None:
3481 if res:
3482 res += ', '
3483 if fdict['acodec'] == 'none':
3484 res += 'video only'
3485 else:
3486 res += '%-5s' % fdict['acodec']
3487 elif fdict.get('abr') is not None:
3488 if res:
3489 res += ', '
3490 res += 'audio'
3491 if fdict.get('abr') is not None:
3492 res += '@%3dk' % fdict['abr']
3493 if fdict.get('asr') is not None:
3494 res += ' (%5dHz)' % fdict['asr']
3495 if fdict.get('filesize') is not None:
3496 if res:
3497 res += ', '
3498 res += format_bytes(fdict['filesize'])
9732d77e
PH
3499 elif fdict.get('filesize_approx') is not None:
3500 if res:
3501 res += ', '
3502 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3503 return res
91c7271a 3504
8130779d 3505 def render_formats_table(self, info_dict):
b69fd25c 3506 if not info_dict.get('formats') and not info_dict.get('url'):
8130779d 3507 return None
b69fd25c 3508
94badb25 3509 formats = info_dict.get('formats', [info_dict])
8130779d 3510 if self.params.get('listformats_table', True) is False:
76d321f6 3511 table = [
3512 [
3513 format_field(f, 'format_id'),
3514 format_field(f, 'ext'),
3515 self.format_resolution(f),
8130779d 3516 self._format_note(f)
3517 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3518 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3519
591bb9d3 3520 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
8130779d 3521 table = [
3522 [
591bb9d3 3523 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
8130779d 3524 format_field(f, 'ext'),
3525 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3526 format_field(f, 'fps', '\t%d'),
3527 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3528 delim,
3529 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3530 format_field(f, 'tbr', '\t%dk'),
3531 shorten_protocol_name(f.get('protocol', '')),
3532 delim,
3533 format_field(f, 'vcodec', default='unknown').replace(
3534 'none', 'images' if f.get('acodec') == 'none'
591bb9d3 3535 else self._format_out('audio only', self.Styles.SUPPRESS)),
8130779d 3536 format_field(f, 'vbr', '\t%dk'),
3537 format_field(f, 'acodec', default='unknown').replace(
3538 'none', '' if f.get('vcodec') == 'none'
591bb9d3 3539 else self._format_out('video only', self.Styles.SUPPRESS)),
8130779d 3540 format_field(f, 'abr', '\t%dk'),
3541 format_field(f, 'asr', '\t%dHz'),
3542 join_nonempty(
591bb9d3 3543 self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
8130779d 3544 format_field(f, 'language', '[%s]'),
3545 join_nonempty(format_field(f, 'format_note'),
3546 format_field(f, 'container', ignore=(None, f.get('ext'))),
3547 delim=', '),
3548 delim=' '),
3549 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3550 header_line = self._list_format_headers(
3551 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3552 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3553
3554 return render_table(
3555 header_line, table, hide_empty=True,
591bb9d3 3556 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
8130779d 3557
3558 def render_thumbnails_table(self, info_dict):
88f23a18 3559 thumbnails = list(info_dict.get('thumbnails') or [])
cfb56d1a 3560 if not thumbnails:
8130779d 3561 return None
3562 return render_table(
ec11a9f4 3563 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
6970b600 3564 [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
2412044c 3565
8130779d 3566 def render_subtitles_table(self, video_id, subtitles):
2412044c 3567 def _row(lang, formats):
49c258e1 3568 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
2412044c 3569 if len(set(names)) == 1:
7aee40c1 3570 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3571 return [lang, ', '.join(names), ', '.join(exts)]
3572
8130779d 3573 if not subtitles:
3574 return None
3575 return render_table(
ec11a9f4 3576 self._list_format_headers('Language', 'Name', 'Formats'),
2412044c 3577 [_row(lang, formats) for lang, formats in subtitles.items()],
8130779d 3578 hide_empty=True)
3579
3580 def __list_table(self, video_id, name, func, *args):
3581 table = func(*args)
3582 if not table:
3583 self.to_screen(f'{video_id} has no {name}')
3584 return
3585 self.to_screen(f'[info] Available {name} for {video_id}:')
3586 self.to_stdout(table)
3587
3588 def list_formats(self, info_dict):
3589 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3590
3591 def list_thumbnails(self, info_dict):
3592 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3593
3594 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3595 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
a504ced0 3596
dca08720
PH
3597 def urlopen(self, req):
3598 """ Start an HTTP download """
f9934b96 3599 if isinstance(req, str):
67dda517 3600 req = sanitized_Request(req)
19a41fc6 3601 return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
3602
3603 def print_debug_header(self):
3604 if not self.params.get('verbose'):
3605 return
49a57e70 3606
3607 def get_encoding(stream):
2a938746 3608 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
49a57e70 3609 if not supports_terminal_sequences(stream):
53973b4d 3610 from .utils import WINDOWS_VT_MODE # Must be imported locally
e3c7d495 3611 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
49a57e70 3612 return ret
3613
591bb9d3 3614 encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
49a57e70 3615 locale.getpreferredencoding(),
3616 sys.getfilesystemencoding(),
591bb9d3 3617 self.get_encoding(),
3618 ', '.join(
7896214c 3619 f'{key} {get_encoding(stream)}' for key, stream in self._out_files
591bb9d3 3620 if stream is not None and key != 'console')
3621 )
883d4b1e 3622
3623 logger = self.params.get('logger')
3624 if logger:
3625 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3626 write_debug(encoding_str)
3627 else:
96565c7e 3628 write_string(f'[debug] {encoding_str}\n', encoding=None)
49a57e70 3629 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
734f90bb 3630
4c88ff87 3631 source = detect_variant()
36eaf303 3632 write_debug(join_nonempty(
3633 'yt-dlp version', __version__,
3634 f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3635 '' if source == 'unknown' else f'({source})',
3636 delim=' '))
6e21fdd2 3637 if not _LAZY_LOADER:
3638 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
49a57e70 3639 write_debug('Lazy loading extractors is forcibly disabled')
6e21fdd2 3640 else:
49a57e70 3641 write_debug('Lazy loading extractors is disabled')
3ae5e797 3642 if plugin_extractors or plugin_postprocessors:
49a57e70 3643 write_debug('Plugins: %s' % [
3ae5e797 3644 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3645 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
53ed7066 3646 if self.params.get('compat_opts'):
49a57e70 3647 write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
36eaf303 3648
3649 if source == 'source':
dca08720 3650 try:
36eaf303 3651 sp = Popen(
3652 ['git', 'rev-parse', '--short', 'HEAD'],
3653 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3654 cwd=os.path.dirname(os.path.abspath(__file__)))
3655 out, err = sp.communicate_or_kill()
3656 out = out.decode().strip()
3657 if re.match('[0-9a-f]+', out):
3658 write_debug('Git HEAD: %s' % out)
70a1165b 3659 except Exception:
19a03940 3660 with contextlib.suppress(Exception):
36eaf303 3661 sys.exc_clear()
b300cda4
S
3662
3663 def python_implementation():
3664 impl_name = platform.python_implementation()
3665 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3666 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3667 return impl_name
3668
49a57e70 3669 write_debug('Python version %s (%s %s) - %s' % (
e5813e53 3670 platform.python_version(),
3671 python_implementation(),
3672 platform.architecture()[0],
b300cda4 3673 platform_name()))
d28b5171 3674
8913ef74 3675 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3676 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3677 if ffmpeg_features:
19a03940 3678 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
8913ef74 3679
4c83c967 3680 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3681 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171 3682 exe_str = ', '.join(
2831b468 3683 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3684 ) or 'none'
49a57e70 3685 write_debug('exe versions: %s' % exe_str)
dca08720 3686
1d485a1a 3687 from .compat.compat_utils import get_package_info
9b8ee23b 3688 from .dependencies import available_dependencies
3689
3690 write_debug('Optional libraries: %s' % (', '.join(sorted({
1d485a1a 3691 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
9b8ee23b 3692 })) or 'none'))
2831b468 3693
97ec5bc5 3694 self._setup_opener()
dca08720
PH
3695 proxy_map = {}
3696 for handler in self._opener.handlers:
3697 if hasattr(handler, 'proxies'):
3698 proxy_map.update(handler.proxies)
49a57e70 3699 write_debug(f'Proxy map: {proxy_map}')
dca08720 3700
49a57e70 3701 # Not implemented
3702 if False and self.params.get('call_home'):
0f06bcd7 3703 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
49a57e70 3704 write_debug('Public IP address: %s' % ipaddr)
58b1f00d 3705 latest_version = self.urlopen(
0f06bcd7 3706 'https://yt-dl.org/latest/version').read().decode()
58b1f00d
PH
3707 if version_tuple(latest_version) > version_tuple(__version__):
3708 self.report_warning(
3709 'You are using an outdated version (newest version: %s)! '
3710 'See https://yt-dl.org/update if you need help updating.' %
3711 latest_version)
3712
e344693b 3713 def _setup_opener(self):
97ec5bc5 3714 if hasattr(self, '_opener'):
3715 return
6ad14cab 3716 timeout_val = self.params.get('socket_timeout')
17bddf3e 3717 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
6ad14cab 3718
982ee69a 3719 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
dca08720
PH
3720 opts_cookiefile = self.params.get('cookiefile')
3721 opts_proxy = self.params.get('proxy')
3722
982ee69a 3723 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3724
6a3f4c3f 3725 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720
PH
3726 if opts_proxy is not None:
3727 if opts_proxy == '':
3728 proxies = {}
3729 else:
3730 proxies = {'http': opts_proxy, 'https': opts_proxy}
3731 else:
3732 proxies = compat_urllib_request.getproxies()
067aa17e 3733 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720
PH
3734 if 'http' in proxies and 'https' not in proxies:
3735 proxies['https'] = proxies['http']
91410c9b 3736 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2
PH
3737
3738 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d
PH
3739 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3740 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3741 redirect_handler = YoutubeDLRedirectHandler()
f9934b96 3742 data_handler = urllib.request.DataHandler()
6240b0a2
JMF
3743
3744 # When passing our own FileHandler instance, build_opener won't add the
3745 # default FileHandler; this lets us disable the file protocol, which
3746 # can be used for malicious purposes (see
067aa17e 3747 # https://github.com/ytdl-org/youtube-dl/issues/8227)
6240b0a2
JMF
3748 file_handler = compat_urllib_request.FileHandler()
3749
3750 def file_open(*args, **kwargs):
7a5c1cfe 3751 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
6240b0a2
JMF
3752 file_handler.file_open = file_open
3753
3754 opener = compat_urllib_request.build_opener(
fca6dba8 3755 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3756
dca08720
PH
3757 # Delete the default user-agent header, which would otherwise apply in
3758 # cases where our custom HTTP handler doesn't come into play
067aa17e 3759 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720
PH
3760 opener.addheaders = []
3761 self._opener = opener
62fec3b2
PH
3762
3763 def encode(self, s):
3764 if isinstance(s, bytes):
3765 return s # Already encoded
3766
3767 try:
3768 return s.encode(self.get_encoding())
3769 except UnicodeEncodeError as err:
3770 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3771 raise
3772
3773 def get_encoding(self):
3774 encoding = self.params.get('encoding')
3775 if encoding is None:
3776 encoding = preferredencoding()
3777 return encoding
ec82d85a 3778
e08a85d8 3779 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
cb96c5be 3780 ''' Write the infojson and return True = written, 'exists' = already exists, False = skipped, None = error '''
e08a85d8 3781 if overwrite is None:
3782 overwrite = self.params.get('overwrites', True)
80c03fa9 3783 if not self.params.get('writeinfojson'):
3784 return False
3785 elif not infofn:
3786 self.write_debug(f'Skipping writing {label} infojson')
3787 return False
3788 elif not self._ensure_dir_exists(infofn):
3789 return None
e08a85d8 3790 elif not overwrite and os.path.exists(infofn):
80c03fa9 3791 self.to_screen(f'[info] {label.title()} metadata is already present')
cb96c5be 3792 return 'exists'
3793
3794 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3795 try:
3796 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3797 return True
86e5f3ed 3798 except OSError:
cb96c5be 3799 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3800 return None
80c03fa9 3801
3802 def _write_description(self, label, ie_result, descfn):
3803 ''' Write the description and return True = written, False = skipped, None = error '''
3804 if not self.params.get('writedescription'):
3805 return False
3806 elif not descfn:
3807 self.write_debug(f'Skipping writing {label} description')
3808 return False
3809 elif not self._ensure_dir_exists(descfn):
3810 return None
3811 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3812 self.to_screen(f'[info] {label.title()} description is already present')
3813 elif ie_result.get('description') is None:
3814 self.report_warning(f'There\'s no {label} description to write')
3815 return False
3816 else:
3817 try:
3818 self.to_screen(f'[info] Writing {label} description to: {descfn}')
86e5f3ed 3819 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
80c03fa9 3820 descfile.write(ie_result['description'])
86e5f3ed 3821 except OSError:
80c03fa9 3822 self.report_error(f'Cannot write {label} description file {descfn}')
3823 return None
3824 return True
3825
3826 def _write_subtitles(self, info_dict, filename):
3827 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3828 ret = []
3829 subtitles = info_dict.get('requested_subtitles')
3830 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3831 # Subtitle download errors are already handled in the relevant IE,
3832 # so this will silently continue when used with an IE that doesn't support subtitles
3833 return ret
3834
3835 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3836 if not sub_filename_base:
3837 self.to_screen('[info] Skipping writing video subtitles')
3838 return ret
3839 for sub_lang, sub_info in subtitles.items():
3840 sub_format = sub_info['ext']
3841 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3842 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
e04938ab 3843 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3844 if existing_sub:
80c03fa9 3845 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
e04938ab 3846 sub_info['filepath'] = existing_sub
3847 ret.append((existing_sub, sub_filename_final))
80c03fa9 3848 continue
3849
3850 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3851 if sub_info.get('data') is not None:
3852 try:
3853 # Use newline='' to prevent conversion of newline characters
3854 # See https://github.com/ytdl-org/youtube-dl/issues/10268
86e5f3ed 3855 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
80c03fa9 3856 subfile.write(sub_info['data'])
3857 sub_info['filepath'] = sub_filename
3858 ret.append((sub_filename, sub_filename_final))
3859 continue
86e5f3ed 3860 except OSError:
80c03fa9 3861 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3862 return None
3863
3864 try:
3865 sub_copy = sub_info.copy()
3866 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3867 self.dl(sub_filename, sub_copy, subtitle=True)
3868 sub_info['filepath'] = sub_filename
3869 ret.append((sub_filename, sub_filename_final))
6020e05d 3870 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
c70c418d 3871 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
6020e05d 3872 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
c70c418d 3873 if not self.params.get('ignoreerrors'):
3874 self.report_error(msg)
3875 raise DownloadError(msg)
3876 self.report_warning(msg)
519804a9 3877 return ret
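    # Illustrative sketch (assumption): each returned tuple pairs the path a subtitle was
    # written to with its intended final path, so a hypothetical caller can queue the moves;
    # None signals that a subtitle file could not be written at all.
    #
    #   sub_files = ydl._write_subtitles(info_dict, temp_filename)
    #   if sub_files is None:
    #       ...                          # abort: writing a subtitle file failed
    #   files_to_move = dict(sub_files)  # {current_path: final_path}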
80c03fa9 3878
3879 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3880 ''' Write thumbnails to file and return a list of (thumb_filename, final_thumb_filename) '''
6c4fd172 3881 write_all = self.params.get('write_all_thumbnails', False)
80c03fa9 3882 thumbnails, ret = [], []
6c4fd172 3883 if write_all or self.params.get('writethumbnail', False):
0202b52a 3884 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3885 multiple = write_all and len(thumbnails) > 1
ec82d85a 3886
80c03fa9 3887 if thumb_filename_base is None:
3888 thumb_filename_base = filename
3889 if thumbnails and not thumb_filename_base:
3890 self.write_debug(f'Skipping writing {label} thumbnail')
3891 return ret
3892
dd0228ce 3893 for idx, t in list(enumerate(thumbnails))[::-1]:
80c03fa9 3894 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
aa9369a2 3895 thumb_display_id = f'{label} thumbnail {t["id"]}'
80c03fa9 3896 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3897 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
ec82d85a 3898
e04938ab 3899 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3900 if existing_thumb:
aa9369a2 3901 self.to_screen('[info] %s is already present' % (
3902 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
e04938ab 3903 t['filepath'] = existing_thumb
3904 ret.append((existing_thumb, thumb_filename_final))
ec82d85a 3905 else:
80c03fa9 3906 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
ec82d85a 3907 try:
297e9952 3908 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
80c03fa9 3909 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
d3d89c32 3910 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3911 shutil.copyfileobj(uf, thumbf)
80c03fa9 3912 ret.append((thumb_filename, thumb_filename_final))
885cc0b7 3913 t['filepath'] = thumb_filename
3158150c 3914 except network_exceptions as err:
dd0228ce 3915 thumbnails.pop(idx)
80c03fa9 3916 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
6c4fd172 3917 if ret and not write_all:
3918 break
0202b52a 3919 return ret
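    # Illustrative sketch (assumption): with 'writethumbnail' alone, the reversed loop above
    # stops after the first thumbnail that downloads successfully (normally the most preferred
    # one); 'write_all_thumbnails' writes every thumbnail and, when there is more than one,
    # prefixes each extension with the thumbnail id. Failed downloads are dropped from
    # info_dict['thumbnails'].
    #
    #   ydl = YoutubeDL({'writethumbnail': True})        # best working thumbnail only
    #   ydl = YoutubeDL({'write_all_thumbnails': True})  # all thumbnails, '<id>.<ext>' names
    #   thumbs = ydl._write_thumbnails('video', info_dict, temp_filename)
    #   # -> [(written_path, final_path), ...]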