#!/usr/bin/env python3
import collections
import contextlib
import datetime
import errno
import fileinput
import functools
import io
import itertools
import json
import locale
import operator
import os
import platform
import random
import re
import shutil
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata
import urllib.request

from string import ascii_letters

from .cache import Cache
from .compat import (
    HAS_LEGACY as compat_has_legacy,
    compat_get_terminal_size,
    compat_os_name,
    compat_shlex_quote,
    compat_str,
    compat_urllib_error,
    compat_urllib_request,
)
from .cookies import load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .update import detect_variant
from .utils import (
    DEFAULT_OUTTMPL,
    IDENTITY,
    LINK_TEMPLATES,
    NO_DEFAULT,
    NUMBER_RE,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    GeoRestrictedError,
    HEADRequest,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PerRequestProxyHandler,
    PlaylistEntries,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
    age_restricted,
    args_to_str,
    date_from_str,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    expand_path,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_domain,
    int_or_none,
    iri_to_uri,
    join_nonempty,
    locked_file,
    make_dir,
    make_HTTPS_handler,
    merge_headers,
    network_exceptions,
    number_of_digits,
    orderedSet,
    parse_filesize,
    platform_name,
    preferredencoding,
    prepend_extension,
    register_socks_protocols,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_get,
    url_basename,
    variadic,
    version_tuple,
    windows_enable_vt_mode,
    write_json_file,
    write_string,
)
from .version import RELEASE_GIT_HEAD, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Given a video URL, the downloader doesn't know how to
    extract all the needed information (that is the task of the
    InfoExtractors), so it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge of the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are video or any of the
                       items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. See "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    force_generic_extractor: Force downloader to use the generic extractor
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playlist_items:    Specific indices of playlist to download.
    playlistrandom:    Download playlist items in random order.
    lazy_playlist:     Process playlist entries as they are received.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove private fields from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages. Eg: ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to for the entire queue
    cookiefile:        File name or text stream from where cookies should be read and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, and the name of the
                       keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    client_certificate: Path to client certificate file in PEM format. May include the private key
    client_certificate_key: Path to private key file for client certificate
    client_certificate_password: Password for client certificate private key, if encrypted.
                       If not provided and the key is encrypted, yt-dlp will ask interactively
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key:  The name of the postprocessor. See
                               yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                               the entries of utils.POSTPROCESS_WHEN
                               Assumed to be 'post_process' if not given
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                                 Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: Extension to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval:Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort,
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'
    retry_sleep_functions: Dictionary of functions that take the number of attempts
                       as argument and return the time to sleep in seconds.
                       Allowed keys are 'http', 'fragment', 'file_access'
    download_ranges:   A function that gets called for every video with the signature
                       (info_dict, *, ydl) -> Iterable[Section].
                       Only the returned sections will be downloaded. Each Section contains:
                       * start_time: Start time of the section in seconds
                       * end_time: End time of the section in seconds
                       * title: Section title (Optional)
                       * index: Section number (Optional)

    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       Eg: {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    playliststart:     - Use playlist_items
                       Playlist item to start at.
    playlistend:       - Use playlist_items
                       Playlist item to end at.
    playlistreverse:   - Use playlist_items
                       Download playlist items in reverse order.
    forceurl:          - Use forceprint
                       Force printing final URL.
    forcetitle:        - Use forceprint
                       Force printing title.
    forceid:           - Use forceprint
                       Force printing ID.
    forcethumbnail:    - Use forceprint
                       Force printing thumbnail URL.
    forcedescription:  - Use forceprint
                       Force printing description.
    forcefilename:     - Use forceprint
                       Force printing final filename.
    forceduration:     - Use forceprint
                       Force printing duration.
    allsubtitles:      - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    include_ads:       - Doesn't work
                       Download ads as well
    call_home:         - Not implemented
                       Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging.
    post_hooks:        - Register a custom postprocessor
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    prefer_ffmpeg:     - avconv support is deprecated
                       If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    """

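    # A minimal usage sketch (illustrative only; just a few of the keys documented
    # above are shown, and any other documented option can be added the same way):
    #
    #   from yt_dlp import YoutubeDL
    #
    #   ydl_opts = {
    #       'format': 'bestvideo+bestaudio/best',
    #       'outtmpl': {'default': '%(title)s [%(id)s].%(ext)s'},
    #       'paths': {'home': '~/Videos'},
    #   }
    #   with YoutubeDL(ydl_opts) as ydl:
    #       ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
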
    _NUMERIC_FIELDS = {
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
    }
    _format_selection_exts = {
        'audio': {'m4a', 'mp3', 'ogg', 'aac'},
        'video': {'mp4', 'flv', 'webm', '3gp'},
        'storyboards': {'mhtml'},
    }

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        windows_enable_vt_mode()
        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )
        self._allow_colors = Namespace(**{
            type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
            for type_, stream in self._out_files.items_ if type_ != 'console'
        })

        MIN_SUPPORTED, MIN_RECOMMENDED = (3, 6), (3, 7)
        current_version = sys.version_info[:2]
        if current_version < MIN_RECOMMENDED:
            msg = 'Support for Python version %d.%d has been deprecated and will break in future versions of yt-dlp'
            if current_version < MIN_SUPPORTED:
                msg = 'Python version %d.%d is no longer supported'
            self.deprecation_warning(
                f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                '         If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecation_warning(msg)

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        if not compat_has_legacy:
            self.params['compat_opts'].add('no-compat-legacy')
        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if auto_init:
            if auto_init != 'no_verbose_header':
                self.print_debug_header()
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        # Set http_headers defaults according to std_headers
        self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        self._setup_opener()
        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            if fn is None:
                return False
            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

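        # The archive is a plain-text file with one entry per line; each line
        # typically has the form '<extractor> <video id>', e.g. 'youtube BaW_jenozKc'
        # (illustrative example).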
        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def _get_info_extractor_class(self, ie_key):
        ie = self._ies.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)
            self.add_info_extractor(ie)
        return ie

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key; it will try to get one from
        the _ies list, and if there's no instance it will create a new one and
        add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignorerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file the 'WARNING:' will be colored
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'DeprecationWarning: {message}')
        else:
            self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message or print message to stderr'''
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        sanitize = IDENTITY
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict)
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

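    # Illustrative example for get_output_path above: with
    # params {'paths': {'home': '/downloads', 'temp': 'tmp'}},
    # get_output_path('temp', 'abc.mp4') joins these into '/downloads/tmp/abc.mp4',
    # while get_output_path('', 'abc.mp4') gives '/downloads/abc.mp4'.
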
    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err

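    # Illustrative example for validate_outtmpl above:
    # YoutubeDL.validate_outtmpl('%(title)s.%(ext)s') returns None (valid), while a
    # broken template such as '%(title' returns the ValueError describing the problem.
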
    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        info_dict.pop('__pending_error', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')

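        # Illustrative examples of the field syntax parsed by INTERNAL_FORMAT_RE:
        #   'title'               plain field
        #   'formats.0.url'       object traversal (keys separated by '.')
        #   'playlist_index+1'    maths using the operators in MATH_FUNCTIONS
        #   'epoch>%Y-%m-%d'      datetime formatting via strf_format
        #   'title,id'            alternate field used when the first is missing
        #   'id&present'          replacement text used when the field has a value
        #   'uploader|Unknown'    literal default used when the field is missing
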
2b8a2973 1120 def _traverse_infodict(k):
1121 k = k.split('.')
1122 if k[0] == '':
1123 k.pop(0)
1124 return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
76a264ac 1125
752cda38 1126 def get_value(mdict):
1127 # Object traversal
2b8a2973 1128 value = _traverse_infodict(mdict['fields'])
752cda38 1129 # Negative
1130 if mdict['negate']:
1131 value = float_or_none(value)
1132 if value is not None:
1133 value *= -1
1134 # Do maths
385a27fa 1135 offset_key = mdict['maths']
1136 if offset_key:
752cda38 1137 value = float_or_none(value)
1138 operator = None
385a27fa 1139 while offset_key:
1140 item = re.match(
1141 MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1142 offset_key).group(0)
1143 offset_key = offset_key[len(item):]
1144 if operator is None:
752cda38 1145 operator = MATH_FUNCTIONS[item]
385a27fa 1146 continue
1147 item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1148 offset = float_or_none(item)
1149 if offset is None:
2b8a2973 1150 offset = float_or_none(_traverse_infodict(item))
385a27fa 1151 try:
1152 value = operator(value, multiplier * offset)
1153 except (TypeError, ZeroDivisionError):
1154 return None
1155 operator = None
752cda38 1156 # Datetime formatting
1157 if mdict['strf_format']:
7c37ff97 1158 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
752cda38 1159
1160 return value
1161
b868936c 1162 na = self.params.get('outtmpl_na_placeholder', 'NA')
1163
e0fd9573 1164 def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
5c3895ff 1165 return sanitize_filename(str(value), restricted=restricted, is_id=(
1166 bool(re.search(r'(^|[_.])id(\.|$)', key))
8a82af35 1167 if 'filename-sanitization' in self.params['compat_opts']
5c3895ff 1168 else NO_DEFAULT))
e0fd9573 1169
1170 sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1171 sanitize = bool(sanitize)
1172
6e84b215 1173 def _dumpjson_default(obj):
1174 if isinstance(obj, (set, LazyList)):
1175 return list(obj)
adbc4ec4 1176 return repr(obj)
6e84b215 1177
752cda38 1178 def create_key(outer_mobj):
1179 if not outer_mobj.group('has_key'):
b836dc94 1180 return outer_mobj.group(0)
752cda38 1181 key = outer_mobj.group('key')
752cda38 1182 mobj = re.match(INTERNAL_FORMAT_RE, key)
e0fd9573 1183 initial_field = mobj.group('fields') if mobj else ''
e978789f 1184 value, replacement, default = None, None, na
7c37ff97 1185 while mobj:
e625be0d 1186 mobj = mobj.groupdict()
7c37ff97 1187 default = mobj['default'] if mobj['default'] is not None else default
752cda38 1188 value = get_value(mobj)
e978789f 1189 replacement = mobj['replacement']
7c37ff97 1190 if value is None and mobj['alternate']:
34baa9fd 1191 mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
7c37ff97 1192 else:
1193 break
752cda38 1194
b868936c 1195 fmt = outer_mobj.group('format')
752cda38 1196 if fmt == 's' and value is not None and key in field_size_compat_map.keys():
86e5f3ed 1197 fmt = f'0{field_size_compat_map[key]:d}d'
752cda38 1198
e978789f 1199 value = default if value is None else value if replacement is None else replacement
752cda38 1200
4476d2c7 1201 flags = outer_mobj.group('conversion') or ''
7d1eb38a 1202 str_fmt = f'{fmt[:-1]}s'
524e2e4f 1203 if fmt[-1] == 'l': # list
4476d2c7 1204 delim = '\n' if '#' in flags else ', '
9e907ebd 1205 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
524e2e4f 1206 elif fmt[-1] == 'j': # json
4476d2c7 1207 value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
524e2e4f 1208 elif fmt[-1] == 'q': # quoted
4476d2c7 1209 value = map(str, variadic(value) if '#' in flags else [value])
1210 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
524e2e4f 1211 elif fmt[-1] == 'B': # bytes
0f06bcd7 1212 value = f'%{str_fmt}'.encode() % str(value).encode()
f5aa5cfb 1213 value, fmt = value.decode('utf-8', 'ignore'), 's'
524e2e4f 1214 elif fmt[-1] == 'U': # unicode normalized
524e2e4f 1215 value, fmt = unicodedata.normalize(
1216 # "+" = compatibility equivalence, "#" = NFD
4476d2c7 1217 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
524e2e4f 1218 value), str_fmt
e0fd9573 1219 elif fmt[-1] == 'D': # decimal suffix
abbeeebc 1220 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1221 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1222 factor=1024 if '#' in flags else 1000)
37893bb0 1223 elif fmt[-1] == 'S': # filename sanitization
e0fd9573 1224 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
7d1eb38a 1225 elif fmt[-1] == 'c':
524e2e4f 1226 if value:
1227 value = str(value)[0]
76a264ac 1228 else:
524e2e4f 1229 fmt = str_fmt
76a264ac 1230 elif fmt[-1] not in 'rs': # numeric
a439a3a4 1231 value = float_or_none(value)
752cda38 1232 if value is None:
1233 value, fmt = default, 's'
901130bb 1234
752cda38 1235 if sanitize:
1236 if fmt[-1] == 'r':
1237 # If value is an object, sanitize might convert it to a string
1238 # So we convert it to repr first
7d1eb38a 1239 value, fmt = repr(value), str_fmt
639f1cea 1240 if fmt[-1] in 'csr':
e0fd9573 1241 value = sanitizer(initial_field, value)
901130bb 1242
b868936c 1243 key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
385a27fa 1244 TMPL_DICT[key] = value
b868936c 1245 return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
752cda38 1246
385a27fa 1247 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1248
819e0531 1249 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1250 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1251 return self.escape_outtmpl(outtmpl) % info_dict
1252
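# A minimal usage sketch of the conversion suffixes handled by create_key()
# above, evaluated through evaluate_outtmpl(); the info dict and its field
# values below are hypothetical sample data, not taken from this module.
import yt_dlp

ydl = yt_dlp.YoutubeDL()
info = {'id': 'abc123', 'title': 'My: Video', 'ext': 'mp4',
        'tags': ['music', 'live'], 'filesize': 123456789}
print(ydl.evaluate_outtmpl('%(title)S.%(ext)s', info))     # filename-sanitized title
print(ydl.evaluate_outtmpl('%(tags)l', info))              # 'music, live' (', '-joined list)
print(ydl.evaluate_outtmpl('%(tags)#l', info))             # newline-joined list ('#' flag)
print(ydl.evaluate_outtmpl('%(tags)j', info))              # JSON dump of the list
print(ydl.evaluate_outtmpl('%(filesize)D', info))          # decimal suffix, e.g. '123M'
print(ydl.evaluate_outtmpl('%(uploader|Unknown)s', info))  # '|' supplies a default value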
5127e92a 1253 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1254 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1255 if outtmpl is None:
bf1824b3 1256 outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
8222d8de 1257 try:
5127e92a 1258 outtmpl = self._outtmpl_expandpath(outtmpl)
e0fd9573 1259 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
6a0546e3 1260 if not filename:
1261 return None
15da37c7 1262
5127e92a 1263 if tmpl_type in ('', 'temp'):
6a0546e3 1264 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1265 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1266 filename = replace_extension(filename, ext, final_ext)
5127e92a 1267 elif tmpl_type:
6a0546e3 1268 force_ext = OUTTMPL_TYPES[tmpl_type]
1269 if force_ext:
1270 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
de6000d9 1271
bdc3fd2f
U
1272 # https://github.com/blackjack4494/youtube-dlc/issues/85
1273 trim_file_name = self.params.get('trim_file_name', False)
1274 if trim_file_name:
5c22c63d 1275 no_ext, *ext = filename.rsplit('.', 2)
1276 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
bdc3fd2f 1277
0202b52a 1278 return filename
8222d8de 1279 except ValueError as err:
6febd1c1 1280 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
1281 return None
1282
5127e92a 1283 def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1284 """Generate the output filename"""
1285 if outtmpl:
1286 assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1287 dir_type = None
1288 filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
80c03fa9 1289 if not filename and dir_type not in ('', 'temp'):
1290 return ''
de6000d9 1291
c84aeac6 1292 if warn:
21cd8fae 1293 if not self.params.get('paths'):
de6000d9 1294 pass
1295 elif filename == '-':
c84aeac6 1296 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
de6000d9 1297 elif os.path.isabs(filename):
c84aeac6 1298 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
de6000d9 1299 if filename == '-' or not filename:
1300 return filename
1301
21cd8fae 1302 return self.get_output_path(dir_type, filename)
0202b52a 1303
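# A rough sketch of how prepare_filename() combines the 'outtmpl' and 'paths'
# options; the option values and info dict are hypothetical examples.
import yt_dlp

opts = {
    'outtmpl': {'default': '%(title)s [%(id)s].%(ext)s', 'thumbnail': '%(id)s.%(ext)s'},
    'paths': {'home': '/tmp/videos', 'thumbnail': 'thumbs'},
}
with yt_dlp.YoutubeDL(opts) as ydl:
    info = {'id': 'abc123', 'title': 'Example', 'ext': 'mp4'}
    print(ydl.prepare_filename(info))                        # default template, under the 'home' path
    print(ydl.prepare_filename(info, dir_type='thumbnail'))  # thumbnail template, under 'thumbs'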
120fe513 1304 def _match_entry(self, info_dict, incomplete=False, silent=False):
ecdec191 1305 """ Returns None if the file should be downloaded; otherwise, a string reason for skipping it """
8222d8de 1306
c77495e3 1307 video_title = info_dict.get('title', info_dict.get('id', 'video'))
1308
8b0d7497 1309 def check_filter():
8b0d7497 1310 if 'title' in info_dict:
1311 # This can happen when we're just evaluating the playlist
1312 title = info_dict['title']
1313 matchtitle = self.params.get('matchtitle', False)
1314 if matchtitle:
1315 if not re.search(matchtitle, title, re.IGNORECASE):
1316 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1317 rejecttitle = self.params.get('rejecttitle', False)
1318 if rejecttitle:
1319 if re.search(rejecttitle, title, re.IGNORECASE):
1320 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1321 date = info_dict.get('upload_date')
1322 if date is not None:
1323 dateRange = self.params.get('daterange', DateRange())
1324 if date not in dateRange:
86e5f3ed 1325 return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
8b0d7497 1326 view_count = info_dict.get('view_count')
1327 if view_count is not None:
1328 min_views = self.params.get('min_views')
1329 if min_views is not None and view_count < min_views:
1330 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1331 max_views = self.params.get('max_views')
1332 if max_views is not None and view_count > max_views:
1333 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1334 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1335 return 'Skipping "%s" because it is age restricted' % video_title
8b0d7497 1336
8f18aca8 1337 match_filter = self.params.get('match_filter')
1338 if match_filter is not None:
1339 try:
1340 ret = match_filter(info_dict, incomplete=incomplete)
1341 except TypeError:
1342 # For backward compatibility
1343 ret = None if incomplete else match_filter(info_dict)
492272fe 1344 if ret is NO_DEFAULT:
1345 while True:
1346 filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1347 reply = input(self._format_screen(
1348 f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1349 if reply in {'y', ''}:
1350 return None
1351 elif reply == 'n':
1352 return f'Skipping {video_title}'
492272fe 1353 elif ret is not None:
8f18aca8 1354 return ret
8b0d7497 1355 return None
1356
c77495e3 1357 if self.in_download_archive(info_dict):
1358 reason = '%s has already been recorded in the archive' % video_title
1359 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1360 else:
1361 reason = check_filter()
1362 break_opt, break_err = 'break_on_reject', RejectedVideoReached
8b0d7497 1363 if reason is not None:
120fe513 1364 if not silent:
1365 self.to_screen('[download] ' + reason)
c77495e3 1366 if self.params.get(break_opt, False):
1367 raise break_err()
8b0d7497 1368 return reason
fe7e0c98 1369
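# A hedged sketch of a custom 'match_filter' callable as consumed by
# check_filter() above: return None to accept the video, or a string giving
# the reason to skip it. The 60-second threshold is arbitrary.
import yt_dlp

def skip_short_videos(info_dict, *, incomplete=False):
    duration = info_dict.get('duration')
    if incomplete or duration is None:
        return None                      # not enough metadata yet - let it through
    if duration < 60:
        return 'Skipping short video'    # returning a string skips the video
    return None                          # returning None downloads the video

ydl = yt_dlp.YoutubeDL({'match_filter': skip_short_videos})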
b6c45014
JMF
1370 @staticmethod
1371 def add_extra_info(info_dict, extra_info):
1372 '''Set the keys from extra_info in info dict if they are missing'''
1373 for key, value in extra_info.items():
1374 info_dict.setdefault(key, value)
1375
409e1828 1376 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
61aa5ba3 1377 process=True, force_generic_extractor=False):
41d1cca3 1378 """
1379 Return a list with a dictionary for each video extracted.
1380
1381 Arguments:
1382 url -- URL to extract
1383
1384 Keyword arguments:
1385 download -- whether to download videos during extraction
1386 ie_key -- extractor key hint
1387 extra_info -- dictionary containing the extra values to add to each result
1388 process -- whether to resolve all unresolved references (URLs, playlist items),
1389 must be True for download to work.
1390 force_generic_extractor -- force using the generic extractor
1391 """
fe7e0c98 1392
409e1828 1393 if extra_info is None:
1394 extra_info = {}
1395
61aa5ba3 1396 if not ie_key and force_generic_extractor:
d22dec74
S
1397 ie_key = 'Generic'
1398
8222d8de 1399 if ie_key:
8b7491c8 1400 ies = {ie_key: self._get_info_extractor_class(ie_key)}
8222d8de
JMF
1401 else:
1402 ies = self._ies
1403
8b7491c8 1404 for ie_key, ie in ies.items():
8222d8de
JMF
1405 if not ie.suitable(url):
1406 continue
1407
1408 if not ie.working():
6febd1c1
PH
1409 self.report_warning('The program functionality for this site has been marked as broken, '
1410 'and will probably not work.')
8222d8de 1411
1151c407 1412 temp_id = ie.get_temp_id(url)
a0566bbf 1413 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
5e5be0c0 1414 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1415 if self.params.get('break_on_existing', False):
1416 raise ExistingVideoReached()
a0566bbf 1417 break
8b7491c8 1418 return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
a0566bbf 1419 else:
1420 self.report_error('no suitable InfoExtractor for URL %s' % url)
1421
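# A minimal embedding sketch for the method above; the URL is a placeholder.
import yt_dlp

with yt_dlp.YoutubeDL({'quiet': True}) as ydl:
    info = ydl.extract_info('https://example.com/watch?v=abc123', download=False)
    if info:  # extract_info may return None when 'ignoreerrors' is set
        print(info.get('id'), info.get('title'))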
7e88d7d7 1422 def _handle_extraction_exceptions(func):
b5ae35ee 1423 @functools.wraps(func)
a0566bbf 1424 def wrapper(self, *args, **kwargs):
6da22e7d 1425 while True:
1426 try:
1427 return func(self, *args, **kwargs)
1428 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
8222d8de 1429 raise
6da22e7d 1430 except ReExtractInfo as e:
1431 if e.expected:
1432 self.to_screen(f'{e}; Re-extracting data')
1433 else:
1434 self.to_stderr('\r')
1435 self.report_warning(f'{e}; Re-extracting data')
1436 continue
1437 except GeoRestrictedError as e:
1438 msg = e.msg
1439 if e.countries:
1440 msg += '\nThis video is available in %s.' % ', '.join(
1441 map(ISO3166Utils.short2full, e.countries))
1442 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1443 self.report_error(msg)
1444 except ExtractorError as e: # An error we somewhat expected
1445 self.report_error(str(e), e.format_traceback())
1446 except Exception as e:
1447 if self.params.get('ignoreerrors'):
1448 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1449 else:
1450 raise
1451 break
a0566bbf 1452 return wrapper
1453
f2ebc5c7 1454 def _wait_for_video(self, ie_result):
1455 if (not self.params.get('wait_for_video')
1456 or ie_result.get('_type', 'video') != 'video'
1457 or ie_result.get('formats') or ie_result.get('url')):
1458 return
1459
1460 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1461 last_msg = ''
1462
1463 def progress(msg):
1464 nonlocal last_msg
1465 self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1466 last_msg = msg
1467
1468 min_wait, max_wait = self.params.get('wait_for_video')
1469 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1470 if diff is None and ie_result.get('live_status') == 'is_upcoming':
16c620bc 1471 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
f2ebc5c7 1472 self.report_warning('Release time of video is not known')
1473 elif (diff or 0) <= 0:
1474 self.report_warning('Video should already be available according to extracted info')
38d79fd1 1475 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
f2ebc5c7 1476 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1477
1478 wait_till = time.time() + diff
1479 try:
1480 while True:
1481 diff = wait_till - time.time()
1482 if diff <= 0:
1483 progress('')
1484 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1485 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1486 time.sleep(1)
1487 except KeyboardInterrupt:
1488 progress('')
1489 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1490 except BaseException as e:
1491 if not isinstance(e, ReExtractInfo):
1492 self.to_screen('')
1493 raise
1494
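# A small sketch of the 'wait_for_video' option consumed above: a
# (min_seconds, max_seconds) pair used to pick the retry delay when the
# release time of an upcoming video is not known. The values are arbitrary.
import yt_dlp

ydl = yt_dlp.YoutubeDL({'wait_for_video': (60, 600)})  # wait between 1 and 10 minutes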
7e88d7d7 1495 @_handle_extraction_exceptions
58f197b7 1496 def __extract_info(self, url, ie, download, extra_info, process):
a0566bbf 1497 ie_result = ie.extract(url)
1498 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1499 return
1500 if isinstance(ie_result, list):
1501 # Backwards compatibility: old IE result format
1502 ie_result = {
1503 '_type': 'compat_list',
1504 'entries': ie_result,
1505 }
e37d0efb 1506 if extra_info.get('original_url'):
1507 ie_result.setdefault('original_url', extra_info['original_url'])
a0566bbf 1508 self.add_default_extra_info(ie_result, ie, url)
1509 if process:
f2ebc5c7 1510 self._wait_for_video(ie_result)
a0566bbf 1511 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1512 else:
a0566bbf 1513 return ie_result
fe7e0c98 1514
ea38e55f 1515 def add_default_extra_info(self, ie_result, ie, url):
6033d980 1516 if url is not None:
1517 self.add_extra_info(ie_result, {
1518 'webpage_url': url,
1519 'original_url': url,
57ebfca3 1520 })
1521 webpage_url = ie_result.get('webpage_url')
1522 if webpage_url:
1523 self.add_extra_info(ie_result, {
1524 'webpage_url_basename': url_basename(webpage_url),
1525 'webpage_url_domain': get_domain(webpage_url),
6033d980 1526 })
1527 if ie is not None:
1528 self.add_extra_info(ie_result, {
1529 'extractor': ie.IE_NAME,
1530 'extractor_key': ie.ie_key(),
1531 })
ea38e55f 1532
58adec46 1533 def process_ie_result(self, ie_result, download=True, extra_info=None):
8222d8de
JMF
1534 """
1535 Take the result of the ie (may be modified) and resolve all unresolved
1536 references (URLs, playlist items).
1537
1538 It will also download the videos if 'download' is True.
1539 Returns the resolved ie_result.
1540 """
58adec46 1541 if extra_info is None:
1542 extra_info = {}
e8ee972c
PH
1543 result_type = ie_result.get('_type', 'video')
1544
057a5206 1545 if result_type in ('url', 'url_transparent'):
134c6ea8 1546 ie_result['url'] = sanitize_url(ie_result['url'])
e37d0efb 1547 if ie_result.get('original_url'):
1548 extra_info.setdefault('original_url', ie_result['original_url'])
1549
057a5206 1550 extract_flat = self.params.get('extract_flat', False)
3089bc74
S
1551 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1552 or extract_flat is True):
ecb54191 1553 info_copy = ie_result.copy()
6033d980 1554 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
360167b9 1555 if ie and not ie_result.get('id'):
4614bc22 1556 info_copy['id'] = ie.get_temp_id(ie_result['url'])
6033d980 1557 self.add_default_extra_info(info_copy, ie, ie_result['url'])
4614bc22 1558 self.add_extra_info(info_copy, extra_info)
b5475f11 1559 info_copy, _ = self.pre_process(info_copy)
ecb54191 1560 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
415f8d51 1561 self._raise_pending_errors(info_copy)
4614bc22 1562 if self.params.get('force_write_download_archive', False):
1563 self.record_download_archive(info_copy)
e8ee972c
PH
1564 return ie_result
1565
8222d8de 1566 if result_type == 'video':
b6c45014 1567 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1568 ie_result = self.process_video_result(ie_result, download=download)
415f8d51 1569 self._raise_pending_errors(ie_result)
28b0eb0f 1570 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1571 if additional_urls:
e9f4ccd1 1572 # TODO: Improve MetadataParserPP to allow setting a list
9c2b75b5 1573 if isinstance(additional_urls, compat_str):
1574 additional_urls = [additional_urls]
1575 self.to_screen(
1576 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1577 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1578 ie_result['additional_entries'] = [
1579 self.extract_info(
b69fd25c 1580 url, download, extra_info=extra_info,
9c2b75b5 1581 force_generic_extractor=self.params.get('force_generic_extractor'))
1582 for url in additional_urls
1583 ]
1584 return ie_result
8222d8de
JMF
1585 elif result_type == 'url':
1586 # We have to add extra_info to the results because it may be
1587 # contained in a playlist
07cce701 1588 return self.extract_info(
1589 ie_result['url'], download,
1590 ie_key=ie_result.get('ie_key'),
1591 extra_info=extra_info)
7fc3fa05
PH
1592 elif result_type == 'url_transparent':
1593 # Use the information from the embedding page
1594 info = self.extract_info(
1595 ie_result['url'], ie_key=ie_result.get('ie_key'),
1596 extra_info=extra_info, download=False, process=False)
1597
1640eb09
S
1598 # extract_info may return None when ignoreerrors is enabled and
1599 # extraction failed with an error, don't crash and return early
1600 # in this case
1601 if not info:
1602 return info
1603
3975b4d2 1604 exempted_fields = {'_type', 'url', 'ie_key'}
1605 if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1606 # For video clips, the id etc of the clip extractor should be used
1607 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1608
412c617d 1609 new_result = info.copy()
3975b4d2 1610 new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
7fc3fa05 1611
0563f7ac
S
1612 # Extracted info may not be a video result (i.e.
1613 # info.get('_type', 'video') != 'video') but rather a URL or
1614 # url_transparent. In such cases, outer metadata (from ie_result)
1615 # should be propagated to the inner one (info). For this to happen,
1616 # _type of info should be overridden with url_transparent. This
067aa17e 1617 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac
S
1618 if new_result.get('_type') == 'url':
1619 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
1620
1621 return self.process_ie_result(
1622 new_result, download=download, extra_info=extra_info)
40fcba5e 1623 elif result_type in ('playlist', 'multi_video'):
30a074c2 1624 # Protect from infinite recursion due to recursively nested playlists
1625 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1626 webpage_url = ie_result['webpage_url']
1627 if webpage_url in self._playlist_urls:
7e85e872 1628 self.to_screen(
30a074c2 1629 '[download] Skipping already downloaded playlist: %s'
1630 % (ie_result.get('title') or ie_result.get('id')))
1631 return
7e85e872 1632
30a074c2 1633 self._playlist_level += 1
1634 self._playlist_urls.add(webpage_url)
03f83004 1635 self._fill_common_fields(ie_result, False)
bc516a3f 1636 self._sanitize_thumbnails(ie_result)
30a074c2 1637 try:
1638 return self.__process_playlist(ie_result, download)
1639 finally:
1640 self._playlist_level -= 1
1641 if not self._playlist_level:
1642 self._playlist_urls.clear()
8222d8de 1643 elif result_type == 'compat_list':
c9bf4114
PH
1644 self.report_warning(
1645 'Extractor %s returned a compat_list result. '
1646 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1647
8222d8de 1648 def _fixup(r):
b868936c 1649 self.add_extra_info(r, {
1650 'extractor': ie_result['extractor'],
1651 'webpage_url': ie_result['webpage_url'],
1652 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1653 'webpage_url_domain': get_domain(ie_result['webpage_url']),
b868936c 1654 'extractor_key': ie_result['extractor_key'],
1655 })
8222d8de
JMF
1656 return r
1657 ie_result['entries'] = [
b6c45014 1658 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1659 for r in ie_result['entries']
1660 ]
1661 return ie_result
1662 else:
1663 raise Exception('Invalid result type: %s' % result_type)
1664
e92caff5 1665 def _ensure_dir_exists(self, path):
1666 return make_dir(path, self.report_error)
1667
3b603dbd 1668 @staticmethod
1669 def _playlist_infodict(ie_result, **kwargs):
1670 return {
1671 **ie_result,
1672 'playlist': ie_result.get('title') or ie_result.get('id'),
1673 'playlist_id': ie_result.get('id'),
1674 'playlist_title': ie_result.get('title'),
1675 'playlist_uploader': ie_result.get('uploader'),
1676 'playlist_uploader_id': ie_result.get('uploader_id'),
1677 'playlist_index': 0,
1678 **kwargs,
1679 }
1680
30a074c2 1681 def __process_playlist(self, ie_result, download):
7e88d7d7 1682 """Process each entry in the playlist"""
1683 title = ie_result.get('title') or ie_result.get('id') or '<Untitled>'
1684 self.to_screen(f'[download] Downloading playlist: {title}')
f0d785d3 1685
7e88d7d7 1686 all_entries = PlaylistEntries(self, ie_result)
7e9a6125 1687 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1688
1689 lazy = self.params.get('lazy_playlist')
1690 if lazy:
1691 resolved_entries, n_entries = [], 'N/A'
1692 ie_result['requested_entries'], ie_result['entries'] = None, None
1693 else:
1694 entries = resolved_entries = list(entries)
1695 n_entries = len(resolved_entries)
1696 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1697 if not ie_result.get('playlist_count'):
1698 # Better to do this after potentially exhausting entries
1699 ie_result['playlist_count'] = all_entries.get_full_count()
498f5606 1700
e08a85d8 1701 _infojson_written = False
0bfc53d0 1702 write_playlist_files = self.params.get('allow_playlist_files', True)
1703 if write_playlist_files and self.params.get('list_thumbnails'):
1704 self.list_thumbnails(ie_result)
1705 if write_playlist_files and not self.params.get('simulate'):
7e9a6125 1706 ie_copy = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
e08a85d8 1707 _infojson_written = self._write_info_json(
1708 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1709 if _infojson_written is None:
80c03fa9 1710 return
1711 if self._write_description('playlist', ie_result,
1712 self.prepare_filename(ie_copy, 'pl_description')) is None:
1713 return
681de68e 1714 # TODO: This should be passed to ThumbnailsConvertor if necessary
80c03fa9 1715 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
30a074c2 1716
7e9a6125 1717 if lazy:
1718 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1719 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1720 elif self.params.get('playlistreverse'):
1721 entries.reverse()
1722 elif self.params.get('playlistrandom'):
30a074c2 1723 random.shuffle(entries)
1724
7e88d7d7 1725 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
1726 f'{format_field(ie_result, "playlist_count", " of %s")}')
30a074c2 1727
26e2805c 1728 failures = 0
1729 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
7e9a6125 1730 for i, (playlist_index, entry) in enumerate(entries):
1731 if lazy:
1732 resolved_entries.append((playlist_index, entry))
1733
7e88d7d7 1734 # TODO: Add auto-generated fields
1735 if self._match_entry(entry, incomplete=True) is not None:
1736 continue
1737
19a03940 1738 self.to_screen('[download] Downloading video %s of %s' % (
7e9a6125 1739 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
7e88d7d7 1740
1741 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
7e9a6125 1742 if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
1743 playlist_index = ie_result['requested_entries'][i]
1744
7e88d7d7 1745 entry_result = self.__process_iterable_entry(entry, download, {
7e9a6125 1746 'n_entries': int_or_none(n_entries),
1747 '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
f0d785d3 1748 'playlist_count': ie_result.get('playlist_count'),
71729754 1749 'playlist_index': playlist_index,
7e9a6125 1750 'playlist_autonumber': i + 1,
7e88d7d7 1751 'playlist': title,
30a074c2 1752 'playlist_id': ie_result.get('id'),
1753 'playlist_title': ie_result.get('title'),
1754 'playlist_uploader': ie_result.get('uploader'),
1755 'playlist_uploader_id': ie_result.get('uploader_id'),
30a074c2 1756 'extractor': ie_result['extractor'],
1757 'webpage_url': ie_result['webpage_url'],
1758 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1759 'webpage_url_domain': get_domain(ie_result['webpage_url']),
30a074c2 1760 'extractor_key': ie_result['extractor_key'],
7e88d7d7 1761 })
26e2805c 1762 if not entry_result:
1763 failures += 1
1764 if failures >= max_failures:
1765 self.report_error(
7e88d7d7 1766 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
26e2805c 1767 break
7e9a6125 1768 resolved_entries[i] = (playlist_index, entry_result)
7e88d7d7 1769
1770 # Update with processed data
7e9a6125 1771 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
e08a85d8 1772
1773 # Write the updated info to json
cb96c5be 1774 if _infojson_written is True and self._write_info_json(
e08a85d8 1775 'updated playlist', ie_result,
1776 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1777 return
ca30f449 1778
ed5835b4 1779 ie_result = self.run_all_pps('playlist', ie_result)
7e88d7d7 1780 self.to_screen(f'[download] Finished downloading playlist: {title}')
30a074c2 1781 return ie_result
1782
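# A hedged sketch of playlist-related options read by __process_playlist()
# above; the values are arbitrary. As the code warns, reversing/shuffling
# cannot be combined with lazy extraction.
import yt_dlp

ydl = yt_dlp.YoutubeDL({
    'lazy_playlist': False,            # resolve all entries before downloading
    'playlistreverse': True,           # process entries in reverse order
    'skip_playlist_after_errors': 3,   # give up on the playlist after 3 failed entries
})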
7e88d7d7 1783 @_handle_extraction_exceptions
a0566bbf 1784 def __process_iterable_entry(self, entry, download, extra_info):
1785 return self.process_ie_result(
1786 entry, download=download, extra_info=extra_info)
1787
67134eab
JMF
1788 def _build_format_filter(self, filter_spec):
1789 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1790
1791 OPERATORS = {
1792 '<': operator.lt,
1793 '<=': operator.le,
1794 '>': operator.gt,
1795 '>=': operator.ge,
1796 '=': operator.eq,
1797 '!=': operator.ne,
1798 }
67134eab 1799 operator_rex = re.compile(r'''(?x)\s*
187986a8 1800 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1801 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1802 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1803 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1804 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1805 if m:
1806 try:
1807 comparison_value = int(m.group('value'))
1808 except ValueError:
1809 comparison_value = parse_filesize(m.group('value'))
1810 if comparison_value is None:
1811 comparison_value = parse_filesize(m.group('value') + 'B')
1812 if comparison_value is None:
1813 raise ValueError(
1814 'Invalid value %r in format specification %r' % (
67134eab 1815 m.group('value'), filter_spec))
9ddb6925
S
1816 op = OPERATORS[m.group('op')]
1817
083c9df9 1818 if not m:
9ddb6925
S
1819 STR_OPERATORS = {
1820 '=': operator.eq,
10d33b34
YCH
1821 '^=': lambda attr, value: attr.startswith(value),
1822 '$=': lambda attr, value: attr.endswith(value),
1823 '*=': lambda attr, value: value in attr,
1ce9a3cb 1824 '~=': lambda attr, value: value.search(attr) is not None
9ddb6925 1825 }
187986a8 1826 str_operator_rex = re.compile(r'''(?x)\s*
1827 (?P<key>[a-zA-Z0-9._-]+)\s*
1ce9a3cb
LF
1828 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1829 (?P<quote>["'])?
1830 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1831 (?(quote)(?P=quote))\s*
9ddb6925 1832 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 1833 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925 1834 if m:
1ce9a3cb
LF
1835 if m.group('op') == '~=':
1836 comparison_value = re.compile(m.group('value'))
1837 else:
1838 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2cc779f4
S
1839 str_op = STR_OPERATORS[m.group('op')]
1840 if m.group('negation'):
e118a879 1841 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
1842 else:
1843 op = str_op
083c9df9 1844
9ddb6925 1845 if not m:
187986a8 1846 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
1847
1848 def _filter(f):
1849 actual_value = f.get(m.group('key'))
1850 if actual_value is None:
1851 return m.group('none_inclusive')
1852 return op(actual_value, comparison_value)
67134eab
JMF
1853 return _filter
1854
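# Illustrative filter specs accepted by _build_format_filter() above, shown
# through the public 'format' option where they appear inside [...] brackets;
# the particular spec is only an example.
import yt_dlp

ydl = yt_dlp.YoutubeDL({
    # numeric comparison, string equality and a '?' (none-inclusive) filter:
    # formats missing 'height' also pass 'height<=?720'
    'format': 'bestvideo[height<=720][ext=mp4]+bestaudio[ext=m4a]/best[height<=?720]',
})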
9f1a1c36 1855 def _check_formats(self, formats):
1856 for f in formats:
1857 self.to_screen('[info] Testing format %s' % f['format_id'])
75689fe5 1858 path = self.get_output_path('temp')
1859 if not self._ensure_dir_exists(f'{path}/'):
1860 continue
1861 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
9f1a1c36 1862 temp_file.close()
1863 try:
1864 success, _ = self.dl(temp_file.name, f, test=True)
8a82af35 1865 except (DownloadError, OSError, ValueError) + network_exceptions:
9f1a1c36 1866 success = False
1867 finally:
1868 if os.path.exists(temp_file.name):
1869 try:
1870 os.remove(temp_file.name)
1871 except OSError:
1872 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1873 if success:
1874 yield f
1875 else:
1876 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1877
0017d9ad 1878 def _default_format_spec(self, info_dict, download=True):
0017d9ad 1879
af0f7428
S
1880 def can_merge():
1881 merger = FFmpegMergerPP(self)
1882 return merger.available and merger.can_merge()
1883
91ebc640 1884 prefer_best = (
b7b04c78 1885 not self.params.get('simulate')
91ebc640 1886 and download
1887 and (
1888 not can_merge()
21633673 1889 or info_dict.get('is_live') and not self.params.get('live_from_start')
bf1824b3 1890 or self.params['outtmpl']['default'] == '-'))
53ed7066 1891 compat = (
1892 prefer_best
1893 or self.params.get('allow_multiple_audio_streams', False)
8a82af35 1894 or 'format-spec' in self.params['compat_opts'])
91ebc640 1895
1896 return (
53ed7066 1897 'best/bestvideo+bestaudio' if prefer_best
1898 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 1899 else 'bestvideo+bestaudio/best')
0017d9ad 1900
67134eab
JMF
1901 def build_format_selector(self, format_spec):
1902 def syntax_error(note, start):
1903 message = (
1904 'Invalid format specification: '
86e5f3ed 1905 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
67134eab
JMF
1906 return SyntaxError(message)
1907
1908 PICKFIRST = 'PICKFIRST'
1909 MERGE = 'MERGE'
1910 SINGLE = 'SINGLE'
0130afb7 1911 GROUP = 'GROUP'
67134eab
JMF
1912 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1913
91ebc640 1914 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1915 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 1916
9f1a1c36 1917 check_formats = self.params.get('check_formats') == 'selected'
e8e73840 1918
67134eab
JMF
1919 def _parse_filter(tokens):
1920 filter_parts = []
1921 for type, string, start, _, _ in tokens:
1922 if type == tokenize.OP and string == ']':
1923 return ''.join(filter_parts)
1924 else:
1925 filter_parts.append(string)
1926
232541df 1927 def _remove_unused_ops(tokens):
17cc1534 1928 # Remove operators that we don't use and join them with the surrounding strings
232541df
JMF
1929 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1930 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1931 last_string, last_start, last_end, last_line = None, None, None, None
1932 for type, string, start, end, line in tokens:
1933 if type == tokenize.OP and string == '[':
1934 if last_string:
1935 yield tokenize.NAME, last_string, last_start, last_end, last_line
1936 last_string = None
1937 yield type, string, start, end, line
1938 # everything inside brackets will be handled by _parse_filter
1939 for type, string, start, end, line in tokens:
1940 yield type, string, start, end, line
1941 if type == tokenize.OP and string == ']':
1942 break
1943 elif type == tokenize.OP and string in ALLOWED_OPS:
1944 if last_string:
1945 yield tokenize.NAME, last_string, last_start, last_end, last_line
1946 last_string = None
1947 yield type, string, start, end, line
1948 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1949 if not last_string:
1950 last_string = string
1951 last_start = start
1952 last_end = end
1953 else:
1954 last_string += string
1955 if last_string:
1956 yield tokenize.NAME, last_string, last_start, last_end, last_line
1957
cf2ac6df 1958 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
1959 selectors = []
1960 current_selector = None
1961 for type, string, start, _, _ in tokens:
1962 # ENCODING is only defined in python 3.x
1963 if type == getattr(tokenize, 'ENCODING', None):
1964 continue
1965 elif type in [tokenize.NAME, tokenize.NUMBER]:
1966 current_selector = FormatSelector(SINGLE, string, [])
1967 elif type == tokenize.OP:
cf2ac6df
JMF
1968 if string == ')':
1969 if not inside_group:
1970 # ')' will be handled by the parentheses group
1971 tokens.restore_last_token()
67134eab 1972 break
cf2ac6df 1973 elif inside_merge and string in ['/', ',']:
0130afb7
JMF
1974 tokens.restore_last_token()
1975 break
cf2ac6df
JMF
1976 elif inside_choice and string == ',':
1977 tokens.restore_last_token()
1978 break
1979 elif string == ',':
0a31a350
JMF
1980 if not current_selector:
1981 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
1982 selectors.append(current_selector)
1983 current_selector = None
1984 elif string == '/':
d96d604e
JMF
1985 if not current_selector:
1986 raise syntax_error('"/" must follow a format selector', start)
67134eab 1987 first_choice = current_selector
cf2ac6df 1988 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 1989 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
67134eab
JMF
1990 elif string == '[':
1991 if not current_selector:
1992 current_selector = FormatSelector(SINGLE, 'best', [])
1993 format_filter = _parse_filter(tokens)
1994 current_selector.filters.append(format_filter)
0130afb7
JMF
1995 elif string == '(':
1996 if current_selector:
1997 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
1998 group = _parse_format_selection(tokens, inside_group=True)
1999 current_selector = FormatSelector(GROUP, group, [])
67134eab 2000 elif string == '+':
d03cfdce 2001 if not current_selector:
2002 raise syntax_error('Unexpected "+"', start)
2003 selector_1 = current_selector
2004 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2005 if not selector_2:
2006 raise syntax_error('Expected a selector', start)
2007 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab 2008 else:
86e5f3ed 2009 raise syntax_error(f'Operator not recognized: "{string}"', start)
67134eab
JMF
2010 elif type == tokenize.ENDMARKER:
2011 break
2012 if current_selector:
2013 selectors.append(current_selector)
2014 return selectors
2015
f8d4ad9a 2016 def _merge(formats_pair):
2017 format_1, format_2 = formats_pair
2018
2019 formats_info = []
2020 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2021 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2022
2023 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
551f9388 2024 get_no_more = {'video': False, 'audio': False}
f8d4ad9a 2025 for (i, fmt_info) in enumerate(formats_info):
551f9388 2026 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2027 formats_info.pop(i)
2028 continue
2029 for aud_vid in ['audio', 'video']:
f8d4ad9a 2030 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2031 if get_no_more[aud_vid]:
2032 formats_info.pop(i)
f5510afe 2033 break
f8d4ad9a 2034 get_no_more[aud_vid] = True
2035
2036 if len(formats_info) == 1:
2037 return formats_info[0]
2038
2039 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2040 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2041
2042 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2043 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2044
2045 output_ext = self.params.get('merge_output_format')
2046 if not output_ext:
2047 if the_only_video:
2048 output_ext = the_only_video['ext']
2049 elif the_only_audio and not video_fmts:
2050 output_ext = the_only_audio['ext']
2051 else:
2052 output_ext = 'mkv'
2053
975a0d0d 2054 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2055
f8d4ad9a 2056 new_dict = {
2057 'requested_formats': formats_info,
975a0d0d 2058 'format': '+'.join(filtered('format')),
2059 'format_id': '+'.join(filtered('format_id')),
f8d4ad9a 2060 'ext': output_ext,
975a0d0d 2061 'protocol': '+'.join(map(determine_protocol, formats_info)),
093a1710 2062 'language': '+'.join(orderedSet(filtered('language'))) or None,
2063 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2064 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
975a0d0d 2065 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
f8d4ad9a 2066 }
2067
2068 if the_only_video:
2069 new_dict.update({
2070 'width': the_only_video.get('width'),
2071 'height': the_only_video.get('height'),
2072 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2073 'fps': the_only_video.get('fps'),
49a57e70 2074 'dynamic_range': the_only_video.get('dynamic_range'),
f8d4ad9a 2075 'vcodec': the_only_video.get('vcodec'),
2076 'vbr': the_only_video.get('vbr'),
2077 'stretched_ratio': the_only_video.get('stretched_ratio'),
2078 })
2079
2080 if the_only_audio:
2081 new_dict.update({
2082 'acodec': the_only_audio.get('acodec'),
2083 'abr': the_only_audio.get('abr'),
975a0d0d 2084 'asr': the_only_audio.get('asr'),
f8d4ad9a 2085 })
2086
2087 return new_dict
2088
e8e73840 2089 def _check_formats(formats):
981052c9 2090 if not check_formats:
2091 yield from formats
b5ac45b1 2092 return
9f1a1c36 2093 yield from self._check_formats(formats)
e8e73840 2094
67134eab 2095 def _build_selector_function(selector):
909d24dd 2096 if isinstance(selector, list): # ,
67134eab
JMF
2097 fs = [_build_selector_function(s) for s in selector]
2098
317f7ab6 2099 def selector_function(ctx):
67134eab 2100 for f in fs:
981052c9 2101 yield from f(ctx)
67134eab 2102 return selector_function
909d24dd 2103
2104 elif selector.type == GROUP: # ()
0130afb7 2105 selector_function = _build_selector_function(selector.selector)
909d24dd 2106
2107 elif selector.type == PICKFIRST: # /
67134eab
JMF
2108 fs = [_build_selector_function(s) for s in selector.selector]
2109
317f7ab6 2110 def selector_function(ctx):
67134eab 2111 for f in fs:
317f7ab6 2112 picked_formats = list(f(ctx))
67134eab
JMF
2113 if picked_formats:
2114 return picked_formats
2115 return []
67134eab 2116
981052c9 2117 elif selector.type == MERGE: # +
2118 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2119
2120 def selector_function(ctx):
adbc4ec4 2121 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
981052c9 2122 yield _merge(pair)
2123
909d24dd 2124 elif selector.type == SINGLE: # atom
598d185d 2125 format_spec = selector.selector or 'best'
909d24dd 2126
f8d4ad9a 2127 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 2128 if format_spec == 'all':
2129 def selector_function(ctx):
9222c381 2130 yield from _check_formats(ctx['formats'][::-1])
f8d4ad9a 2131 elif format_spec == 'mergeall':
2132 def selector_function(ctx):
316f2650 2133 formats = list(_check_formats(
2134 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
e01d6aa4 2135 if not formats:
2136 return
921b76ca 2137 merged_format = formats[-1]
2138 for f in formats[-2::-1]:
f8d4ad9a 2139 merged_format = _merge((merged_format, f))
2140 yield merged_format
909d24dd 2141
2142 else:
85e801a9 2143 format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
eff63539 2144 mobj = re.match(
2145 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2146 format_spec)
2147 if mobj is not None:
2148 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 2149 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 2150 format_type = (mobj.group('type') or [None])[0]
2151 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2152 format_modified = mobj.group('mod') is not None
909d24dd 2153
2154 format_fallback = not format_type and not format_modified # for b, w
8326b00a 2155 _filter_f = (
eff63539 2156 (lambda f: f.get('%scodec' % format_type) != 'none')
2157 if format_type and format_modified # bv*, ba*, wv*, wa*
2158 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2159 if format_type # bv, ba, wv, wa
2160 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2161 if not format_modified # b, w
8326b00a 2162 else lambda f: True) # b*, w*
2163 filter_f = lambda f: _filter_f(f) and (
2164 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
67134eab 2165 else:
48ee10ee 2166 if format_spec in self._format_selection_exts['audio']:
b11c04a8 2167 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
48ee10ee 2168 elif format_spec in self._format_selection_exts['video']:
b11c04a8 2169 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
85e801a9 2170 seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
48ee10ee 2171 elif format_spec in self._format_selection_exts['storyboards']:
b11c04a8 2172 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2173 else:
b5ae35ee 2174 filter_f = lambda f: f.get('format_id') == format_spec # id
909d24dd 2175
2176 def selector_function(ctx):
2177 formats = list(ctx['formats'])
909d24dd 2178 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
85e801a9 2179 if not matches:
2180 if format_fallback and ctx['incomplete_formats']:
2181 # for extractors with incomplete formats (audio-only (soundcloud)
2182 # or video-only (imgur)), best/worst will fall back to the
2183 # best/worst {video,audio}-only format
2184 matches = formats
2185 elif seperate_fallback and not ctx['has_merged_format']:
2186 # for compatibility with youtube-dl when there is no pre-merged format
2187 matches = list(filter(seperate_fallback, formats))
981052c9 2188 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2189 try:
e8e73840 2190 yield matches[format_idx - 1]
4abea8ca 2191 except LazyList.IndexError:
981052c9 2192 return
083c9df9 2193
67134eab 2194 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 2195
317f7ab6 2196 def final_selector(ctx):
adbc4ec4 2197 ctx_copy = dict(ctx)
67134eab 2198 for _filter in filters:
317f7ab6
S
2199 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2200 return selector_function(ctx_copy)
67134eab 2201 return final_selector
083c9df9 2202
0f06bcd7 2203 stream = io.BytesIO(format_spec.encode())
0130afb7 2204 try:
f9934b96 2205 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
0130afb7
JMF
2206 except tokenize.TokenError:
2207 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2208
86e5f3ed 2209 class TokenIterator:
0130afb7
JMF
2210 def __init__(self, tokens):
2211 self.tokens = tokens
2212 self.counter = 0
2213
2214 def __iter__(self):
2215 return self
2216
2217 def __next__(self):
2218 if self.counter >= len(self.tokens):
2219 raise StopIteration()
2220 value = self.tokens[self.counter]
2221 self.counter += 1
2222 return value
2223
2224 next = __next__
2225
2226 def restore_last_token(self):
2227 self.counter -= 1
2228
2229 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 2230 return _build_selector_function(parsed_selector)
a9c58ad9 2231
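# A short sketch of the selector grammar parsed by build_format_selector()
# above: '/' is fallback, '+' merges video and audio, ',' selects several
# formats, '()' groups and '[...]' attaches filters. The spec is an example.
import yt_dlp

ydl = yt_dlp.YoutubeDL({'format': 'bv*[height<=1080]+ba/b'})
# equivalent long form: 'bestvideo*[height<=1080]+bestaudio/best'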
e5660ee6 2232 def _calc_headers(self, info_dict):
8b7539d2 2233 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
e5660ee6 2234
c487cf00 2235 cookies = self._calc_cookies(info_dict['url'])
e5660ee6
JMF
2236 if cookies:
2237 res['Cookie'] = cookies
2238
0016b84e
S
2239 if 'X-Forwarded-For' not in res:
2240 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2241 if x_forwarded_for_ip:
2242 res['X-Forwarded-For'] = x_forwarded_for_ip
2243
e5660ee6
JMF
2244 return res
2245
c487cf00 2246 def _calc_cookies(self, url):
2247 pr = sanitized_Request(url)
e5660ee6 2248 self.cookiejar.add_cookie_header(pr)
662435f7 2249 return pr.get_header('Cookie')
e5660ee6 2250
9f1a1c36 2251 def _sort_thumbnails(self, thumbnails):
2252 thumbnails.sort(key=lambda t: (
2253 t.get('preference') if t.get('preference') is not None else -1,
2254 t.get('width') if t.get('width') is not None else -1,
2255 t.get('height') if t.get('height') is not None else -1,
2256 t.get('id') if t.get('id') is not None else '',
2257 t.get('url')))
2258
b0249bca 2259 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2260 thumbnails = info_dict.get('thumbnails')
2261 if thumbnails is None:
2262 thumbnail = info_dict.get('thumbnail')
2263 if thumbnail:
2264 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
9f1a1c36 2265 if not thumbnails:
2266 return
2267
2268 def check_thumbnails(thumbnails):
2269 for t in thumbnails:
2270 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2271 try:
2272 self.urlopen(HEADRequest(t['url']))
2273 except network_exceptions as err:
2274 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2275 continue
2276 yield t
2277
2278 self._sort_thumbnails(thumbnails)
2279 for i, t in enumerate(thumbnails):
2280 if t.get('id') is None:
2281 t['id'] = '%d' % i
2282 if t.get('width') and t.get('height'):
2283 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2284 t['url'] = sanitize_url(t['url'])
2285
2286 if self.params.get('check_formats') is True:
282f5709 2287 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
9f1a1c36 2288 else:
2289 info_dict['thumbnails'] = thumbnails
bc516a3f 2290
03f83004
LNO
2291 def _fill_common_fields(self, info_dict, is_video=True):
2292 # TODO: move sanitization here
2293 if is_video:
2294 # playlists are allowed to lack "title"
d4736fdb 2295 title = info_dict.get('title', NO_DEFAULT)
2296 if title is NO_DEFAULT:
03f83004
LNO
2297 raise ExtractorError('Missing "title" field in extractor result',
2298 video_id=info_dict['id'], ie=info_dict['extractor'])
d4736fdb 2299 info_dict['fulltitle'] = title
2300 if not title:
2301 if title == '':
2302 self.write_debug('Extractor gave empty title. Creating a generic title')
2303 else:
2304 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
1d485a1a 2305 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
03f83004
LNO
2306
2307 if info_dict.get('duration') is not None:
2308 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2309
2310 for ts_key, date_key in (
2311 ('timestamp', 'upload_date'),
2312 ('release_timestamp', 'release_date'),
2313 ('modified_timestamp', 'modified_date'),
2314 ):
2315 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2316 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2317 # see http://bugs.python.org/issue1646728)
19a03940 2318 with contextlib.suppress(ValueError, OverflowError, OSError):
03f83004
LNO
2319 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2320 info_dict[date_key] = upload_date.strftime('%Y%m%d')
03f83004
LNO
2321
2322 live_keys = ('is_live', 'was_live')
2323 live_status = info_dict.get('live_status')
2324 if live_status is None:
2325 for key in live_keys:
2326 if info_dict.get(key) is False:
2327 continue
2328 if info_dict.get(key):
2329 live_status = key
2330 break
2331 if all(info_dict.get(key) is False for key in live_keys):
2332 live_status = 'not_live'
2333 if live_status:
2334 info_dict['live_status'] = live_status
2335 for key in live_keys:
2336 if info_dict.get(key) is None:
2337 info_dict[key] = (live_status == key)
2338
2339 # Auto generate title fields corresponding to the *_number fields when missing
2340 # in order to always have clean titles. This is very common for TV series.
2341 for field in ('chapter', 'season', 'episode'):
2342 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2343 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2344
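# Worked example of the timestamp-to-date conversion performed above
# (the timestamp value is arbitrary):
import datetime

datetime.datetime.utcfromtimestamp(1650000000).strftime('%Y%m%d')  # -> '20220415'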
415f8d51 2345 def _raise_pending_errors(self, info):
2346 err = info.pop('__pending_error', None)
2347 if err:
2348 self.report_error(err, tb=False)
2349
dd82ffea
JMF
2350 def process_video_result(self, info_dict, download=True):
2351 assert info_dict.get('_type', 'video') == 'video'
9c906919 2352 self._num_videos += 1
dd82ffea 2353
bec1fad2 2354 if 'id' not in info_dict:
fc08bdd6 2355 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2356 elif not info_dict.get('id'):
2357 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
455a15e2 2358
c9969434
S
2359 def report_force_conversion(field, field_not, conversion):
2360 self.report_warning(
2361 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2362 % (field, field_not, conversion))
2363
2364 def sanitize_string_field(info, string_field):
2365 field = info.get(string_field)
2366 if field is None or isinstance(field, compat_str):
2367 return
2368 report_force_conversion(string_field, 'a string', 'string')
2369 info[string_field] = compat_str(field)
2370
2371 def sanitize_numeric_fields(info):
2372 for numeric_field in self._NUMERIC_FIELDS:
2373 field = info.get(numeric_field)
f9934b96 2374 if field is None or isinstance(field, (int, float)):
c9969434
S
2375 continue
2376 report_force_conversion(numeric_field, 'numeric', 'int')
2377 info[numeric_field] = int_or_none(field)
2378
2379 sanitize_string_field(info_dict, 'id')
2380 sanitize_numeric_fields(info_dict)
3975b4d2 2381 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2382 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
4c3f8c3f 2383 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
50e93e03 2384 self.report_warning('"duration" field is negative, there is an error in extractor')
be6217b2 2385
dd82ffea
JMF
2386 if 'playlist' not in info_dict:
2387 # It isn't part of a playlist
2388 info_dict['playlist'] = None
2389 info_dict['playlist_index'] = None
2390
bc516a3f 2391 self._sanitize_thumbnails(info_dict)
d5519808 2392
536a55da 2393 thumbnail = info_dict.get('thumbnail')
bc516a3f 2394 thumbnails = info_dict.get('thumbnails')
536a55da
S
2395 if thumbnail:
2396 info_dict['thumbnail'] = sanitize_url(thumbnail)
2397 elif thumbnails:
d5519808
PH
2398 info_dict['thumbnail'] = thumbnails[-1]['url']
2399
ae30b840 2400 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2401 info_dict['display_id'] = info_dict['id']
2402
03f83004 2403 self._fill_common_fields(info_dict)
33d2fc2f 2404
05108a49
S
2405 for cc_kind in ('subtitles', 'automatic_captions'):
2406 cc = info_dict.get(cc_kind)
2407 if cc:
2408 for _, subtitle in cc.items():
2409 for subtitle_format in subtitle:
2410 if subtitle_format.get('url'):
2411 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2412 if subtitle_format.get('ext') is None:
2413 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2414
2415 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2416 subtitles = info_dict.get('subtitles')
4bba3716 2417
360e1ca5 2418 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2419 info_dict['id'], subtitles, automatic_captions)
a504ced0 2420
dd82ffea
JMF
2421 if info_dict.get('formats') is None:
2422 # There's only one format available
2423 formats = [info_dict]
2424 else:
2425 formats = info_dict['formats']
2426
0a5a191a 2427 # or None ensures --clean-infojson removes it
2428 info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
88acdbc2 2429 if not self.params.get('allow_unplayable_formats'):
2430 formats = [f for f in formats if not f.get('has_drm')]
0a5a191a 2431 if info_dict['_has_drm'] and all(
c0b6e5c7 2432 f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2433 self.report_warning(
2434 'This video is DRM protected and only images are available for download. '
2435 'Use --list-formats to see them')
88acdbc2 2436
319b6059 2437 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2438 if not get_from_start:
2439 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2440 if info_dict.get('is_live') and formats:
adbc4ec4 2441 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
319b6059 2442 if get_from_start and not formats:
a44ca5a4 2443 self.raise_no_formats(info_dict, msg=(
2444 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2445 'If you want to download from the current time, use --no-live-from-start'))
adbc4ec4 2446
db95dc13 2447 if not formats:
1151c407 2448 self.raise_no_formats(info_dict)
db95dc13 2449
73af5cc8
S
2450 def is_wellformed(f):
2451 url = f.get('url')
a5ac0c47 2452 if not url:
73af5cc8
S
2453 self.report_warning(
2454 '"url" field is missing or empty - skipping format, '
2455 'there is an error in extractor')
a5ac0c47
S
2456 return False
2457 if isinstance(url, bytes):
2458 sanitize_string_field(f, 'url')
2459 return True
73af5cc8
S
2460
2461 # Filter out malformed formats for better extraction robustness
2462 formats = list(filter(is_wellformed, formats))
2463
181c7053
S
2464 formats_dict = {}
2465
dd82ffea 2466 # We check that all the formats have the format and format_id fields
db95dc13 2467 for i, format in enumerate(formats):
c9969434
S
2468 sanitize_string_field(format, 'format_id')
2469 sanitize_numeric_fields(format)
dcf77cf1 2470 format['url'] = sanitize_url(format['url'])
e74e3b63 2471 if not format.get('format_id'):
8016c922 2472 format['format_id'] = compat_str(i)
e2effb08
S
2473 else:
2474 # Sanitize format_id from characters used in format selector expression
ec85ded8 2475 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
181c7053
S
2476 format_id = format['format_id']
2477 if format_id not in formats_dict:
2478 formats_dict[format_id] = []
2479 formats_dict[format_id].append(format)
2480
2481 # Make sure all formats have unique format_id
03b4de72 2482 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
181c7053 2483 for format_id, ambiguous_formats in formats_dict.items():
48ee10ee 2484 ambigious_id = len(ambiguous_formats) > 1
2485 for i, format in enumerate(ambiguous_formats):
2486 if ambigious_id:
181c7053 2487 format['format_id'] = '%s-%d' % (format_id, i)
48ee10ee 2488 if format.get('ext') is None:
2489 format['ext'] = determine_ext(format['url']).lower()
2490 # Ensure there is no conflict between id and ext in format selection
2491 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2492 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2493 format['format_id'] = 'f%s' % format['format_id']
181c7053
S
2494
2495 for i, format in enumerate(formats):
8c51aa65 2496 if format.get('format') is None:
6febd1c1 2497 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
2498 id=format['format_id'],
2499 res=self.format_resolution(format),
b868936c 2500 note=format_field(format, 'format_note', ' (%s)'),
8c51aa65 2501 )
6f0be937 2502 if format.get('protocol') is None:
b5559424 2503 format['protocol'] = determine_protocol(format)
239df021 2504 if format.get('resolution') is None:
2505 format['resolution'] = self.format_resolution(format, default=None)
176f1866 2506 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2507 format['dynamic_range'] = 'SDR'
f2fe69c7 2508 if (info_dict.get('duration') and format.get('tbr')
2509 and not format.get('filesize') and not format.get('filesize_approx')):
56ba69e4 2510 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
f2fe69c7 2511
e5660ee6
JMF
2512 # Add HTTP headers, so that external programs can use them from the
2513 # json output
2514 full_format_info = info_dict.copy()
2515 full_format_info.update(format)
2516 format['http_headers'] = self._calc_headers(full_format_info)
0016b84e
S
2517 # Remove private housekeeping stuff
2518 if '__x_forwarded_for_ip' in info_dict:
2519 del info_dict['__x_forwarded_for_ip']
dd82ffea 2520
9f1a1c36 2521 if self.params.get('check_formats') is True:
282f5709 2522 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
9f1a1c36 2523
88acdbc2 2524 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
2525 # only set the 'formats' field if the original info_dict lists them
2526 # otherwise we end up with a circular reference, the first (and only)
f89197d7 2527 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2528 # which can't be exported to json
b3d9ef88 2529 info_dict['formats'] = formats
4ec82a72 2530
2531 info_dict, _ = self.pre_process(info_dict)
2532
6db9c4d5 2533 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
09b49e1f 2534 return info_dict
2535
2536 self.post_extract(info_dict)
2537 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2538
093a1710 2539 # The pre-processors may have modified the formats
2540 formats = info_dict.get('formats', [info_dict])
2541
fa9f30b8 2542 list_only = self.params.get('simulate') is None and (
2543 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2544 interactive_format_selection = not list_only and self.format_selector == '-'
b7b04c78 2545 if self.params.get('list_thumbnails'):
2546 self.list_thumbnails(info_dict)
b7b04c78 2547 if self.params.get('listsubtitles'):
2548 if 'automatic_captions' in info_dict:
2549 self.list_subtitles(
2550 info_dict['id'], automatic_captions, 'automatic captions')
2551 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
fa9f30b8 2552 if self.params.get('listformats') or interactive_format_selection:
b69fd25c 2553 self.list_formats(info_dict)
169dbde9 2554 if list_only:
b7b04c78 2555 # Without this printing, -F --print-json will not work
169dbde9 2556 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
c487cf00 2557 return info_dict
bfaae0a7 2558
187986a8 2559 format_selector = self.format_selector
2560 if format_selector is None:
0017d9ad 2561 req_format = self._default_format_spec(info_dict, download=download)
0760b0a7 2562 self.write_debug('Default format spec: %s' % req_format)
187986a8 2563 format_selector = self.build_format_selector(req_format)
317f7ab6 2564
fa9f30b8 2565 while True:
2566 if interactive_format_selection:
2567 req_format = input(
2568 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2569 try:
2570 format_selector = self.build_format_selector(req_format)
2571 except SyntaxError as err:
2572 self.report_error(err, tb=False, is_error=False)
2573 continue
2574
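# The selector also receives context: whether any single format already has
# both audio and video, and whether all formats are video-only or all audio-only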
85e801a9 2575 formats_to_download = list(format_selector({
fa9f30b8 2576 'formats': formats,
85e801a9 2577 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2578 'incomplete_formats': (
2579 # All formats are video-only or
2580 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2581 # all formats are audio-only
2582 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2583 }))
fa9f30b8 2584 if interactive_format_selection and not formats_to_download:
2585 self.report_error('Requested format is not available', tb=False, is_error=False)
2586 continue
2587 break
317f7ab6 2588
dd82ffea 2589 if not formats_to_download:
b7da73eb 2590 if not self.params.get('ignore_no_formats_error'):
c0b6e5c7 2591 raise ExtractorError(
2592 'Requested format is not available. Use --list-formats for a list of available formats',
2593 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
b62fa6d7 2594 self.report_warning('Requested format is not available')
2595 # Process what we can, even without any available formats.
2596 formats_to_download = [{}]
a13e6848 2597
5ec1b6b7 2598 requested_ranges = self.params.get('download_ranges')
2599 if requested_ranges:
2600 requested_ranges = tuple(requested_ranges(info_dict, self))
2601
2602 best_format, downloaded_formats = formats_to_download[-1], []
b62fa6d7 2603 if download:
2604 if best_format:
5ec1b6b7 2605 def to_screen(*msg):
2606 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2607
2608 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2609 (f['format_id'] for f in formats_to_download))
2610 if requested_ranges:
2611 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2612 (f'{int(c["start_time"])}-{int(c["end_time"])}' for c in requested_ranges))
a13e6848 2613 max_downloads_reached = False
5ec1b6b7 2614
2615 for fmt, chapter in itertools.product(formats_to_download, requested_ranges or [{}]):
2616 new_info = self._copy_infodict(info_dict)
b7da73eb 2617 new_info.update(fmt)
3975b4d2 2618 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
2619 if chapter or offset:
5ec1b6b7 2620 new_info.update({
3975b4d2 2621 'section_start': offset + chapter.get('start_time', 0),
2622 'section_end': offset + min(chapter.get('end_time', 0), duration),
5ec1b6b7 2623 'section_title': chapter.get('title'),
2624 'section_number': chapter.get('index'),
2625 })
2626 downloaded_formats.append(new_info)
a13e6848 2627 try:
2628 self.process_info(new_info)
2629 except MaxDownloadsReached:
2630 max_downloads_reached = True
415f8d51 2631 self._raise_pending_errors(new_info)
f46e2f9d 2632 # Remove copied info
2633 for key, val in tuple(new_info.items()):
2634 if info_dict.get(key) == val:
2635 new_info.pop(key)
a13e6848 2636 if max_downloads_reached:
2637 break
ebed8b37 2638
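# Record in the download archive only if at least one download requested it and none explicitly refused ('ignore' entries do not block it)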
5ec1b6b7 2639 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
a13e6848 2640 assert write_archive.issubset({True, False, 'ignore'})
2641 if True in write_archive and False not in write_archive:
2642 self.record_download_archive(info_dict)
be72c624 2643
5ec1b6b7 2644 info_dict['requested_downloads'] = downloaded_formats
ed5835b4 2645 info_dict = self.run_all_pps('after_video', info_dict)
a13e6848 2646 if max_downloads_reached:
2647 raise MaxDownloadsReached()
ebed8b37 2648
49a57e70 2649 # We update the info dict with the selected best quality format (backwards compatibility)
be72c624 2650 info_dict.update(best_format)
dd82ffea
JMF
2651 return info_dict
2652
98c70d6f 2653 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2654 """Select the requested subtitles and their format"""
d8a58ddc 2655 available_subs, normal_sub_langs = {}, []
98c70d6f
JMF
2656 if normal_subtitles and self.params.get('writesubtitles'):
2657 available_subs.update(normal_subtitles)
d8a58ddc 2658 normal_sub_langs = tuple(normal_subtitles.keys())
98c70d6f
JMF
2659 if automatic_captions and self.params.get('writeautomaticsub'):
2660 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
2661 if lang not in available_subs:
2662 available_subs[lang] = cap_info
2663
4d171848
JMF
2664 if (not self.params.get('writesubtitles')
2665 and not self.params.get('writeautomaticsub')
2666 or not available_subs):
2667 return None
a504ced0 2668
d8a58ddc 2669 all_sub_langs = tuple(available_subs.keys())
a504ced0 2670 if self.params.get('allsubtitles', False):
c32b0aab 2671 requested_langs = all_sub_langs
2672 elif self.params.get('subtitleslangs', False):
77c4a9ef 2673 # A list is used so that the order of languages will be the same as
2674 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
2675 requested_langs = []
2676 for lang_re in self.params.get('subtitleslangs'):
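# Each entry is a regex; a leading '-' discards the matching languages, and 'all' selects (or, with '-all', clears) every language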
77c4a9ef 2677 discard = lang_re[0] == '-'
c32b0aab 2678 if discard:
77c4a9ef 2679 lang_re = lang_re[1:]
3aa91540 2680 if lang_re == 'all':
2681 if discard:
2682 requested_langs = []
2683 else:
2684 requested_langs.extend(all_sub_langs)
2685 continue
77c4a9ef 2686 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
c32b0aab 2687 if discard:
2688 for lang in current_langs:
77c4a9ef 2689 while lang in requested_langs:
2690 requested_langs.remove(lang)
c32b0aab 2691 else:
77c4a9ef 2692 requested_langs.extend(current_langs)
2693 requested_langs = orderedSet(requested_langs)
d8a58ddc 2694 elif normal_sub_langs:
2695 requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
a504ced0 2696 else:
d8a58ddc 2697 requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
ad3dc496 2698 if requested_langs:
2699 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
a504ced0
JMF
2700
2701 formats_query = self.params.get('subtitlesformat', 'best')
2702 formats_preference = formats_query.split('/') if formats_query else []
2703 subs = {}
2704 for lang in requested_langs:
2705 formats = available_subs.get(lang)
2706 if formats is None:
86e5f3ed 2707 self.report_warning(f'{lang} subtitles not available for {video_id}')
a504ced0 2708 continue
a504ced0
JMF
2709 for ext in formats_preference:
2710 if ext == 'best':
2711 f = formats[-1]
2712 break
2713 matches = list(filter(lambda f: f['ext'] == ext, formats))
2714 if matches:
2715 f = matches[-1]
2716 break
2717 else:
2718 f = formats[-1]
2719 self.report_warning(
2720 'No subtitle format found matching "%s" for language %s, '
2721 'using %s' % (formats_query, lang, f['ext']))
2722 subs[lang] = f
2723 return subs
2724
bb66c247 2725 def _forceprint(self, key, info_dict):
2726 if info_dict is None:
2727 return
2728 info_copy = info_dict.copy()
2729 info_copy['formats_table'] = self.render_formats_table(info_dict)
2730 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2731 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2732 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2733
2734 def format_tmpl(tmpl):
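# e.g. 'title' -> '%(title)s' and 'title=' -> 'title = %(title)r'; anything else is treated as an output template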
2735 mobj = re.match(r'\w+(=?)$', tmpl)
2736 if mobj and mobj.group(1):
2737 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2738 elif mobj:
2739 return f'%({tmpl})s'
2740 return tmpl
8130779d 2741
bb66c247 2742 for tmpl in self.params['forceprint'].get(key, []):
2743 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2744
2745 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
5127e92a 2746 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
bb66c247 2747 tmpl = format_tmpl(tmpl)
2748 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
8d93e69d 2749 if self._ensure_dir_exists(filename):
86e5f3ed 2750 with open(filename, 'a', encoding='utf-8') as f:
8d93e69d 2751 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
ca30f449 2752
d06daf23 2753 def __forced_printings(self, info_dict, filename, incomplete):
53c18592 2754 def print_mandatory(field, actual_field=None):
2755 if actual_field is None:
2756 actual_field = field
d06daf23 2757 if (self.params.get('force%s' % field, False)
53c18592 2758 and (not incomplete or info_dict.get(actual_field) is not None)):
2759 self.to_stdout(info_dict[actual_field])
d06daf23
S
2760
2761 def print_optional(field):
2762 if (self.params.get('force%s' % field, False)
2763 and info_dict.get(field) is not None):
2764 self.to_stdout(info_dict[field])
2765
53c18592 2766 info_dict = info_dict.copy()
2767 if filename is not None:
2768 info_dict['filename'] = filename
2769 if info_dict.get('requested_formats') is not None:
2770 # For RTMP URLs, also include the playpath
2771 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
10331a26 2772 elif info_dict.get('url'):
53c18592 2773 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2774
bb66c247 2775 if (self.params.get('forcejson')
2776 or self.params['forceprint'].get('video')
2777 or self.params['print_to_file'].get('video')):
2b8a2973 2778 self.post_extract(info_dict)
bb66c247 2779 self._forceprint('video', info_dict)
53c18592 2780
d06daf23
S
2781 print_mandatory('title')
2782 print_mandatory('id')
53c18592 2783 print_mandatory('url', 'urls')
d06daf23
S
2784 print_optional('thumbnail')
2785 print_optional('description')
53c18592 2786 print_optional('filename')
b868936c 2787 if self.params.get('forceduration') and info_dict.get('duration') is not None:
d06daf23
S
2788 self.to_stdout(formatSeconds(info_dict['duration']))
2789 print_mandatory('format')
53c18592 2790
2b8a2973 2791 if self.params.get('forcejson'):
6e84b215 2792 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2793
e8e73840 2794 def dl(self, name, info, subtitle=False, test=False):
88acdbc2 2795 if not info.get('url'):
1151c407 2796 self.raise_no_formats(info, True)
e8e73840 2797
2798 if test:
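# Test mode probes the URL by downloading only a small portion, quietly and without .part/.ytdl files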
2799 verbose = self.params.get('verbose')
2800 params = {
2801 'test': True,
a169858f 2802 'quiet': self.params.get('quiet') or not verbose,
e8e73840 2803 'verbose': verbose,
2804 'noprogress': not verbose,
2805 'nopart': True,
2806 'skip_unavailable_fragments': False,
2807 'keep_fragments': False,
2808 'overwrites': True,
2809 '_no_ytdl_file': True,
2810 }
2811 else:
2812 params = self.params
96fccc10 2813 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
e8e73840 2814 if not test:
2815 for ph in self._progress_hooks:
2816 fd.add_progress_hook(ph)
42676437
M
2817 urls = '", "'.join(
2818 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2819 for f in info.get('requested_formats', []) or [info])
3a408f9d 2820 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
03b4de72 2821
adbc4ec4
THD
2822 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
2823 # But it may contain objects that are not deep-copyable
2824 new_info = self._copy_infodict(info)
e8e73840 2825 if new_info.get('http_headers') is None:
2826 new_info['http_headers'] = self._calc_headers(new_info)
2827 return fd.download(name, new_info, subtitle)
2828
e04938ab 2829 def existing_file(self, filepaths, *, default_overwrite=True):
2830 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2831 if existing_files and not self.params.get('overwrites', default_overwrite):
2832 return existing_files[0]
2833
2834 for file in existing_files:
2835 self.report_file_delete(file)
2836 os.remove(file)
2837 return None
2838
8222d8de 2839 def process_info(self, info_dict):
09b49e1f 2840 """Process a single resolved IE result. (Modifies it in-place)"""
8222d8de
JMF
2841
2842 assert info_dict.get('_type', 'video') == 'video'
f46e2f9d 2843 original_infodict = info_dict
fd288278 2844
4513a41a 2845 if 'format' not in info_dict and 'ext' in info_dict:
8222d8de
JMF
2846 info_dict['format'] = info_dict['ext']
2847
09b49e1f 2848 # This is mostly just for backward compatibility of process_info
2849 # As a side-effect, this allows for format-specific filters
c77495e3 2850 if self._match_entry(info_dict) is not None:
9e907ebd 2851 info_dict['__write_download_archive'] = 'ignore'
8222d8de
JMF
2852 return
2853
09b49e1f 2854 # Does nothing under normal operation - for backward compatibility of process_info
277d6ff5 2855 self.post_extract(info_dict)
0c14d66a 2856 self._num_downloads += 1
8222d8de 2857
dcf64d43 2858 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 2859 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2860 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 2861 files_to_move = {}
8222d8de
JMF
2862
2863 # Forced printings
4513a41a 2864 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 2865
ca6d59d2 2866 def check_max_downloads():
2867 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
2868 raise MaxDownloadsReached()
2869
b7b04c78 2870 if self.params.get('simulate'):
9e907ebd 2871 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
ca6d59d2 2872 check_max_downloads()
8222d8de
JMF
2873 return
2874
de6000d9 2875 if full_filename is None:
8222d8de 2876 return
e92caff5 2877 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 2878 return
e92caff5 2879 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
2880 return
2881
80c03fa9 2882 if self._write_description('video', info_dict,
2883 self.prepare_filename(info_dict, 'description')) is None:
2884 return
2885
2886 sub_files = self._write_subtitles(info_dict, temp_filename)
2887 if sub_files is None:
2888 return
2889 files_to_move.update(dict(sub_files))
2890
2891 thumb_files = self._write_thumbnails(
2892 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2893 if thumb_files is None:
2894 return
2895 files_to_move.update(dict(thumb_files))
8222d8de 2896
80c03fa9 2897 infofn = self.prepare_filename(info_dict, 'infojson')
2898 _infojson_written = self._write_info_json('video', info_dict, infofn)
2899 if _infojson_written:
dac5df5a 2900 info_dict['infojson_filename'] = infofn
e75bb0d6 2901 # For backward compatibility, even though it was a private field
80c03fa9 2902 info_dict['__infojson_filename'] = infofn
2903 elif _infojson_written is None:
2904 return
2905
2906 # Note: Annotations are deprecated
2907 annofn = None
1fb07d10 2908 if self.params.get('writeannotations', False):
de6000d9 2909 annofn = self.prepare_filename(info_dict, 'annotation')
80c03fa9 2910 if annofn:
e92caff5 2911 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 2912 return
0c3d0f51 2913 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 2914 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
2915 elif not info_dict.get('annotations'):
2916 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
2917 else:
2918 try:
6febd1c1 2919 self.to_screen('[info] Writing video annotations to: ' + annofn)
86e5f3ed 2920 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
7b6fefc9
PH
2921 annofile.write(info_dict['annotations'])
2922 except (KeyError, TypeError):
6febd1c1 2923 self.report_warning('There are no annotations to write.')
86e5f3ed 2924 except OSError:
6febd1c1 2925 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 2926 return
1fb07d10 2927
732044af 2928 # Write internet shortcut files
08438d2c 2929 def _write_link_file(link_type):
60f3e995 2930 url = try_get(info_dict['webpage_url'], iri_to_uri)
2931 if not url:
2932 self.report_warning(
2933 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2934 return True
08438d2c 2935 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
0e6b018a
Z
2936 if not self._ensure_dir_exists(encodeFilename(linkfn)):
2937 return False
10e3742e 2938 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
08438d2c 2939 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2940 return True
2941 try:
2942 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
86e5f3ed 2943 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2944 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
60f3e995 2945 template_vars = {'url': url}
08438d2c 2946 if link_type == 'desktop':
2947 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2948 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
86e5f3ed 2949 except OSError:
08438d2c 2950 self.report_error(f'Cannot write internet shortcut {linkfn}')
2951 return False
732044af 2952 return True
2953
08438d2c 2954 write_links = {
2955 'url': self.params.get('writeurllink'),
2956 'webloc': self.params.get('writewebloclink'),
2957 'desktop': self.params.get('writedesktoplink'),
2958 }
2959 if self.params.get('writelink'):
2960 link_type = ('webloc' if sys.platform == 'darwin'
2961 else 'desktop' if sys.platform.startswith('linux')
2962 else 'url')
2963 write_links[link_type] = True
2964
2965 if any(should_write and not _write_link_file(link_type)
2966 for link_type, should_write in write_links.items()):
2967 return
732044af 2968
f46e2f9d 2969 def replace_info_dict(new_info):
2970 nonlocal info_dict
2971 if new_info == info_dict:
2972 return
2973 info_dict.clear()
2974 info_dict.update(new_info)
2975
415f8d51 2976 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2977 replace_info_dict(new_info)
56d868db 2978
a13e6848 2979 if self.params.get('skip_download'):
56d868db 2980 info_dict['filepath'] = temp_filename
2981 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2982 info_dict['__files_to_move'] = files_to_move
f46e2f9d 2983 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
9e907ebd 2984 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
56d868db 2985 else:
2986 # Download
b868936c 2987 info_dict.setdefault('__postprocessors', [])
4340deca 2988 try:
0202b52a 2989
e04938ab 2990 def existing_video_file(*filepaths):
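# Look for an already-downloaded file under both the original and the post-processed (final_ext) extensions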
6b591b29 2991 ext = info_dict.get('ext')
e04938ab 2992 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
2993 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
2994 default_overwrite=False)
2995 if file:
2996 info_dict['ext'] = os.path.splitext(file)[1][1:]
2997 return file
0202b52a 2998
7b2c3f47 2999 fd, success = None, True
fccf90e7 3000 if info_dict.get('protocol') or info_dict.get('url'):
56ba69e4 3001 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3002 if fd is not FFmpegFD and (
3003 info_dict.get('section_start') or info_dict.get('section_end')):
7b2c3f47 3004 msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
56ba69e4 3005 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3006 self.report_error(f'{msg}. Aborting')
5ec1b6b7 3007 return
5ec1b6b7 3008
4340deca 3009 if info_dict.get('requested_formats') is not None:
81cd954a
S
3010
3011 def compatible_formats(formats):
d03cfdce 3012 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3013 video_formats = [format for format in formats if format.get('vcodec') != 'none']
3014 audio_formats = [format for format in formats if format.get('acodec') != 'none']
3015 if len(video_formats) > 2 or len(audio_formats) > 2:
3016 return False
3017
81cd954a 3018 # Check extension
86e5f3ed 3019 exts = {format.get('ext') for format in formats}
d03cfdce 3020 COMPATIBLE_EXTS = (
86e5f3ed 3021 {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
3022 {'webm'},
d03cfdce 3023 )
3024 for ext_sets in COMPATIBLE_EXTS:
3025 if ext_sets.issuperset(exts):
3026 return True
81cd954a
S
3027 # TODO: Check acodec/vcodec
3028 return False
3029
3030 requested_formats = info_dict['requested_formats']
0202b52a 3031 old_ext = info_dict['ext']
4e3b637d 3032 if self.params.get('merge_output_format') is None:
3033 if not compatible_formats(requested_formats):
3034 info_dict['ext'] = 'mkv'
3035 self.report_warning(
3036 'Requested formats are incompatible for merge and will be merged into mkv')
3037 if (info_dict['ext'] == 'webm'
3038 and info_dict.get('thumbnails')
3039 # check with type instead of pp_key, __name__, or isinstance
3040 # since we don't want any custom PPs to trigger this
c487cf00 3041 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
4e3b637d 3042 info_dict['ext'] = 'mkv'
3043 self.report_warning(
3044 'webm doesn\'t support embedding a thumbnail, mkv will be used')
124bc071 3045 new_ext = info_dict['ext']
0202b52a 3046
124bc071 3047 def correct_ext(filename, ext=new_ext):
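# Swap the filename extension to the merge container's, leaving '-' (stdout) untouched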
96fccc10 3048 if filename == '-':
3049 return filename
0202b52a 3050 filename_real_ext = os.path.splitext(filename)[1][1:]
3051 filename_wo_ext = (
3052 os.path.splitext(filename)[0]
124bc071 3053 if filename_real_ext in (old_ext, new_ext)
0202b52a 3054 else filename)
86e5f3ed 3055 return f'{filename_wo_ext}.{ext}'
0202b52a 3056
38c6902b 3057 # Ensure filename always has a correct extension for successful merge
0202b52a 3058 full_filename = correct_ext(full_filename)
3059 temp_filename = correct_ext(temp_filename)
e04938ab 3060 dl_filename = existing_video_file(full_filename, temp_filename)
1ea24129 3061 info_dict['__real_download'] = False
18e674b4 3062
7b2c3f47 3063 merger = FFmpegMergerPP(self)
adbc4ec4 3064 downloaded = []
dbf5416a 3065 if dl_filename is not None:
6c7274ec 3066 self.report_file_already_downloaded(dl_filename)
adbc4ec4
THD
3067 elif fd:
3068 for f in requested_formats if fd != FFmpegFD else []:
3069 f['filepath'] = fname = prepend_extension(
3070 correct_ext(temp_filename, info_dict['ext']),
3071 'f%s' % f['format_id'], info_dict['ext'])
3072 downloaded.append(fname)
dbf5416a 3073 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3074 success, real_download = self.dl(temp_filename, info_dict)
3075 info_dict['__real_download'] = real_download
18e674b4 3076 else:
18e674b4 3077 if self.params.get('allow_unplayable_formats'):
3078 self.report_warning(
3079 'You have requested merging of multiple formats '
3080 'while also allowing unplayable formats to be downloaded. '
3081 'The formats won\'t be merged to prevent data corruption.')
3082 elif not merger.available:
e8969bda 3083 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3084 if not self.params.get('ignoreerrors'):
3085 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3086 return
3087 self.report_warning(f'{msg}. The formats won\'t be merged')
18e674b4 3088
96fccc10 3089 if temp_filename == '-':
adbc4ec4 3090 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
96fccc10 3091 else 'but the formats are incompatible for simultaneous download' if merger.available
3092 else 'but ffmpeg is not installed')
3093 self.report_warning(
3094 f'You have requested downloading multiple formats to stdout {reason}. '
3095 'The formats will be streamed one after the other')
3096 fname = temp_filename
dbf5416a 3097 for f in requested_formats:
3098 new_info = dict(info_dict)
3099 del new_info['requested_formats']
3100 new_info.update(f)
96fccc10 3101 if temp_filename != '-':
124bc071 3102 fname = prepend_extension(
3103 correct_ext(temp_filename, new_info['ext']),
3104 'f%s' % f['format_id'], new_info['ext'])
96fccc10 3105 if not self._ensure_dir_exists(fname):
3106 return
a21e0ab1 3107 f['filepath'] = fname
96fccc10 3108 downloaded.append(fname)
dbf5416a 3109 partial_success, real_download = self.dl(fname, new_info)
3110 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3111 success = success and partial_success
adbc4ec4
THD
3112
3113 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3114 info_dict['__postprocessors'].append(merger)
3115 info_dict['__files_to_merge'] = downloaded
3116 # Even if there were no new downloads, the merge itself only happens now
3117 info_dict['__real_download'] = True
3118 else:
3119 for file in downloaded:
3120 files_to_move[file] = None
4340deca
P
3121 else:
3122 # Just a single file
e04938ab 3123 dl_filename = existing_video_file(full_filename, temp_filename)
6c7274ec 3124 if dl_filename is None or dl_filename == temp_filename:
3125 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3126 # So we should try to resume the download
e8e73840 3127 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 3128 info_dict['__real_download'] = real_download
6c7274ec 3129 else:
3130 self.report_file_already_downloaded(dl_filename)
0202b52a 3131
0202b52a 3132 dl_filename = dl_filename or temp_filename
c571435f 3133 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 3134
3158150c 3135 except network_exceptions as err:
7960b056 3136 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca 3137 return
86e5f3ed 3138 except OSError as err:
4340deca
P
3139 raise UnavailableVideoError(err)
3140 except (ContentTooShortError, ) as err:
86e5f3ed 3141 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
4340deca 3142 return
8222d8de 3143
415f8d51 3144 self._raise_pending_errors(info_dict)
de6000d9 3145 if success and full_filename != '-':
f17f8651 3146
fd7cfb64 3147 def fixup():
3148 do_fixup = True
3149 fixup_policy = self.params.get('fixup')
3150 vid = info_dict['id']
3151
3152 if fixup_policy in ('ignore', 'never'):
3153 return
3154 elif fixup_policy == 'warn':
3fe75fdc 3155 do_fixup = 'warn'
f89b3e2d 3156 elif fixup_policy != 'force':
3157 assert fixup_policy in ('detect_or_warn', None)
3158 if not info_dict.get('__real_download'):
3159 do_fixup = False
fd7cfb64 3160
3161 def ffmpeg_fixup(cndn, msg, cls):
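# Queue the given fixup postprocessor when the condition holds; in 'warn' mode, only report the problem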
3fe75fdc 3162 if not (do_fixup and cndn):
fd7cfb64 3163 return
3fe75fdc 3164 elif do_fixup == 'warn':
fd7cfb64 3165 self.report_warning(f'{vid}: {msg}')
3166 return
3167 pp = cls(self)
3168 if pp.available:
3169 info_dict['__postprocessors'].append(pp)
3170 else:
3171 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3172
3173 stretched_ratio = info_dict.get('stretched_ratio')
3174 ffmpeg_fixup(
3175 stretched_ratio not in (1, None),
3176 f'Non-uniform pixel ratio {stretched_ratio}',
3177 FFmpegFixupStretchedPP)
3178
3179 ffmpeg_fixup(
3180 (info_dict.get('requested_formats') is None
3181 and info_dict.get('container') == 'm4a_dash'
3182 and info_dict.get('ext') == 'm4a'),
3183 'writing DASH m4a. Only some players support this container',
3184 FFmpegFixupM4aPP)
3185
993191c0 3186 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
24146491 3187 downloader = downloader.FD_NAME if downloader else None
adbc4ec4
THD
3188
3189 if info_dict.get('requested_formats') is None: # Not necessary if doing merger
24146491 3190 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
494f5230 3191 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
adbc4ec4
THD
3192 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3193 FFmpegFixupM3u8PP)
3194 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3195 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3196
24146491 3197 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3198 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 3199
3200 fixup()
8222d8de 3201 try:
f46e2f9d 3202 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
af819c21 3203 except PostProcessingError as err:
3204 self.report_error('Postprocessing: %s' % str(err))
8222d8de 3205 return
ab8e5e51
AM
3206 try:
3207 for ph in self._post_hooks:
23c1a667 3208 ph(info_dict['filepath'])
ab8e5e51
AM
3209 except Exception as err:
3210 self.report_error('post hooks: %s' % str(err))
3211 return
9e907ebd 3212 info_dict['__write_download_archive'] = True
2d30509f 3213
c487cf00 3214 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
a13e6848 3215 if self.params.get('force_write_download_archive'):
9e907ebd 3216 info_dict['__write_download_archive'] = True
ca6d59d2 3217 check_max_downloads()
8222d8de 3218
aa9369a2 3219 def __download_wrapper(self, func):
3220 @functools.wraps(func)
3221 def wrapper(*args, **kwargs):
3222 try:
3223 res = func(*args, **kwargs)
3224 except UnavailableVideoError as e:
3225 self.report_error(e)
b222c271 3226 except DownloadCancelled as e:
3227 self.to_screen(f'[info] {e}')
3228 if not self.params.get('break_per_url'):
3229 raise
aa9369a2 3230 else:
3231 if self.params.get('dump_single_json', False):
3232 self.post_extract(res)
3233 self.to_stdout(json.dumps(self.sanitize_info(res)))
3234 return wrapper
3235
8222d8de
JMF
3236 def download(self, url_list):
3237 """Download a given list of URLs."""
aa9369a2 3238 url_list = variadic(url_list) # Passing a single URL is a common mistake
bf1824b3 3239 outtmpl = self.params['outtmpl']['default']
3089bc74
S
3240 if (len(url_list) > 1
3241 and outtmpl != '-'
3242 and '%' not in outtmpl
3243 and self.params.get('max_downloads') != 1):
acd69589 3244 raise SameFileError(outtmpl)
8222d8de
JMF
3245
3246 for url in url_list:
aa9369a2 3247 self.__download_wrapper(self.extract_info)(
3248 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de
JMF
3249
3250 return self._download_retcode
3251
1dcc4c0c 3252 def download_with_info_file(self, info_filename):
31bd3925
JMF
3253 with contextlib.closing(fileinput.FileInput(
3254 [info_filename], mode='r',
3255 openhook=fileinput.hook_encoded('utf-8'))) as f:
3256 # FileInput doesn't have a read method, so we can't call json.load
8012d892 3257 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
d4943898 3258 try:
aa9369a2 3259 self.__download_wrapper(self.process_ie_result)(info, download=True)
f2ebc5c7 3260 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
bf5f605e 3261 if not isinstance(e, EntryNotInPlaylist):
3262 self.to_stderr('\r')
d4943898
JMF
3263 webpage_url = info.get('webpage_url')
3264 if webpage_url is not None:
aa9369a2 3265 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
d4943898
JMF
3266 return self.download([webpage_url])
3267 else:
3268 raise
3269 return self._download_retcode
1dcc4c0c 3270
cb202fd2 3271 @staticmethod
8012d892 3272 def sanitize_info(info_dict, remove_private_keys=False):
3273 ''' Sanitize the infodict for converting to json '''
3ad56b42 3274 if info_dict is None:
3275 return info_dict
6e84b215 3276 info_dict.setdefault('epoch', int(time.time()))
6a5a30f9 3277 info_dict.setdefault('_type', 'video')
09b49e1f 3278
8012d892 3279 if remove_private_keys:
0a5a191a 3280 reject = lambda k, v: v is None or k.startswith('__') or k in {
f46e2f9d 3281 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
0a5a191a 3282 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
6e84b215 3283 }
ae8f99e6 3284 else:
09b49e1f 3285 reject = lambda k, v: False
adbc4ec4
THD
3286
3287 def filter_fn(obj):
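# Recursively keep only JSON-serializable values; any other object is replaced by its repr()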
3288 if isinstance(obj, dict):
3289 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3290 elif isinstance(obj, (list, tuple, set, LazyList)):
3291 return list(map(filter_fn, obj))
3292 elif obj is None or isinstance(obj, (str, int, float, bool)):
3293 return obj
3294 else:
3295 return repr(obj)
3296
5226731e 3297 return filter_fn(info_dict)
cb202fd2 3298
8012d892 3299 @staticmethod
3300 def filter_requested_info(info_dict, actually_filter=True):
3301 ''' Alias of sanitize_info for backward compatibility '''
3302 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3303
43d7f5a5 3304 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3305 for filename in set(filter(None, files_to_delete)):
3306 if msg:
3307 self.to_screen(msg % filename)
3308 try:
3309 os.remove(filename)
3310 except OSError:
3311 self.report_warning(f'Unable to delete file {filename}')
3312 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3313 del info['__files_to_move'][filename]
3314
ed5835b4 3315 @staticmethod
3316 def post_extract(info_dict):
3317 def actual_post_extract(info_dict):
3318 if info_dict.get('_type') in ('playlist', 'multi_video'):
3319 for video_dict in info_dict.get('entries', {}):
3320 actual_post_extract(video_dict or {})
3321 return
3322
09b49e1f 3323 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3324 info_dict.update(post_extractor())
ed5835b4 3325
3326 actual_post_extract(info_dict or {})
3327
dcf64d43 3328 def run_pp(self, pp, infodict):
5bfa4862 3329 files_to_delete = []
dcf64d43 3330 if '__files_to_move' not in infodict:
3331 infodict['__files_to_move'] = {}
b1940459 3332 try:
3333 files_to_delete, infodict = pp.run(infodict)
3334 except PostProcessingError as e:
3335 # Must be True and not 'only_download'
3336 if self.params.get('ignoreerrors') is True:
3337 self.report_error(e)
3338 return infodict
3339 raise
3340
5bfa4862 3341 if not files_to_delete:
dcf64d43 3342 return infodict
5bfa4862 3343 if self.params.get('keepvideo', False):
3344 for f in files_to_delete:
dcf64d43 3345 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 3346 else:
43d7f5a5 3347 self._delete_downloaded_files(
3348 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
dcf64d43 3349 return infodict
5bfa4862 3350
ed5835b4 3351 def run_all_pps(self, key, info, *, additional_pps=None):
bb66c247 3352 self._forceprint(key, info)
ed5835b4 3353 for pp in (additional_pps or []) + self._pps[key]:
dc5f409c 3354 info = self.run_pp(pp, info)
ed5835b4 3355 return info
277d6ff5 3356
56d868db 3357 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 3358 info = dict(ie_info)
56d868db 3359 info['__files_to_move'] = files_to_move or {}
415f8d51 3360 try:
3361 info = self.run_all_pps(key, info)
3362 except PostProcessingError as err:
3363 msg = f'Preprocessing: {err}'
3364 info.setdefault('__pending_error', msg)
3365 self.report_error(msg, is_error=False)
56d868db 3366 return info, info.pop('__files_to_move', None)
5bfa4862 3367
f46e2f9d 3368 def post_process(self, filename, info, files_to_move=None):
8222d8de 3369 """Run all the postprocessors on the given file."""
8222d8de 3370 info['filepath'] = filename
dcf64d43 3371 info['__files_to_move'] = files_to_move or {}
ed5835b4 3372 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
dcf64d43 3373 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3374 del info['__files_to_move']
ed5835b4 3375 return self.run_all_pps('after_move', info)
c1c9a79c 3376
5db07df6 3377 def _make_archive_id(self, info_dict):
e9fef7ee
S
3378 video_id = info_dict.get('id')
3379 if not video_id:
3380 return
5db07df6
PH
3381 # Future-proof against any change in case
3382 # and for backwards compatibility with prior versions
e9fef7ee 3383 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3384 if extractor is None:
1211bb6d
S
3385 url = str_or_none(info_dict.get('url'))
3386 if not url:
3387 return
e9fef7ee 3388 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3389 for ie_key, ie in self._ies.items():
1211bb6d 3390 if ie.suitable(url):
8b7491c8 3391 extractor = ie_key
e9fef7ee
S
3392 break
3393 else:
3394 return
86e5f3ed 3395 return f'{extractor.lower()} {video_id}'
5db07df6
PH
3396
3397 def in_download_archive(self, info_dict):
3398 fn = self.params.get('download_archive')
3399 if fn is None:
3400 return False
3401
3402 vid_id = self._make_archive_id(info_dict)
e9fef7ee 3403 if not vid_id:
7012b23c 3404 return False # Incomplete video information
5db07df6 3405
a45e8619 3406 return vid_id in self.archive
c1c9a79c
PH
3407
3408 def record_download_archive(self, info_dict):
3409 fn = self.params.get('download_archive')
3410 if fn is None:
3411 return
5db07df6
PH
3412 vid_id = self._make_archive_id(info_dict)
3413 assert vid_id
a13e6848 3414 self.write_debug(f'Adding to archive: {vid_id}')
c1c9a79c 3415 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
6febd1c1 3416 archive_file.write(vid_id + '\n')
a45e8619 3417 self.archive.add(vid_id)
dd82ffea 3418
8c51aa65 3419 @staticmethod
8abeeb94 3420 def format_resolution(format, default='unknown'):
9359f3d4 3421 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
fb04e403 3422 return 'audio only'
f49d89ee
PH
3423 if format.get('resolution') is not None:
3424 return format['resolution']
35615307 3425 if format.get('width') and format.get('height'):
ff51ed58 3426 return '%dx%d' % (format['width'], format['height'])
35615307 3427 elif format.get('height'):
ff51ed58 3428 return '%sp' % format['height']
35615307 3429 elif format.get('width'):
ff51ed58 3430 return '%dx?' % format['width']
3431 return default
8c51aa65 3432
8130779d 3433 def _list_format_headers(self, *headers):
3434 if self.params.get('listformats_table', True) is not False:
591bb9d3 3435 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
8130779d 3436 return headers
3437
c57f7757
PH
3438 def _format_note(self, fdict):
3439 res = ''
3440 if fdict.get('ext') in ['f4f', 'f4m']:
f304da8a 3441 res += '(unsupported)'
32f90364
PH
3442 if fdict.get('language'):
3443 if res:
3444 res += ' '
f304da8a 3445 res += '[%s]' % fdict['language']
c57f7757 3446 if fdict.get('format_note') is not None:
f304da8a 3447 if res:
3448 res += ' '
3449 res += fdict['format_note']
c57f7757 3450 if fdict.get('tbr') is not None:
f304da8a 3451 if res:
3452 res += ', '
3453 res += '%4dk' % fdict['tbr']
c57f7757
PH
3454 if fdict.get('container') is not None:
3455 if res:
3456 res += ', '
3457 res += '%s container' % fdict['container']
3089bc74
S
3458 if (fdict.get('vcodec') is not None
3459 and fdict.get('vcodec') != 'none'):
c57f7757
PH
3460 if res:
3461 res += ', '
3462 res += fdict['vcodec']
91c7271a 3463 if fdict.get('vbr') is not None:
c57f7757
PH
3464 res += '@'
3465 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3466 res += 'video@'
3467 if fdict.get('vbr') is not None:
3468 res += '%4dk' % fdict['vbr']
fbb21cf5 3469 if fdict.get('fps') is not None:
5d583bdf
S
3470 if res:
3471 res += ', '
3472 res += '%sfps' % fdict['fps']
c57f7757
PH
3473 if fdict.get('acodec') is not None:
3474 if res:
3475 res += ', '
3476 if fdict['acodec'] == 'none':
3477 res += 'video only'
3478 else:
3479 res += '%-5s' % fdict['acodec']
3480 elif fdict.get('abr') is not None:
3481 if res:
3482 res += ', '
3483 res += 'audio'
3484 if fdict.get('abr') is not None:
3485 res += '@%3dk' % fdict['abr']
3486 if fdict.get('asr') is not None:
3487 res += ' (%5dHz)' % fdict['asr']
3488 if fdict.get('filesize') is not None:
3489 if res:
3490 res += ', '
3491 res += format_bytes(fdict['filesize'])
9732d77e
PH
3492 elif fdict.get('filesize_approx') is not None:
3493 if res:
3494 res += ', '
3495 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3496 return res
91c7271a 3497
8130779d 3498 def render_formats_table(self, info_dict):
b69fd25c 3499 if not info_dict.get('formats') and not info_dict.get('url'):
8130779d 3500 return None
b69fd25c 3501
94badb25 3502 formats = info_dict.get('formats', [info_dict])
8130779d 3503 if self.params.get('listformats_table', True) is False:
76d321f6 3504 table = [
3505 [
3506 format_field(f, 'format_id'),
3507 format_field(f, 'ext'),
3508 self.format_resolution(f),
8130779d 3509 self._format_note(f)
3510 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3511 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3512
591bb9d3 3513 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
8130779d 3514 table = [
3515 [
591bb9d3 3516 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
8130779d 3517 format_field(f, 'ext'),
3518 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3519 format_field(f, 'fps', '\t%d'),
3520 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3521 delim,
3522 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3523 format_field(f, 'tbr', '\t%dk'),
3524 shorten_protocol_name(f.get('protocol', '')),
3525 delim,
3526 format_field(f, 'vcodec', default='unknown').replace(
3527 'none', 'images' if f.get('acodec') == 'none'
591bb9d3 3528 else self._format_out('audio only', self.Styles.SUPPRESS)),
8130779d 3529 format_field(f, 'vbr', '\t%dk'),
3530 format_field(f, 'acodec', default='unknown').replace(
3531 'none', '' if f.get('vcodec') == 'none'
591bb9d3 3532 else self._format_out('video only', self.Styles.SUPPRESS)),
8130779d 3533 format_field(f, 'abr', '\t%dk'),
3534 format_field(f, 'asr', '\t%dHz'),
3535 join_nonempty(
591bb9d3 3536 self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
8130779d 3537 format_field(f, 'language', '[%s]'),
3538 join_nonempty(format_field(f, 'format_note'),
3539 format_field(f, 'container', ignore=(None, f.get('ext'))),
3540 delim=', '),
3541 delim=' '),
3542 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3543 header_line = self._list_format_headers(
3544 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3545 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3546
3547 return render_table(
3548 header_line, table, hide_empty=True,
591bb9d3 3549 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
8130779d 3550
3551 def render_thumbnails_table(self, info_dict):
88f23a18 3552 thumbnails = list(info_dict.get('thumbnails') or [])
cfb56d1a 3553 if not thumbnails:
8130779d 3554 return None
3555 return render_table(
ec11a9f4 3556 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
6970b600 3557 [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
2412044c 3558
8130779d 3559 def render_subtitles_table(self, video_id, subtitles):
2412044c 3560 def _row(lang, formats):
49c258e1 3561 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
2412044c 3562 if len(set(names)) == 1:
7aee40c1 3563 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3564 return [lang, ', '.join(names), ', '.join(exts)]
3565
8130779d 3566 if not subtitles:
3567 return None
3568 return render_table(
ec11a9f4 3569 self._list_format_headers('Language', 'Name', 'Formats'),
2412044c 3570 [_row(lang, formats) for lang, formats in subtitles.items()],
8130779d 3571 hide_empty=True)
3572
3573 def __list_table(self, video_id, name, func, *args):
3574 table = func(*args)
3575 if not table:
3576 self.to_screen(f'{video_id} has no {name}')
3577 return
3578 self.to_screen(f'[info] Available {name} for {video_id}:')
3579 self.to_stdout(table)
3580
3581 def list_formats(self, info_dict):
3582 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3583
3584 def list_thumbnails(self, info_dict):
3585 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3586
3587 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3588 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
a504ced0 3589
dca08720
PH
3590 def urlopen(self, req):
3591 """ Start an HTTP download """
f9934b96 3592 if isinstance(req, str):
67dda517 3593 req = sanitized_Request(req)
19a41fc6 3594 return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
3595
3596 def print_debug_header(self):
3597 if not self.params.get('verbose'):
3598 return
49a57e70 3599
560738f3 3600 # These imports can be slow, so import them only as needed
3601 from .extractor.extractors import _LAZY_LOADER
3602 from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
3603
49a57e70 3604 def get_encoding(stream):
2a938746 3605 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
49a57e70 3606 if not supports_terminal_sequences(stream):
53973b4d 3607 from .utils import WINDOWS_VT_MODE # Must be imported locally
e3c7d495 3608 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
49a57e70 3609 return ret
3610
591bb9d3 3611 encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
49a57e70 3612 locale.getpreferredencoding(),
3613 sys.getfilesystemencoding(),
591bb9d3 3614 self.get_encoding(),
3615 ', '.join(
64fa820c 3616 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
591bb9d3 3617 if stream is not None and key != 'console')
3618 )
883d4b1e 3619
3620 logger = self.params.get('logger')
3621 if logger:
3622 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3623 write_debug(encoding_str)
3624 else:
96565c7e 3625 write_string(f'[debug] {encoding_str}\n', encoding=None)
49a57e70 3626 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
734f90bb 3627
4c88ff87 3628 source = detect_variant()
36eaf303 3629 write_debug(join_nonempty(
3630 'yt-dlp version', __version__,
3631 f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3632 '' if source == 'unknown' else f'({source})',
3633 delim=' '))
6e21fdd2 3634 if not _LAZY_LOADER:
3635 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
49a57e70 3636 write_debug('Lazy loading extractors is forcibly disabled')
6e21fdd2 3637 else:
49a57e70 3638 write_debug('Lazy loading extractors is disabled')
3ae5e797 3639 if plugin_extractors or plugin_postprocessors:
49a57e70 3640 write_debug('Plugins: %s' % [
3ae5e797 3641 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3642 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
8a82af35 3643 if self.params['compat_opts']:
3644 write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
36eaf303 3645
3646 if source == 'source':
dca08720 3647 try:
f0c9fb96 3648 stdout, _, _ = Popen.run(
36eaf303 3649 ['git', 'rev-parse', '--short', 'HEAD'],
f0c9fb96 3650 text=True, cwd=os.path.dirname(os.path.abspath(__file__)),
3651 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
3652 if re.fullmatch('[0-9a-f]+', stdout.strip()):
3653 write_debug(f'Git HEAD: {stdout.strip()}')
70a1165b 3654 except Exception:
19a03940 3655 with contextlib.suppress(Exception):
36eaf303 3656 sys.exc_clear()
b300cda4
S
3657
3658 def python_implementation():
3659 impl_name = platform.python_implementation()
3660 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3661 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3662 return impl_name
3663
49a57e70 3664 write_debug('Python version %s (%s %s) - %s' % (
e5813e53 3665 platform.python_version(),
3666 python_implementation(),
3667 platform.architecture()[0],
b300cda4 3668 platform_name()))
d28b5171 3669
8913ef74 3670 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3671 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3672 if ffmpeg_features:
19a03940 3673 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
8913ef74 3674
4c83c967 3675 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3676 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171 3677 exe_str = ', '.join(
2831b468 3678 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3679 ) or 'none'
49a57e70 3680 write_debug('exe versions: %s' % exe_str)
dca08720 3681
1d485a1a 3682 from .compat.compat_utils import get_package_info
9b8ee23b 3683 from .dependencies import available_dependencies
3684
3685 write_debug('Optional libraries: %s' % (', '.join(sorted({
1d485a1a 3686 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
9b8ee23b 3687 })) or 'none'))
2831b468 3688
97ec5bc5 3689 self._setup_opener()
dca08720
PH
3690 proxy_map = {}
3691 for handler in self._opener.handlers:
3692 if hasattr(handler, 'proxies'):
3693 proxy_map.update(handler.proxies)
49a57e70 3694 write_debug(f'Proxy map: {proxy_map}')
dca08720 3695
49a57e70 3696 # Not implemented
3697 if False and self.params.get('call_home'):
0f06bcd7 3698 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
49a57e70 3699 write_debug('Public IP address: %s' % ipaddr)
58b1f00d 3700 latest_version = self.urlopen(
0f06bcd7 3701 'https://yt-dl.org/latest/version').read().decode()
58b1f00d
PH
3702 if version_tuple(latest_version) > version_tuple(__version__):
3703 self.report_warning(
3704 'You are using an outdated version (newest version: %s)! '
3705 'See https://yt-dl.org/update if you need help updating.' %
3706 latest_version)
3707
e344693b 3708 def _setup_opener(self):
97ec5bc5 3709 if hasattr(self, '_opener'):
3710 return
6ad14cab 3711 timeout_val = self.params.get('socket_timeout')
17bddf3e 3712 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
6ad14cab 3713
982ee69a 3714 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
dca08720
PH
3715 opts_cookiefile = self.params.get('cookiefile')
3716 opts_proxy = self.params.get('proxy')
3717
982ee69a 3718 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3719
6a3f4c3f 3720 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720
PH
3721 if opts_proxy is not None:
3722 if opts_proxy == '':
3723 proxies = {}
3724 else:
3725 proxies = {'http': opts_proxy, 'https': opts_proxy}
3726 else:
3727 proxies = compat_urllib_request.getproxies()
067aa17e 3728 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720
PH
3729 if 'http' in proxies and 'https' not in proxies:
3730 proxies['https'] = proxies['http']
91410c9b 3731 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2
PH
3732
3733 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d
PH
3734 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3735 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3736 redirect_handler = YoutubeDLRedirectHandler()
f9934b96 3737 data_handler = urllib.request.DataHandler()
6240b0a2
JMF
3738
3739 # When passing our own FileHandler instance, build_opener won't add the
3740 # default FileHandler; this allows us to disable the file protocol, which
3741 # can be used for malicious purposes (see
067aa17e 3742 # https://github.com/ytdl-org/youtube-dl/issues/8227)
6240b0a2
JMF
3743 file_handler = compat_urllib_request.FileHandler()
3744
3745 def file_open(*args, **kwargs):
7a5c1cfe 3746 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
6240b0a2
JMF
3747 file_handler.file_open = file_open
3748
3749 opener = compat_urllib_request.build_opener(
fca6dba8 3750 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3751
dca08720
PH
3752 # Delete the default user-agent header, which would otherwise apply in
3753 # cases where our custom HTTP handler doesn't come into play
067aa17e 3754 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720
PH
3755 opener.addheaders = []
3756 self._opener = opener
62fec3b2
PH
3757
3758 def encode(self, s):
3759 if isinstance(s, bytes):
3760 return s # Already encoded
3761
3762 try:
3763 return s.encode(self.get_encoding())
3764 except UnicodeEncodeError as err:
3765 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3766 raise
3767
3768 def get_encoding(self):
3769 encoding = self.params.get('encoding')
3770 if encoding is None:
3771 encoding = preferredencoding()
3772 return encoding
ec82d85a 3773
e08a85d8 3774 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
cb96c5be 3775 ''' Write infojson and return True = written, 'exists' = already exists, False = skip, None = error '''
e08a85d8 3776 if overwrite is None:
3777 overwrite = self.params.get('overwrites', True)
80c03fa9 3778 if not self.params.get('writeinfojson'):
3779 return False
3780 elif not infofn:
3781 self.write_debug(f'Skipping writing {label} infojson')
3782 return False
3783 elif not self._ensure_dir_exists(infofn):
3784 return None
e08a85d8 3785 elif not overwrite and os.path.exists(infofn):
80c03fa9 3786 self.to_screen(f'[info] {label.title()} metadata is already present')
cb96c5be 3787 return 'exists'
3788
3789 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3790 try:
3791 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3792 return True
86e5f3ed 3793 except OSError:
cb96c5be 3794 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3795 return None
80c03fa9 3796
3797 def _write_description(self, label, ie_result, descfn):
3798 ''' Write description and return True = written, False = skip, None = error '''
3799 if not self.params.get('writedescription'):
3800 return False
3801 elif not descfn:
3802 self.write_debug(f'Skipping writing {label} description')
3803 return False
3804 elif not self._ensure_dir_exists(descfn):
3805 return None
3806 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3807 self.to_screen(f'[info] {label.title()} description is already present')
3808 elif ie_result.get('description') is None:
3809 self.report_warning(f'There\'s no {label} description to write')
3810 return False
3811 else:
3812 try:
3813 self.to_screen(f'[info] Writing {label} description to: {descfn}')
86e5f3ed 3814 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
80c03fa9 3815 descfile.write(ie_result['description'])
86e5f3ed 3816 except OSError:
80c03fa9 3817 self.report_error(f'Cannot write {label} description file {descfn}')
3818 return None
3819 return True
3820
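# Both writers above are driven by plain YoutubeDL options; a minimal invocation
# sketch from a separate script (the URL is a placeholder):
from yt_dlp import YoutubeDL

_opts = {
    'writedescription': True,  # handled by _write_description
    'writeinfojson': True,     # handled by _write_info_json
    'overwrites': False,       # keep existing .description / .info.json files
}
with YoutubeDL(_opts) as ydl:
    ydl.download(['https://example.com/some-video'])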
3821 def _write_subtitles(self, info_dict, filename):
3822 ''' Write subtitles to file and return a list of (sub_filename, final_sub_filename) pairs, or None on error '''
3823 ret = []
3824 subtitles = info_dict.get('requested_subtitles')
3825 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3826 # subtitle download errors are already reported by the relevant IE,
3827 # so processing silently continues when used with an IE that does not support subtitles
3828 return ret
3829
3830 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3831 if not sub_filename_base:
3832 self.to_screen('[info] Skipping writing video subtitles')
3833 return ret
3834 for sub_lang, sub_info in subtitles.items():
3835 sub_format = sub_info['ext']
3836 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3837 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
e04938ab 3838 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3839 if existing_sub:
80c03fa9 3840 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
e04938ab 3841 sub_info['filepath'] = existing_sub
3842 ret.append((existing_sub, sub_filename_final))
80c03fa9 3843 continue
3844
3845 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3846 if sub_info.get('data') is not None:
3847 try:
3848 # Use newline='' to prevent conversion of newline characters
3849 # See https://github.com/ytdl-org/youtube-dl/issues/10268
86e5f3ed 3850 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
80c03fa9 3851 subfile.write(sub_info['data'])
3852 sub_info['filepath'] = sub_filename
3853 ret.append((sub_filename, sub_filename_final))
3854 continue
86e5f3ed 3855 except OSError:
80c03fa9 3856 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3857 return None
3858
3859 try:
3860 sub_copy = sub_info.copy()
3861 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3862 self.dl(sub_filename, sub_copy, subtitle=True)
3863 sub_info['filepath'] = sub_filename
3864 ret.append((sub_filename, sub_filename_final))
6020e05d 3865 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
c70c418d 3866 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
6020e05d 3867 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
c70c418d 3868 if not self.params.get('ignoreerrors'):
3869 self.report_error(msg)
3870 raise DownloadError(msg)
3871 self.report_warning(msg)
519804a9 3872 return ret
80c03fa9 3873
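# A sketch of the options that populate requested_subtitles for the writer above,
# run from a separate script (the URL is a placeholder):
from yt_dlp import YoutubeDL

_opts = {
    'writesubtitles': True,           # manually created subtitles
    'writeautomaticsub': True,        # auto-generated captions, when available
    'subtitleslangs': ['en'],         # languages requested from the extractor
    'ignoreerrors': 'only_download',  # matches the error handling branch above
}
with YoutubeDL(_opts) as ydl:
    ydl.download(['https://example.com/some-video'])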
3874 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3875 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
6c4fd172 3876 write_all = self.params.get('write_all_thumbnails', False)
80c03fa9 3877 thumbnails, ret = [], []
6c4fd172 3878 if write_all or self.params.get('writethumbnail', False):
0202b52a 3879 thumbnails = info_dict.get('thumbnails') or []
6c4fd172 3880 multiple = write_all and len(thumbnails) > 1
ec82d85a 3881
80c03fa9 3882 if thumb_filename_base is None:
3883 thumb_filename_base = filename
3884 if thumbnails and not thumb_filename_base:
3885 self.write_debug(f'Skipping writing {label} thumbnail')
3886 return ret
3887
dd0228ce 3888 for idx, t in list(enumerate(thumbnails))[::-1]:
80c03fa9 3889 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
aa9369a2 3890 thumb_display_id = f'{label} thumbnail {t["id"]}'
80c03fa9 3891 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3892 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
ec82d85a 3893
e04938ab 3894 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3895 if existing_thumb:
aa9369a2 3896 self.to_screen('[info] %s is already present' % (
3897 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
e04938ab 3898 t['filepath'] = existing_thumb
3899 ret.append((existing_thumb, thumb_filename_final))
ec82d85a 3900 else:
80c03fa9 3901 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
ec82d85a 3902 try:
297e9952 3903 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
80c03fa9 3904 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
d3d89c32 3905 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 3906 shutil.copyfileobj(uf, thumbf)
80c03fa9 3907 ret.append((thumb_filename, thumb_filename_final))
885cc0b7 3908 t['filepath'] = thumb_filename
3158150c 3909 except network_exceptions as err:
dd0228ce 3910 thumbnails.pop(idx)
80c03fa9 3911 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
6c4fd172 3912 if ret and not write_all:
3913 break
0202b52a 3914 return ret
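# And the corresponding options for the thumbnail writer, again as an illustrative
# sketch from a separate script (the URL is a placeholder):
from yt_dlp import YoutubeDL

_opts = {
    'writethumbnail': True,          # best thumbnail only
    # 'write_all_thumbnails': True,  # or every listed thumbnail (multiple files)
    'skip_download': True,
}
with YoutubeDL(_opts) as ydl:
    ydl.download(['https://example.com/some-video'])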