]> jfr.im git - yt-dlp.git/blame - yt_dlp/YoutubeDL.py
[ie/youtube:tab] Fix `tags` extraction (#9413)
[yt-dlp.git] / yt_dlp / YoutubeDL.py
CommitLineData
26e63931 1import collections
31bd3925 2import contextlib
31215122 3import copy
9d2ecdbc 4import datetime
c1c9a79c 5import errno
31bd3925 6import fileinput
31215122 7import http.cookiejar
8222d8de 8import io
b82f815f 9import itertools
8694c600 10import json
62fec3b2 11import locale
083c9df9 12import operator
8222d8de 13import os
f8271158 14import random
8222d8de
JMF
15import re
16import shutil
6f2287cb 17import string
dca08720 18import subprocess
8222d8de 19import sys
21cd8fae 20import tempfile
8222d8de 21import time
67134eab 22import tokenize
8222d8de 23import traceback
524e2e4f 24import unicodedata
961ea474 25
f8271158 26from .cache import Cache
227bf1a3 27from .compat import functools, urllib # isort: split
28from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
31215122 29from .cookies import LenientSimpleCookie, load_cookies
f8271158 30from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
31from .downloader.rtmp import rtmpdump_version
f8271158 32from .extractor import gen_extractor_classes, get_info_extractor
fe7866d0 33from .extractor.common import UnsupportedURLIE
f8271158 34from .extractor.openload import PhantomJSwrapper
35from .minicurses import format_text
3d2623a8 36from .networking import HEADRequest, Request, RequestDirector
db7b054a 37from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES
227bf1a3 38from .networking.exceptions import (
39 HTTPError,
40 NoSupportingHandlers,
41 RequestError,
42 SSLError,
3d2623a8 43 network_exceptions,
227bf1a3 44)
8e40b9d1 45from .plugins import directories as plugin_directories
e756f45b 46from .postprocessor import _PLUGIN_CLASSES as plugin_pps
f8271158 47from .postprocessor import (
48 EmbedThumbnailPP,
49 FFmpegFixupDuplicateMoovPP,
50 FFmpegFixupDurationPP,
51 FFmpegFixupM3u8PP,
52 FFmpegFixupM4aPP,
53 FFmpegFixupStretchedPP,
54 FFmpegFixupTimestampPP,
55 FFmpegMergerPP,
56 FFmpegPostProcessor,
ca9def71 57 FFmpegVideoConvertorPP,
f8271158 58 MoveFilesAfterDownloadPP,
59 get_postprocessor,
60)
ca9def71 61from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
f9fb3ce8
SS
62from .update import (
63 REPOSITORY,
64 _get_system_deprecation,
65 _make_label,
66 current_git_head,
67 detect_variant,
68)
8c25f81b 69from .utils import (
f8271158 70 DEFAULT_OUTTMPL,
7b2c3f47 71 IDENTITY,
f8271158 72 LINK_TEMPLATES,
8dc59305 73 MEDIA_EXTENSIONS,
f8271158 74 NO_DEFAULT,
1d485a1a 75 NUMBER_RE,
f8271158 76 OUTTMPL_TYPES,
77 POSTPROCESS_WHEN,
78 STR_FORMAT_RE_TMPL,
79 STR_FORMAT_TYPES,
80 ContentTooShortError,
81 DateRange,
82 DownloadCancelled,
83 DownloadError,
84 EntryNotInPlaylist,
85 ExistingVideoReached,
86 ExtractorError,
784320c9 87 FormatSorter,
f8271158 88 GeoRestrictedError,
f8271158 89 ISO3166Utils,
90 LazyList,
91 MaxDownloadsReached,
19a03940 92 Namespace,
f8271158 93 PagedList,
7e88d7d7 94 PlaylistEntries,
f8271158 95 Popen,
96 PostProcessingError,
97 ReExtractInfo,
98 RejectedVideoReached,
99 SameFileError,
100 UnavailableVideoError,
693f0600 101 UserNotLive,
eedb7ba5
S
102 age_restricted,
103 args_to_str,
cb794ee0 104 bug_reports_message,
ce02ed60 105 date_from_str,
da4db748 106 deprecation_warning,
ce02ed60 107 determine_ext,
b5559424 108 determine_protocol,
c0384f22 109 encode_compat_str,
ce02ed60 110 encodeFilename,
a06916d9 111 error_to_compat_str,
47cdc68e 112 escapeHTML,
590bc6f6 113 expand_path,
227bf1a3 114 extract_basic_auth,
90137ca4 115 filter_dict,
e29663c6 116 float_or_none,
02dbf93f 117 format_bytes,
e0fd9573 118 format_decimal_suffix,
f8271158 119 format_field,
525ef922 120 formatSeconds,
fc61aff4 121 get_compatible_ext,
0bb322b9 122 get_domain,
c9969434 123 int_or_none,
732044af 124 iri_to_uri,
941e881e 125 is_path_like,
34921b43 126 join_nonempty,
ce02ed60 127 locked_file,
0647d925 128 make_archive_id,
0202b52a 129 make_dir,
ec11a9f4 130 number_of_digits,
cd6fc19e 131 orderedSet,
5314b521 132 orderedSet_from_options,
083c9df9 133 parse_filesize,
ce02ed60 134 preferredencoding,
eedb7ba5 135 prepend_extension,
3efb96a6 136 remove_terminal_sequences,
cfb56d1a 137 render_table,
eedb7ba5 138 replace_extension,
ce02ed60 139 sanitize_filename,
1bb5c511 140 sanitize_path,
dcf77cf1 141 sanitize_url,
1211bb6d 142 str_or_none,
e29663c6 143 strftime_or_none,
ce02ed60 144 subtitles_filename,
819e0531 145 supports_terminal_sequences,
b1f94422 146 system_identifier,
f2ebc5c7 147 timetuple_from_msec,
732044af 148 to_high_limit_path,
324ad820 149 traverse_obj,
fc61aff4 150 try_call,
6033d980 151 try_get,
29eb5174 152 url_basename,
7d1eb38a 153 variadic,
58b1f00d 154 version_tuple,
53973b4d 155 windows_enable_vt_mode,
ce02ed60
PH
156 write_json_file,
157 write_string,
4f026faf 158)
227bf1a3 159from .utils._utils import _YDLLogger
160from .utils.networking import (
161 HTTPHeaderDict,
162 clean_headers,
163 clean_proxies,
3d2623a8 164 std_headers,
227bf1a3 165)
20314dd4 166from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__
8222d8de 167
e9c0cdd3
YCH
168if compat_os_name == 'nt':
169 import ctypes
170
2459b6e1 171
86e5f3ed 172class YoutubeDL:
8222d8de
JMF
173 """YoutubeDL class.
174
175 YoutubeDL objects are the ones responsible for downloading the
176 actual video file and writing it to disk if the user has requested
177 it, among some other tasks. In most cases there should be one per
178 program. As, given a video URL, the downloader doesn't know how to
179 extract all the needed information, task that InfoExtractors do, it
180 has to pass the URL to one of them.
181
182 For this, YoutubeDL objects have a method that allows
183 InfoExtractors to be registered in a given order. When it is passed
184 a URL, the YoutubeDL object hands it to the first InfoExtractor it
185 finds that reports being able to handle it. The InfoExtractor extracts
186 all the information about the video or videos the URL refers to, and
187 YoutubeDL processes the extracted information, possibly using a File
188 Downloader to download the video.
189
190 YoutubeDL objects accept a lot of parameters. In order not to saturate
191 the object constructor with arguments, it receives a dictionary of
192 options instead. These options are available through the params
193 attribute for the InfoExtractors to use. The YoutubeDL also
194 registers itself as the downloader in charge for the InfoExtractors
195 that are added to it, so this is a "mutual registration".
196
197 Available options:
198
199 username: Username for authentication purposes.
200 password: Password for authentication purposes.
180940e0 201 videopassword: Password for accessing a video.
1da50aa3
S
202 ap_mso: Adobe Pass multiple-system operator identifier.
203 ap_username: Multiple-system operator account username.
204 ap_password: Multiple-system operator account password.
8222d8de 205 usenetrc: Use netrc for authentication instead.
c8bc203f 206 netrc_location: Location of the netrc file. Defaults to ~/.netrc.
db3ad8a6 207 netrc_cmd: Use a shell command to get credentials
8222d8de
JMF
208 verbose: Print additional info to stdout.
209 quiet: Do not print messages to stdout.
ad8915b7 210 no_warnings: Do not print out anything for warnings.
bb66c247 211 forceprint: A dict with keys WHEN mapped to a list of templates to
212 print to stdout. The allowed keys are video or any of the
213 items in utils.POSTPROCESS_WHEN.
ca30f449 214 For compatibility, a single list is also accepted
bb66c247 215 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
216 a list of tuples with (template, filename)
8694c600 217 forcejson: Force printing info_dict as JSON.
63e0be34
PH
218 dump_single_json: Force printing the info_dict of the whole playlist
219 (or video) as a single JSON line.
c25228e5 220 force_write_download_archive: Force writing download archive regardless
221 of 'skip_download' or 'simulate'.
b7b04c78 222 simulate: Do not download the video files. If unset (or None),
223 simulate only if listsubtitles, listformats or list_thumbnails is used
eb8a4433 224 format: Video format code. see "FORMAT SELECTION" for more details.
093a1710 225 You can also pass a function. The function takes 'ctx' as
226 argument and returns the formats to download.
227 See "build_format_selector" for an implementation
63ad4d43 228 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 229 ignore_no_formats_error: Ignore "No video formats" error. Useful for
230 extracting metadata even if the video is not actually
231 available for download (experimental)
0930b11f 232 format_sort: A list of fields by which to sort the video formats.
233 See "Sorting Formats" for more details.
c25228e5 234 format_sort_force: Force the given format_sort. see "Sorting Formats"
235 for more details.
08d30158 236 prefer_free_formats: Whether to prefer video formats with free containers
237 over non-free ones of same quality.
c25228e5 238 allow_multiple_video_streams: Allow multiple video streams to be merged
239 into a single file
240 allow_multiple_audio_streams: Allow multiple audio streams to be merged
241 into a single file
0ba692ac 242 check_formats Whether to test if the formats are downloadable.
9f1a1c36 243 Can be True (check all), False (check none),
244 'selected' (check selected formats),
0ba692ac 245 or None (check only if requested by extractor)
4524baf0 246 paths: Dictionary of output paths. The allowed keys are 'home'
5ca095cb 247 'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
de6000d9 248 outtmpl: Dictionary of templates for output names. Allowed keys
5ca095cb 249 are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).
34488702 250 For compatibility with youtube-dl, a single string can also be used
a820dc72
RA
251 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
252 restrictfilenames: Do not allow "&" and spaces in file names
253 trim_file_name: Limit length of filename (extension excluded)
4524baf0 254 windowsfilenames: Force the filenames to be windows compatible
b1940459 255 ignoreerrors: Do not stop on download/postprocessing errors.
256 Can be 'only_download' to ignore only download errors.
257 Default is 'only_download' for CLI, but False for API
26e2805c 258 skip_playlist_after_errors: Number of allowed failures until the rest of
259 the playlist is skipped
fe7866d0 260 allowed_extractors: List of regexes to match against extractor names that are allowed
0c3d0f51 261 overwrites: Overwrite all video and metadata files if True,
262 overwrite only non-video files if None
263 and don't overwrite any file if False
c14e88f0 264 playlist_items: Specific indices of playlist to download.
75822ca7 265 playlistrandom: Download playlist items in random order.
7e9a6125 266 lazy_playlist: Process playlist entries as they are received.
8222d8de
JMF
267 matchtitle: Download only matching titles.
268 rejecttitle: Reject downloads for matching titles.
8bf9319e 269 logger: Log messages to a logging.Logger instance.
17ffed18 270 logtostderr: Print everything to stderr instead of stdout.
271 consoletitle: Display progress in console window's titlebar.
8222d8de
JMF
272 writedescription: Write the video description to a .description file
273 writeinfojson: Write the video metadata to a .info.json file
ad54c913 274 clean_infojson: Remove internal metadata from the infojson
34488702 275 getcomments: Extract video comments. This will not be written to disk
06167fbb 276 unless writeinfojson is also given
1fb07d10 277 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 278 writethumbnail: Write the thumbnail image to a file
c25228e5 279 allow_playlist_files: Whether to write playlists' description, infojson etc
280 also to disk when using the 'write*' options
ec82d85a 281 write_all_thumbnails: Write all thumbnail formats to files
732044af 282 writelink: Write an internet shortcut file, depending on the
283 current platform (.url/.webloc/.desktop)
284 writeurllink: Write a Windows internet shortcut file (.url)
285 writewebloclink: Write a macOS internet shortcut file (.webloc)
286 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 287 writesubtitles: Write the video subtitles to a file
741dd8ea 288 writeautomaticsub: Write the automatically generated subtitles to a file
8222d8de 289 listsubtitles: Lists all available subtitles for the video
a504ced0 290 subtitlesformat: The format code for subtitles
c32b0aab 291 subtitleslangs: List of languages of the subtitles to download (can be regex).
292 The list may contain "all" to refer to all the available
293 subtitles. The language can be prefixed with a "-" to
62b58c09 294 exclude it from the requested languages, e.g. ['all', '-live_chat']
8222d8de 295 keepvideo: Keep the video file after post-processing
46f1370e 296 daterange: A utils.DateRange object, download only if the upload_date is in the range.
8222d8de 297 skip_download: Skip the actual download of the video file
c35f9e72 298 cachedir: Location of the cache files in the filesystem.
a0e07d31 299 False to disable filesystem cache.
47192f92 300 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
301 age_limit: An integer representing the user's age in years.
302 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
303 min_views: An integer representing the minimum view count the video
304 must have in order to not be skipped.
305 Videos without view count information are always
306 downloaded. None for no limit.
307 max_views: An integer representing the maximum view count.
308 Videos that are more popular than that are not
309 downloaded.
310 Videos without view count information are always
311 downloaded. None for no limit.
ae103564 312 download_archive: A set, or the name of a file where all downloads are recorded.
313 Videos already present in the file are not downloaded again.
8a51f564 314 break_on_existing: Stop the download process after attempting to download a
315 file that is in the archive.
b222c271 316 break_per_url: Whether break_on_reject and break_on_existing
317 should act on each input URL as opposed to for the entire queue
d76fa1f3 318 cookiefile: File name or text stream from where cookies should be read and dumped to
f59f5ef8 319 cookiesfrombrowser: A tuple containing the name of the browser, the profile
9bd13fe5 320 name/path from where cookies are loaded, the name of the keyring,
321 and the container name, e.g. ('chrome', ) or
322 ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
f81c62a6 323 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
324 support RFC 5746 secure renegotiation
f59f5ef8 325 nocheckcertificate: Do not verify SSL certificates
bb58c9ed 326 client_certificate: Path to client certificate file in PEM format. May include the private key
327 client_certificate_key: Path to private key file for client certificate
328 client_certificate_password: Password for client certificate private key, if encrypted.
329 If not provided and the key is encrypted, yt-dlp will ask interactively
7e8c0af0 330 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
c6e07cf1 331 (Only supported by some extractors)
8300774c 332 enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons.
8b7539d2 333 http_headers: A dictionary of custom headers to be used for all requests
a1ee09e8 334 proxy: URL of the proxy server to use
38cce791 335 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 336 on geo-restricted sites.
e344693b 337 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
338 bidi_workaround: Work around buggy terminals without bidirectional text
339 support, using fribidi
a0ddb8a2 340 debug_printtraffic:Print out sent and received HTTP traffic
04b4d394
PH
341 default_search: Prepend this string if an input url is not valid.
342 'auto' for elaborate guessing
62fec3b2 343 encoding: Use this encoding instead of the system-specified.
134c913c 344 extract_flat: Whether to resolve and process url_results further
46f1370e 345 * False: Always process. Default for API
134c913c 346 * True: Never process
347 * 'in_playlist': Do not process inside playlist/multi_video
348 * 'discard': Always process, but don't return the result
349 from inside playlist/multi_video
350 * 'discard_in_playlist': Same as "discard", but only for
46f1370e 351 playlists (not multi_video). Default for CLI
f2ebc5c7 352 wait_for_video: If given, wait for scheduled streams to become available.
353 The value should be a tuple containing the range
354 (min_secs, max_secs) to wait between retries
4f026faf 355 postprocessors: A list of dictionaries, each with an entry
71b640cc 356 * key: The name of the postprocessor. See
7a5c1cfe 357 yt_dlp/postprocessor/__init__.py for a list.
bb66c247 358 * when: When to run the postprocessor. Allowed values are
359 the entries of utils.POSTPROCESS_WHEN
56d868db 360 Assumed to be 'post_process' if not given
71b640cc
PH
361 progress_hooks: A list of functions that get called on download
362 progress, with a dictionary with the entries
5cda4eda 363 * status: One of "downloading", "error", or "finished".
ee69b99a 364 Check this first and ignore unknown values.
3ba7740d 365 * info_dict: The extracted info_dict
71b640cc 366
5cda4eda 367 If status is one of "downloading", or "finished", the
ee69b99a
PH
368 following properties may also be present:
369 * filename: The final filename (always present)
5cda4eda 370 * tmpfilename: The filename we're currently writing to
71b640cc
PH
371 * downloaded_bytes: Bytes on disk
372 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
373 * total_bytes_estimate: Guess of the eventual file size,
374 None if unavailable.
375 * elapsed: The number of seconds since download started.
71b640cc
PH
376 * eta: The estimated time in seconds, None if unknown
377 * speed: The download speed in bytes/second, None if
378 unknown
5cda4eda
PH
379 * fragment_index: The counter of the currently
380 downloaded video fragment.
381 * fragment_count: The number of fragments (= individual
382 files that will be merged)
71b640cc
PH
383
384 Progress hooks are guaranteed to be called at least once
385 (with status "finished") if the download is successful.
819e0531 386 postprocessor_hooks: A list of functions that get called on postprocessing
387 progress, with a dictionary with the entries
388 * status: One of "started", "processing", or "finished".
389 Check this first and ignore unknown values.
390 * postprocessor: Name of the postprocessor
391 * info_dict: The extracted info_dict
392
393 Postprocessor hooks are guaranteed to be called at least twice
394 (with status "started" and "finished") if the processing is successful.
fc61aff4 395 merge_output_format: "/" separated list of extensions to use when merging formats.
6b591b29 396 final_ext: Expected final extension; used to detect when the file was
59a7a13e 397 already downloaded and converted
6271f1ca
PH
398 fixup: Automatically correct known faults of the file.
399 One of:
400 - "never": do nothing
401 - "warn": only emit a warning
402 - "detect_or_warn": check whether we can do anything
62cd676c 403 about it, warn otherwise (default)
504f20dd 404 source_address: Client-side IP address to bind to.
1cf376f5 405 sleep_interval_requests: Number of seconds to sleep between requests
406 during extraction
7aa589a5
S
407 sleep_interval: Number of seconds to sleep before each download when
408 used alone or a lower bound of a range for randomized
409 sleep before each download (minimum possible number
410 of seconds to sleep) when used along with
411 max_sleep_interval.
412 max_sleep_interval:Upper bound of a range for randomized sleep before each
413 download (maximum possible number of seconds to sleep).
414 Must only be used along with sleep_interval.
415 Actual sleep time will be a random float from range
416 [sleep_interval; max_sleep_interval].
1cf376f5 417 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a
PH
418 listformats: Print an overview of available video formats and exit.
419 list_thumbnails: Print a table of all thumbnails and exit.
0a41f331 420 match_filter: A function that gets called for every video with the signature
421 (info_dict, *, incomplete: bool) -> Optional[str]
422 For backward compatibility with youtube-dl, the signature
423 (info_dict) -> Optional[str] is also allowed.
424 - If it returns a message, the video is ignored.
425 - If it returns None, the video is downloaded.
426 - If it returns utils.NO_DEFAULT, the user is interactively
427 asked whether to download the video.
fe2ce85a 428 - Raise utils.DownloadCancelled(msg) to abort remaining
429 downloads when a video is rejected.
5ca095cb 430 match_filter_func in utils/_utils.py is one example for this.
8417f26b
SS
431 color: A Dictionary with output stream names as keys
432 and their respective color policy as values.
433 Can also just be a single color policy,
434 in which case it applies to all outputs.
435 Valid stream names are 'stdout' and 'stderr'.
436 Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
0a840f58 437 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 438 HTTP header
0a840f58 439 geo_bypass_country:
773f291d
S
440 Two-letter ISO 3166-1 alpha-2 country code that will be used for
441 explicit geographic restriction bypassing via faking
504f20dd 442 X-Forwarded-For HTTP header
5f95927a
S
443 geo_bypass_ip_block:
444 IP range in CIDR notation that will be used similarly to
504f20dd 445 geo_bypass_country
52a8a1e1 446 external_downloader: A dictionary of protocol keys and the executable of the
447 external downloader to use for it. The allowed protocols
448 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
449 Set the value to 'native' to use the native downloader
53ed7066 450 compat_opts: Compatibility options. See "Differences in default behavior".
3acf6d38 451 The following options do not work when used through the API:
b5ae35ee 452 filename, abort-on-error, multistreams, no-live-chat, format-sort
dac5df5a 453 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
e4f02757 454 Refer to __init__.py for their implementation
819e0531 455 progress_template: Dictionary of templates for progress outputs.
456 Allowed keys are 'download', 'postprocess',
457 'download-title' (console title) and 'postprocess-title'.
458 The template is mapped on a dictionary with keys 'progress' and 'info'
23326151 459 retry_sleep_functions: Dictionary of functions that takes the number of attempts
460 as argument and returns the time to sleep in seconds.
461 Allowed keys are 'http', 'fragment', 'file_access'
0f446365
SW
462 download_ranges: A callback function that gets called for every video with
463 the signature (info_dict, ydl) -> Iterable[Section].
464 Only the returned sections will be downloaded.
465 Each Section is a dict with the following keys:
5ec1b6b7 466 * start_time: Start time of the section in seconds
467 * end_time: End time of the section in seconds
468 * title: Section title (Optional)
469 * index: Section number (Optional)
0f446365 470 force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
a7dc6a89 471 noprogress: Do not print the progress bar
a831c2ea 472 live_from_start: Whether to download livestreams videos from the start
fe7e0c98 473
8222d8de 474 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 475 the downloader (see yt_dlp/downloader/common.py):
51d9739f 476 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
205a0654 477 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
a7dc6a89 478 continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
59a7a13e 479 external_downloader_args, concurrent_fragment_downloads.
76b1bd67
JMF
480
481 The following options are used by the post processors:
c0b7d117
S
482 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
483 to the binary or its containing directory.
43820c03 484 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
34488702 485 and a list of additional command-line arguments for the
486 postprocessor/executable. The dict can also have "PP+EXE" keys
487 which are used when the given exe is used by the given PP.
488 Use 'default' as the name for arguments to be passed to all PPs
489 For compatibility with youtube-dl, a single list of args
490 can also be used
e409895f 491
492 The following options are used by the extractors:
46f1370e 493 extractor_retries: Number of times to retry for known errors (default: 3)
62bff2c1 494 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 495 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 496 discontinuities such as ad breaks (default: False)
5d3a0e79 497 extractor_args: A dictionary of arguments to be passed to the extractors.
498 See "EXTRACTOR ARGUMENTS" for details.
62b58c09 499 E.g. {'youtube': {'skip': ['dash', 'hls']}}
88f23a18 500 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
1890fc63 501
502 The following options are deprecated and may be removed in the future:
503
fe2ce85a 504 break_on_reject: Stop the download process when encountering a video that
505 has been filtered out.
506 - `raise DownloadCancelled(msg)` in match_filter instead
fe7866d0 507 force_generic_extractor: Force downloader to use the generic extractor
508 - Use allowed_extractors = ['generic', 'default']
7e9a6125 509 playliststart: - Use playlist_items
510 Playlist item to start at.
511 playlistend: - Use playlist_items
512 Playlist item to end at.
513 playlistreverse: - Use playlist_items
514 Download playlist items in reverse order.
1890fc63 515 forceurl: - Use forceprint
516 Force printing final URL.
517 forcetitle: - Use forceprint
518 Force printing title.
519 forceid: - Use forceprint
520 Force printing ID.
521 forcethumbnail: - Use forceprint
522 Force printing thumbnail URL.
523 forcedescription: - Use forceprint
524 Force printing description.
525 forcefilename: - Use forceprint
526 Force printing final filename.
527 forceduration: - Use forceprint
528 Force printing duration.
529 allsubtitles: - Use subtitleslangs = ['all']
530 Downloads all the subtitles of the video
531 (requires writesubtitles or writeautomaticsub)
532 include_ads: - Doesn't work
533 Download ads as well
534 call_home: - Not implemented
535 Boolean, true iff we are allowed to contact the
536 yt-dlp servers for debugging.
537 post_hooks: - Register a custom postprocessor
538 A list of functions that get called as the final step
539 for each video file, after all postprocessors have been
540 called. The filename will be passed as the only argument.
541 hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
542 Use the native HLS downloader instead of ffmpeg/avconv
543 if True, otherwise use ffmpeg/avconv if False, otherwise
544 use downloader suggested by extractor if None.
545 prefer_ffmpeg: - avconv support is deprecated
546 If False, use avconv instead of ffmpeg if both are available,
547 otherwise prefer ffmpeg.
548 youtube_include_dash_manifest: - Use extractor_args
5d3a0e79 549 If True (default), DASH manifests and related
62bff2c1 550 data will be downloaded and processed by extractor.
551 You can reduce network I/O by disabling it if you don't
552 care about DASH. (only for youtube)
1890fc63 553 youtube_include_hls_manifest: - Use extractor_args
5d3a0e79 554 If True (default), HLS manifests and related
62bff2c1 555 data will be downloaded and processed by extractor.
556 You can reduce network I/O by disabling it if you don't
557 care about HLS. (only for youtube)
8417f26b 558 no_color: Same as `color='no_color'`
6148833f 559 no_overwrites: Same as `overwrites=False`
8222d8de
JMF
560 """
561
# Names of info-dict fields that are treated as numeric.
# NOTE(review): the consumers of this set are outside this view — confirm how
# it is used before adding or removing entries.
_NUMERIC_FIELDS = {
    'width', 'height', 'asr', 'audio_channels', 'fps',
    'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
    'timestamp', 'release_timestamp',
    'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
    'average_rating', 'comment_count', 'age_limit',
    'start_time', 'end_time',
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number', 'release_year',
}
c9969434 572
# Keys that belong to a single format dict.
_format_fields = {
    # NB: Keep in sync with the docstring of extractor/common.py
    'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
    'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
    'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
    'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
    'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
    'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
    'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
}
# Maps each deprecated single-value field name to its plural replacement.
_deprecated_multivalue_fields = {
    'album_artist': 'album_artists',
    'artist': 'artists',
    'composer': 'composers',
    'creator': 'creators',
    'genre': 'genres',
}
# Extension sets per media category, built from MEDIA_EXTENSIONS.
# '3gp' is added to the common video extensions for the 'video' category.
_format_selection_exts = {
    'audio': set(MEDIA_EXTENSIONS.common_audio),
    'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
    'storyboards': set(MEDIA_EXTENSIONS.storyboards),
}
595
3511266b 596 def __init__(self, params=None, auto_init=True):
883d4b1e 597 """Create a FileDownloader object with the given options.
598 @param auto_init Whether to load the default extractors and print header (if verbose).
49a57e70 599 Set to 'no_verbose_header' to not print the header
883d4b1e 600 """
e9f9a10f
JMF
601 if params is None:
602 params = {}
592b7485 603 self.params = params
8b7491c8 604 self._ies = {}
56c73665 605 self._ies_instances = {}
1e43a6f7 606 self._pps = {k: [] for k in POSTPROCESS_WHEN}
b35496d8 607 self._printed_messages = set()
1cf376f5 608 self._first_webpage_request = True
ab8e5e51 609 self._post_hooks = []
933605d7 610 self._progress_hooks = []
819e0531 611 self._postprocessor_hooks = []
8222d8de
JMF
612 self._download_retcode = 0
613 self._num_downloads = 0
9c906919 614 self._num_videos = 0
592b7485 615 self._playlist_level = 0
616 self._playlist_urls = set()
a0e07d31 617 self.cache = Cache(self)
6148833f 618 self.__header_cookies = []
34308b30 619
591bb9d3 620 stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
621 self._out_files = Namespace(
622 out=stdout,
623 error=sys.stderr,
624 screen=sys.stderr if self.params.get('quiet') else stdout,
625 console=None if compat_os_name == 'nt' else next(
cf4f42cb 626 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
591bb9d3 627 )
f0795149 628
629 try:
630 windows_enable_vt_mode()
631 except Exception as e:
632 self.write_debug(f'Failed to enable VT mode: {e}')
633
8417f26b
SS
634 if self.params.get('no_color'):
635 if self.params.get('color') is not None:
62b5c94c 636 self.params.setdefault('_warnings', []).append(
637 'Overwriting params from "color" with "no_color"')
8417f26b
SS
638 self.params['color'] = 'no_color'
639
a0b19d31 640 term_allow_color = os.getenv('TERM', '').lower() != 'dumb'
641 no_color = bool(os.getenv('NO_COLOR'))
8417f26b
SS
642
643 def process_color_policy(stream):
644 stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
645 policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
646 if policy in ('auto', None):
a0b19d31 647 if term_allow_color and supports_terminal_sequences(stream):
648 return 'no_color' if no_color else True
649 return False
6148833f 650 assert policy in ('always', 'never', 'no_color'), policy
8417f26b
SS
651 return {'always': True, 'never': False}.get(policy, policy)
652
591bb9d3 653 self._allow_colors = Namespace(**{
8417f26b
SS
654 name: process_color_policy(stream)
655 for name, stream in self._out_files.items_ if name != 'console'
591bb9d3 656 })
819e0531 657
61bdf15f
SS
658 system_deprecation = _get_system_deprecation()
659 if system_deprecation:
660 self.deprecated_feature(system_deprecation.replace('\n', '\n '))
a61f4b28 661
88acdbc2 662 if self.params.get('allow_unplayable_formats'):
663 self.report_warning(
ec11a9f4 664 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
819e0531 665 'This is a developer option intended for debugging. \n'
666 ' If you experience any issues while using this option, '
ec11a9f4 667 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
88acdbc2 668
497074f0 669 if self.params.get('bidi_workaround', False):
670 try:
671 import pty
672 master, slave = pty.openpty()
673 width = shutil.get_terminal_size().columns
674 width_args = [] if width is None else ['-w', str(width)]
675 sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
676 try:
677 self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
678 except OSError:
679 self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
680 self._output_channel = os.fdopen(master, 'rb')
681 except OSError as ose:
682 if ose.errno == errno.ENOENT:
683 self.report_warning(
684 'Could not find fribidi executable, ignoring --bidi-workaround. '
685 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
686 else:
687 raise
688
689 self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
227bf1a3 690 self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
6c5211ce 691 self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
692 self.params['http_headers'].pop('Cookie', None)
693
497074f0 694 if auto_init and auto_init != 'no_verbose_header':
695 self.print_debug_header()
696
be5df5ee
S
697 def check_deprecated(param, option, suggestion):
698 if self.params.get(param) is not None:
86e5f3ed 699 self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
be5df5ee
S
700 return True
701 return False
702
703 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
38cce791
YCH
704 if self.params.get('geo_verification_proxy') is None:
705 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
706
0d1bb027 707 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
708 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
53ed7066 709 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
0d1bb027 710
49a57e70 711 for msg in self.params.get('_warnings', []):
0d1bb027 712 self.report_warning(msg)
ee8dd27a 713 for msg in self.params.get('_deprecation_warnings', []):
da4db748 714 self.deprecated_feature(msg)
0d1bb027 715
8a82af35 716 if 'list-formats' in self.params['compat_opts']:
ec11a9f4 717 self.params['listformats_table'] = False
718
b5ae35ee 719 if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
b868936c 720 # nooverwrites was unnecessarily changed to overwrites
721 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
722 # This ensures compatibility with both keys
723 self.params['overwrites'] = not self.params['nooverwrites']
b5ae35ee 724 elif self.params.get('overwrites') is None:
725 self.params.pop('overwrites', None)
b868936c 726 else:
727 self.params['nooverwrites'] = not self.params['overwrites']
b9d973be 728
e4221b70 729 if self.params.get('simulate') is None and any((
730 self.params.get('list_thumbnails'),
731 self.params.get('listformats'),
732 self.params.get('listsubtitles'),
733 )):
734 self.params['simulate'] = 'list_only'
735
455a15e2 736 self.params.setdefault('forceprint', {})
737 self.params.setdefault('print_to_file', {})
bb66c247 738
739 # Compatibility with older syntax
ca30f449 740 if not isinstance(params['forceprint'], dict):
455a15e2 741 self.params['forceprint'] = {'video': params['forceprint']}
ca30f449 742
97ec5bc5 743 if auto_init:
97ec5bc5 744 self.add_default_info_extractors()
745
3089bc74
S
746 if (sys.platform != 'win32'
747 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
455a15e2 748 and not self.params.get('restrictfilenames', False)):
e9137224 749 # Unicode filesystem API will throw errors (#1474, #13027)
34308b30 750 self.report_warning(
6febd1c1 751 'Assuming --restrict-filenames since file system encoding '
1b725173 752 'cannot encode all characters. '
6febd1c1 753 'Set the LC_ALL environment variable to fix this.')
4a98cdbf 754 self.params['restrictfilenames'] = True
34308b30 755
bf1824b3 756 self._parse_outtmpl()
486dd09e 757
187986a8 758 # Creating format selector here allows us to catch syntax errors before the extraction
759 self.format_selector = (
fa9f30b8 760 self.params.get('format') if self.params.get('format') in (None, '-')
093a1710 761 else self.params['format'] if callable(self.params['format'])
187986a8 762 else self.build_format_selector(self.params['format']))
763
013b50b7 764 hooks = {
765 'post_hooks': self.add_post_hook,
766 'progress_hooks': self.add_progress_hook,
767 'postprocessor_hooks': self.add_postprocessor_hook,
768 }
769 for opt, fn in hooks.items():
770 for ph in self.params.get(opt, []):
771 fn(ph)
71b640cc 772
5bfc8bee 773 for pp_def_raw in self.params.get('postprocessors', []):
774 pp_def = dict(pp_def_raw)
775 when = pp_def.pop('when', 'post_process')
776 self.add_post_processor(
f9934b96 777 get_postprocessor(pp_def.pop('key'))(self, **pp_def),
5bfc8bee 778 when=when)
779
ed39cac5 780 def preload_download_archive(fn):
781 """Preload the archive, if any is specified"""
ae103564 782 archive = set()
ed39cac5 783 if fn is None:
ae103564 784 return archive
941e881e 785 elif not is_path_like(fn):
ae103564 786 return fn
787
49a57e70 788 self.write_debug(f'Loading archive file {fn!r}')
ed39cac5 789 try:
790 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
791 for line in archive_file:
ae103564 792 archive.add(line.strip())
86e5f3ed 793 except OSError as ioe:
ed39cac5 794 if ioe.errno != errno.ENOENT:
795 raise
ae103564 796 return archive
ed39cac5 797
ae103564 798 self.archive = preload_download_archive(self.params.get('download_archive'))
ed39cac5 799
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn when an argument looks like a short video ID that starts with a dash.

    Arguments consisting of a dash followed by exactly ten ID characters are
    flagged, and the warning shows the command line rewritten so that they
    come after a "--" separator.

    @param argv  The list of command-line arguments to inspect
    """
    # short YouTube ID starting with dash?
    short_id_re = re.compile(r'^-[0-9A-Za-z_-]{10}$')
    flagged = [pos for pos, arg in enumerate(argv) if short_id_re.match(arg)]
    if not flagged:
        return

    untouched = [arg for pos, arg in enumerate(argv) if pos not in flagged]
    suspects = [argv[pos] for pos in flagged]
    correct_argv = ['yt-dlp'] + untouched + ['--'] + suspects
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s' %
        args_to_str(correct_argv))
815
8222d8de
JMF
def add_info_extractor(self, ie):
    """Register an InfoExtractor (class or instance) under its ie_key.

    Instances are additionally remembered in the instance cache and bound
    to this downloader; classes are only recorded.
    """
    key = ie.ie_key()
    self._ies[key] = ie
    if isinstance(ie, type):
        # A bare class has nothing to cache or bind yet
        return
    self._ies_instances[key] = ie
    ie.set_downloader(self)
8222d8de 823
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the extractor instance registered under ie_key.

    When no instance has been cached yet, one is created from the class
    looked up by ie_key, registered through add_info_extractor and returned.
    """
    cached = self._ies_instances.get(ie_key)
    if cached is not None:
        return cached
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
835
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register the extractors selected by the "allowed_extractors" param.

    Candidates are the classes yielded by gen_extractor_classes(), keyed by
    their lower-cased IE_NAME, plus an UnsupportedURLIE instance under the
    name "end". The param defaults to ['default'], which expands to every
    candidate whose _ENABLED attribute is truthy; 'all' expands to every
    candidate. Entries may be regular expressions.

    @raises ValueError  If an entry of "allowed_extractors" is an invalid regex
    """
    all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
    all_ies['end'] = UnsupportedURLIE()
    try:
        ie_names = orderedSet_from_options(
            self.params.get('allowed_extractors', ['default']), {
                'all': list(all_ies),
                'default': [name for name, ie in all_ies.items() if ie._ENABLED],
            }, use_regex=True)
    except re.error as e:
        # Chain explicitly so the original regex error is reported as the cause
        raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}') from e
    for name in ie_names:
        self.add_info_extractor(all_ies[name])
    self.write_debug(f'Loaded {len(ie_names)} extractors')
023fa8c4 853
def add_post_processor(self, pp, when='post_process'):
    """Append a PostProcessor to the chain for the given stage and bind it to this downloader.

    @param when  Stage name; must be one of POSTPROCESS_WHEN
    """
    assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
    stage_chain = self._pps[when]
    stage_chain.append(pp)
    pp.set_downloader(self)
859
ab8e5e51
AM
def add_post_hook(self, ph):
    """Register ph as a post hook, after any hooks added earlier"""
    registered = self._post_hooks
    registered.append(ph)
863
def add_progress_hook(self, ph):
    """Register ph as a download progress hook, after any hooks added earlier"""
    registered = self._progress_hooks
    registered.append(ph)
8ab470f1 867
def add_postprocessor_hook(self, ph):
    """Register ph as a postprocessing progress hook.

    The hook is recorded on the downloader and also handed to every
    postprocessor that is already registered, whatever its stage.
    """
    self._postprocessor_hooks.append(ph)
    for registered_pp in itertools.chain.from_iterable(self._pps.values()):
        registered_pp.add_progress_hook(ph)
819e0531 874
1c088fa8 875 def _bidi_workaround(self, message):
5d681e96 876 if not hasattr(self, '_output_channel'):
1c088fa8
PH
877 return message
878
5d681e96 879 assert hasattr(self, '_output_process')
14f25df2 880 assert isinstance(message, str)
6febd1c1 881 line_count = message.count('\n') + 1
0f06bcd7 882 self._output_process.stdin.write((message + '\n').encode())
5d681e96 883 self._output_process.stdin.flush()
0f06bcd7 884 res = ''.join(self._output_channel.readline().decode()
9e1a5b84 885 for _ in range(line_count))
6febd1c1 886 return res[:-len('\n')]
1c088fa8 887
b35496d8 888 def _write_string(self, message, out=None, only_once=False):
889 if only_once:
890 if message in self._printed_messages:
891 return
892 self._printed_messages.add(message)
893 write_string(message, out=out, encoding=self.params.get('encoding'))
734f90bb 894
def to_stdout(self, message, skip_eol=False, quiet=None):
    """Print message to stdout

    The skip_eol and quiet arguments are no longer honoured; passing either
    only triggers a deprecation warning.
    """
    removed_args = (
        ('quiet', quiet is not None),
        ('skip_eol', skip_eol is not False),
    )
    for arg_name, was_passed in removed_args:
        if was_passed:
            self.deprecation_warning(
                f'"YoutubeDL.to_stdout" no longer accepts the argument {arg_name}. '
                'Use "YoutubeDL.to_screen" instead')
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.out)
cf4f42cb 904
def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
    """Print message to screen if not in quiet mode

    A configured "logger" param takes precedence: the message goes to its
    debug() method and nothing is written to the screen.

    @param skip_eol   Do not append a newline
    @param quiet      Overrides the "quiet" param when not None
    @param only_once  Forwarded to _write_string
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return

    suppressed = self.params.get('quiet') if quiet is None else quiet
    if suppressed and not self.params.get('verbose'):
        return

    body = self._bidi_workaround(message)
    eol = '' if skip_eol else '\n'
    self._write_string(f'{body}{eol}', self._out_files.screen, only_once=only_once)
8222d8de 915
def to_stderr(self, message, only_once=False):
    """Print message to stderr

    A configured "logger" param takes precedence: the message goes to its
    error() method instead of being written.
    """
    assert isinstance(message, str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.error, only_once=only_once)
cf4f42cb 923
def _send_console_code(self, code):
    """Write a terminal control sequence to the console stream.

    Nothing is written on Windows (compat_os_name == 'nt') or when no
    console stream is available.
    """
    if compat_os_name != 'nt' and self._out_files.console:
        self._write_string(code, self._out_files.console)
8222d8de 928
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console window title to message, if the "consoletitle" param is set.

    Terminal sequences are stripped from the message first. On Windows
    (compat_os_name == 'nt') the title is set through kernel32, provided
    the process has a console window; elsewhere an escape sequence is sent
    through _send_console_code.
    """
    if not self.params.get('consoletitle', False):
        return
    message = remove_terminal_sequences(message)
    if compat_os_name == 'nt':
        # Imported locally: this is the only use in this method and the name
        # must be in scope for the Windows branch to work at all
        import ctypes

        if ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    else:
        self._send_console_code(f'\033]0;{message}\007')
1e5b9a95 940
def save_console_title(self):
    """Push the current console title onto the terminal's title stack.

    Only done when the "consoletitle" param is set and "simulate" is not.
    """
    if self.params.get('consoletitle') and not self.params.get('simulate'):
        self._send_console_code('\033[22;0t')  # Save the title on stack
bdde425c
PH
945
def restore_console_title(self):
    """Pop the console title previously pushed onto the terminal's title stack.

    Only done when the "consoletitle" param is set and "simulate" is not.
    """
    if self.params.get('consoletitle') and not self.params.get('simulate'):
        self._send_console_code('\033[23;0t')  # Restore the title from stack
bdde425c
PH
950
def __enter__(self):
    """Enter the context manager: save the console title and return this object"""
    self.save_console_title()
    return self
954
def save_cookies(self):
    """Save the cookie jar, but only when a "cookiefile" param was given"""
    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save()
bdde425c 958
def __exit__(self, *args):
    """Leave the context manager: restore the console title, then close this object.

    The exception details passed in *args are ignored and nothing is
    returned, so an exception raised inside the `with` body is not suppressed.
    """
    self.restore_console_title()
    self.close()
962
def close(self):
    """Save cookies and shut down the request director if one was created.

    The director is looked up in the instance dict, so closing does not
    create one just to close it; afterwards the stored director is removed.
    """
    self.save_cookies()
    if '_request_director' not in vars(self):
        return
    self._request_director.close()
    del self._request_director
c365dba8 968
def trouble(self, message=None, tb=None, is_error=True):
    """Determine action to take when a download problem appears.

    The message (if any) is printed to stderr, followed in verbose mode by
    a traceback. Then, unless is_error is false, either DownloadError is
    raised or, when the "ignoreerrors" param is set, the download return
    code is set to 1.

    @param tb          If given, is additional traceback information
    @param is_error    Whether to raise error according to ignorerrors
    """

    def wrapped_exc_info():
        # exc_info carried by the exception currently being handled, if it has a usable one
        current = sys.exc_info()[1]
        if hasattr(current, 'exc_info') and current.exc_info[0]:
            return current.exc_info
        return None

    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                pieces = []
                inner = wrapped_exc_info()
                if inner is not None:
                    pieces.append(''.join(traceback.format_exception(*inner)))
                pieces.append(encode_compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if not is_error:
        return

    if not self.params.get('ignoreerrors'):
        exc_info = wrapped_exc_info() or sys.exc_info()
        raise DownloadError(message, exc_info)
    self._download_retcode = 1
1002
# Named text styles. Members are passed as the style argument of the
# _format_out/_format_screen/_format_err helpers (e.g. Styles.WARNING for
# the "WARNING:" prefix and Styles.ERROR for the "ERROR:" prefix).
Styles = Namespace(
    HEADERS='yellow',
    EMPHASIS='light blue',
    FILENAME='green',
    ID='green',
    DELIM='blue',
    ERROR='red',
    BAD_FORMAT='light red',
    WARNING='yellow',
    SUPPRESS='light black',
)
ec11a9f4 1014
7578d77d 1015 def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
e5a998f3 1016 text = str(text)
ec11a9f4 1017 if test_encoding:
1018 original_text = text
5c104538 1019 # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
1020 encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
ec11a9f4 1021 text = text.encode(encoding, 'ignore').decode(encoding)
1022 if fallback is not None and text != original_text:
1023 text = fallback
8417f26b 1024 return format_text(text, f) if allow_colors is True else text if fallback is None else fallback
ec11a9f4 1025
591bb9d3 1026 def _format_out(self, *args, **kwargs):
1027 return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
1028
ec11a9f4 1029 def _format_screen(self, *args, **kwargs):
591bb9d3 1030 return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
ec11a9f4 1031
1032 def _format_err(self, *args, **kwargs):
591bb9d3 1033 return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
819e0531 1034
def report_warning(self, message, only_once=False):
    '''
    Report a warning.

    With a "logger" param configured, the bare message goes to its
    warning() method. Otherwise, unless the "no_warnings" param is set,
    the message is printed to stderr prefixed with 'WARNING:' (styled
    with Styles.WARNING).
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
8222d8de 1046
def deprecation_warning(self, message, *, stacklevel=0):
    """Emit a deprecation warning through the module-level deprecation_warning helper.

    The helper is given report_error as its printer, with is_error=False.

    @param stacklevel  Extra stack levels to skip; one is added for this wrapper
    """
    helper_options = {
        'stacklevel': stacklevel + 1,
        'printer': self.report_error,
        'is_error': False,
    }
    deprecation_warning(message, **helper_options)
1050
def deprecated_feature(self, message):
    """Report the use of a deprecated feature.

    With a "logger" param configured, the notice goes to its warning()
    method only. Otherwise it is printed once to stderr, prefixed with
    'Deprecated Feature:' (styled with Styles.ERROR).
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(f'Deprecated Feature: {message}')
        # Do not fall through: to_stderr would route the same notice to
        # logger.error, reporting it a second time at a higher severity
        return
    self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
ee8dd27a 1055
def report_error(self, message, *args, **kwargs):
    '''
    Prefix the message with 'ERROR:' (styled with Styles.ERROR) and hand it
    to trouble, forwarding any further arguments unchanged.
    '''
    prefix = self._format_err("ERROR:", self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', *args, **kwargs)
8222d8de 1062
def write_debug(self, message, only_once=False):
    '''Emit a "[debug] "-prefixed message, but only when the "verbose" param is set.

    The message goes to the debug() method of the "logger" param when one
    is configured, else to stderr.
    '''
    if not self.params.get('verbose', False):
        return
    message = f'[debug] {message}'
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    self.to_stderr(message, only_once)
0760b0a7 1072
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name is already fully downloaded.

    If printing the name fails with UnicodeEncodeError, a message without
    the name is printed instead.
    """
    nameless_notice = '[download] The file has already been downloaded'
    try:
        self.to_screen('[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        self.to_screen(nameless_notice)
8222d8de 1079
def report_file_delete(self, file_name):
    """Tell the user that the existing file file_name is going to be deleted.

    If printing the name fails with UnicodeEncodeError, a message without
    the name is printed instead.
    """
    nameless_notice = 'Deleting existing file'
    try:
        self.to_screen('Deleting existing file %s' % file_name)
    except UnicodeEncodeError:
        self.to_screen(nameless_notice)
0c3d0f51 1086
def raise_no_formats(self, info, forced=False, *, msg=None):
    """Raise ExtractorError for a video without formats, or only warn about it.

    A warning is issued instead of raising when the
    "ignore_no_formats_error" param is set and forced is false.

    @param info    Info dict; "_has_drm" selects the default message, while "id"
                   and "extractor" are attached to the raised error
    @param forced  Raise even if the error is configured to be ignored
    @param msg     Message to use instead of the default one; giving it also
                   marks the raised error as expected
    """
    has_drm = info.get('_has_drm')
    ignored = self.params.get('ignore_no_formats_error')
    expected = bool(msg)
    if not msg:
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'

    if ignored and not forced:
        self.report_warning(msg)
        return
    raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                         expected=has_drm or ignored or expected)
1096
def parse_outtmpl(self):
    """Deprecated: normalize the "outtmpl" param and return it.

    Issues a deprecation warning, then delegates to _parse_outtmpl.
    """
    self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
    self._parse_outtmpl()
    parsed = self.params['outtmpl']
    return parsed
1101
def _parse_outtmpl(self):
    """Normalize the "outtmpl" param into a dict with every default template filled in.

    A non-dict value becomes the 'default' entry. Every key of
    DEFAULT_OUTTMPL that is missing (or None) receives the default
    template, with spaces removed when "restrictfilenames" is set.
    """
    if self.params.get('restrictfilenames'):  # Remove spaces in the default template
        def sanitize(tmpl):
            return tmpl.replace(' - ', ' ').replace(' ', '-')
    else:
        sanitize = IDENTITY

    outtmpl = self.params.setdefault('outtmpl', {})
    if not isinstance(outtmpl, dict):
        outtmpl = {'default': outtmpl}
        self.params['outtmpl'] = outtmpl

    for tmpl_key, default_tmpl in DEFAULT_OUTTMPL.items():
        if outtmpl.get(tmpl_key) is None:
            outtmpl[tmpl_key] = sanitize(default_tmpl)
de6000d9 1111
def get_output_path(self, dir_type='', filename=None):
    """Join the configured directories with filename and sanitize the result.

    @param dir_type  Key of the "paths" param whose directory goes between
                     the 'home' directory and the filename; '' for none
    @param filename  File name to append, if any
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    joined = os.path.join(home_dir, type_dir, filename or '')
    return sanitize_path(joined, force=self.params.get('windowsfilenames'))
1120
@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Apply expand_path to an output template while keeping '%%' and '$$' intact."""
    # expand_path translates '%%' into '%' and '$$' into '$'
    # correspondingly that is not what we want since we need to keep
    # '%%' intact for template dict substitution step. Working around
    # with boundary-alike separator hack.
    sep = ''.join(random.choices(string.ascii_letters, k=32))
    for special in ('%', '$'):
        outtmpl = outtmpl.replace(special * 2, f'{special}{sep}{special}')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    expanded = expand_path(outtmpl)
    return expanded.replace(sep, '')
1135
@staticmethod
def escape_outtmpl(outtmpl):
    ''' Escape any remaining strings like %s, %abc% etc. '''
    def add_percent(mobj):
        # Matches that carry a key are left as they are; the rest get an extra '%'
        extra = '' if mobj.group('has_key') else '%'
        return extra + mobj.group(0)

    keyless_format_re = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')
    return re.sub(keyless_format_re, add_percent, outtmpl)
1143
@classmethod
def validate_outtmpl(cls, outtmpl):
    ''' @return None or Exception object '''
    def as_plain_string(mobj):
        # Swap the custom conversion type for a plain "s"
        return f'{mobj.group(0)[:-1]}s'

    custom_format_re = STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]')
    outtmpl = re.sub(custom_format_re, as_plain_string, cls._outtmpl_expandpath(outtmpl))
    try:
        cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
    except ValueError as err:
        return err
    else:
        return None
1156
03b4de72 1157 @staticmethod
1158 def _copy_infodict(info_dict):
1159 info_dict = dict(info_dict)
09b49e1f 1160 info_dict.pop('__postprocessors', None)
415f8d51 1161 info_dict.pop('__pending_error', None)
03b4de72 1162 return info_dict
1163
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
    @param sanitize    Whether to sanitize the output as a filename.
                       For backward compatibility, a function can also be passed
    @return            (outtmpl, TMPL_DICT): the template with every keyed field replaced
                       by a generated key, and the dict mapping those keys to their values
    """

    # NOTE: this mutates the caller's dict (before the copy below) on purpose
    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    info_dict = self._copy_infodict(info_dict)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
    info_dict['video_autonumber'] = self._num_videos
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
        'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    # Filled by create_key: generated key -> value to substitute
    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
    # Arithmetic is always done on floats (values go through float_or_none first)
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
        '*': float.__mul__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int, slice or "{field, ...}"
    FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
    FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
        'inner': FIELD_INNER_RE,
        'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
    }
    MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    # Grammar of what is inside "%(...)": [-]fields[maths][>strf_format][,alternate][&replacement][|default]
    INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
        (?P<negate>-)?
        (?P<fields>{FIELD_RE})
        (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
        (?:>(?P<strf_format>.+?))?
        (?P<remaining>
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
        )$''')

    def _from_user_input(field):
        # Convert one textual path component into a traversal key:
        # ":" -> Ellipsis, "a:b[:c]" -> slice, integer text -> int, anything else stays a string
        if field == ':':
            return ...
        elif ':' in field:
            return slice(*map(int_or_none, field.split(':')))
        elif int_or_none(field) is not None:
            return int(field)
        return field

    def _traverse_infodict(fields):
        # Split on "." while keeping each "{...}" group as a single component
        fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                  for f in ([x] if x.startswith('{') else x.split('.'))]
        # Drop an empty first and/or last component
        for i in (0, -1):
            if fields and not fields[i]:
                fields.pop(i)

        for i, f in enumerate(fields):
            if not f.startswith('{'):
                fields[i] = _from_user_input(f)
                continue
            assert f.endswith('}'), f'No closing brace for {f} in {fields}'
            # "{a,b.c}" becomes a dict mapping each listed field to its own path
            fields[i] = {k: list(map(_from_user_input, k.split('.'))) for k in f[1:-1].split(',')}

        return traverse_obj(info_dict, fields, traverse_string=True)

    def get_value(mdict):
        # Resolve one parsed field expression (a groupdict of INTERNAL_FORMAT_RE) to its value
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            operator = None
            # Consume alternating operator and operand tokens from the front of the string
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if operator is None:
                    operator = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                # The operand is a literal number or, failing that, another field
                offset = float_or_none(item)
                if offset is None:
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = operator(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    # e.g. an operand that could not be resolved to a number
                    return None
                operator = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
        if sanitize and value == '':
            value = None
        return value

    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
        # Default sanitizer; with the "filename-sanitization" compat option,
        # whether the value is treated as an ID is guessed from the field name
        return sanitize_filename(str(value), restricted=restricted, is_id=(
            bool(re.search(r'(^|[_.])id(\.|$)', key))
            if 'filename-sanitization' in self.params['compat_opts']
            else NO_DEFAULT))

    # `sanitize` may be a callable (backward compatibility); from here on it is only a flag
    sanitizer = sanitize if callable(sanitize) else filename_sanitizer
    sanitize = bool(sanitize)

    def _dumpjson_default(obj):
        # json.dumps fallback: sets and LazyLists become lists, anything else its repr
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        return repr(obj)

    class _ReplacementFormatter(string.Formatter):
        # Formatter for "&replacement" text: only numbered fields are accepted,
        # and every one of them resolves to the single value being formatted
        def get_field(self, field_name, args, kwargs):
            if field_name.isdigit():
                return args[0], -1
            raise ValueError('Unsupported field')

    replacement_formatter = _ReplacementFormatter()

    def create_key(outer_mobj):
        # re.sub callback: evaluate one "%(...)X" expression, store its value in
        # TMPL_DICT under a generated key and return the rewritten format spec
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        value, replacement, default, last_field = None, None, na, ''
        # Try each comma-separated alternate in turn until one yields a value
        while mobj:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else default
            value = get_value(mobj)
            last_field, replacement = mobj['fields'], mobj['replacement']
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
            else:
                break

        if None not in (value, replacement):
            try:
                value = replacement_formatter.format(replacement, value)
            except ValueError:
                value, default = None, na

        fmt = outer_mobj.group('format')
        if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int):
            fmt = f'0{field_size_compat_map[last_field]:d}d'

        flags = outer_mobj.group('conversion') or ''
        # Same spec with the conversion type replaced by "s"
        str_fmt = f'{fmt[:-1]}s'
        if value is None:
            value, fmt = default, 's'
        elif fmt[-1] == 'l':  # list
            delim = '\n' if '#' in flags else ', '
            value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
        elif fmt[-1] == 'j':  # json
            value, fmt = json.dumps(
                value, default=_dumpjson_default,
                indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
        elif fmt[-1] == 'h':  # html
            value, fmt = escapeHTML(str(value)), str_fmt
        elif fmt[-1] == 'q':  # quoted
            value = map(str, variadic(value) if '#' in flags else [value])
            value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
        elif fmt[-1] == 'B':  # bytes
            value = f'%{str_fmt}'.encode() % str(value).encode()
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif fmt[-1] == 'U':  # unicode normalized
            # NOTE(review): unicodedata.normalize() only accepts str, so a non-string
            # value would raise TypeError here - confirm whether that is intended
            value, fmt = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                value), str_fmt
        elif fmt[-1] == 'D':  # decimal suffix
            num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
            value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                          factor=1024 if '#' in flags else 1000)
        elif fmt[-1] == 'S':  # filename sanitization
            value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
        elif fmt[-1] == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif fmt[-1] not in 'rsa':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            # If value is an object, sanitize might convert it to a string
            # So we convert it to repr first
            if fmt[-1] == 'r':
                value, fmt = repr(value), str_fmt
            elif fmt[-1] == 'a':
                value, fmt = ascii(value), str_fmt
            if fmt[-1] in 'csra':
                value = sanitizer(last_field, value)

        # The generated key embeds NUL characters and the original conversion type
        key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1386
def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Expand an output template against info_dict and return the resulting string

    Any extra positional/keyword arguments are forwarded to prepare_outtmpl.
    """
    prepared_tmpl, tmpl_values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    escaped_tmpl = self.escape_outtmpl(prepared_tmpl)
    return escaped_tmpl % tmpl_values
1390
def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
    """Evaluate an output template into a filename (without applying output paths)

    @param info_dict    The info dict used to fill in the template
    @param outtmpl      An explicit template; mutually exclusive with tmpl_type
    @param tmpl_type    Key of the template to pick from params['outtmpl'];
                        falls back to the 'default' template

    Returns the filename, or None if the template evaluates to nothing
    or a ValueError is raised while evaluating it (which is reported).
    """
    assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
    if outtmpl is None:
        templates = self.params['outtmpl']
        outtmpl = templates.get(tmpl_type or 'default', templates['default'])
    try:
        expanded_tmpl = self._outtmpl_expandpath(outtmpl)
        filename = self.evaluate_outtmpl(expanded_tmpl, info_dict, True)
        if not filename:
            return None

        if tmpl_type in ('', 'temp'):
            final_ext = self.params.get('final_ext')
            current_ext = info_dict.get('ext')
            # The template was rendered with final_ext; swap the real ext back in
            has_final_ext = (
                final_ext and current_ext and final_ext != current_ext
                and filename.endswith(f'.{final_ext}'))
            if has_final_ext:
                filename = replace_extension(filename, current_ext, final_ext)
        elif tmpl_type:
            forced_ext = OUTTMPL_TYPES[tmpl_type]
            if forced_ext:
                filename = replace_extension(filename, forced_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        max_length = self.params.get('trim_file_name', False)
        if max_length:
            # Only the part before the last two dot-separated suffixes is trimmed
            stem, *suffixes = filename.rsplit('.', 2)
            filename = join_nonempty(stem[:max_length], *suffixes, delim='.')

        return filename
    except ValueError as err:
        self.report_error(
            'Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
1420
def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
    """Generate the output filename

    @param info_dict    The info dict used to fill in the output template
    @param dir_type     Type of the file; selects the template and is passed
                        on to get_output_path
    @param outtmpl      An explicit output template; mutually exclusive with dir_type
    @param warn         Whether to warn when the `paths` param cannot be applied

    Returns '' if no filename could be generated for a dir_type other than
    '' or 'temp'; otherwise whatever empty value _prepare_filename gave
    (None or ''), '-' unchanged, or the filename passed through get_output_path.
    """
    if outtmpl:
        assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
        dir_type = None
    filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
    if not filename:
        # Bail out before the warning checks below: `os.path.isabs(None)`
        # raises TypeError when the template failed to evaluate
        return filename if dir_type in ('', 'temp') else ''

    if warn and self.params.get('paths'):
        if filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        elif os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
    if filename == '-':
        return filename

    return self.get_output_path(dir_type, filename)
0202b52a 1441
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """Check an entry against the download archive and the configured filters

    Returns None if the file should be downloaded, otherwise the reason
    for skipping it. The reason is printed unless `silent`, and the
    relevant break exception is raised if the matching break option is set.
    """
    if 'playlist-match-filter' in self.params['compat_opts']:
        _type = 'video'
    else:
        _type = info_dict.get('_type', 'video')
    assert incomplete or _type == 'video', 'Only video result can be considered complete'

    video_title = info_dict.get('title', info_dict.get('id', 'entry'))

    def find_rejection():
        # Playlists, and URLs not known to be single videos, are never filtered here
        if _type in ('playlist', 'multi_video'):
            return None
        if _type in ('url', 'url_transparent'):
            is_single_video = try_call(
                lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url']))
            if not is_single_video:
                return None

        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            wanted_pattern = self.params.get('matchtitle', False)
            if wanted_pattern and not re.search(wanted_pattern, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + wanted_pattern + '"'
            unwanted_pattern = self.params.get('rejecttitle', False)
            if unwanted_pattern and re.search(unwanted_pattern, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + unwanted_pattern + '"'

        upload_date = info_dict.get('upload_date')
        if upload_date is not None:
            allowed_dates = self.params.get('daterange', DateRange())
            if upload_date not in allowed_dates:
                return f'{date_from_str(upload_date).isoformat()} upload date is not in range {allowed_dates}'

        views = info_dict.get('view_count')
        if views is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and views < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, views, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and views > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, views, max_views)

        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        match_filter = self.params.get('match_filter')
        if match_filter is None:
            return None

        cancel_exc = None
        try:
            try:
                verdict = match_filter(info_dict, incomplete=incomplete)
            except TypeError:
                # For backward compatibility
                verdict = None if incomplete else match_filter(info_dict)
        except DownloadCancelled as err:
            if err.msg is not NO_DEFAULT:
                raise
            verdict, cancel_exc = err.msg, err

        if verdict is not NO_DEFAULT:
            return verdict

        # NO_DEFAULT means the decision is left to the user; ask until answered
        while True:
            filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
            prompt = self._format_screen(f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)
            reply = input(prompt).lower().strip()
            if reply in {'y', ''}:
                return None
            if reply == 'n':
                if cancel_exc:
                    raise type(cancel_exc)(f'Skipping {video_title}')
                return f'Skipping {video_title}'

    if self.in_download_archive(info_dict):
        reason = ''.join((
            format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
            format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
            'has already been recorded in the archive'))
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        try:
            reason = find_rejection()
        except DownloadCancelled as e:
            reason, break_opt, break_err = e.msg, 'match_filter', type(e)
        else:
            break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return reason
    if not silent:
        self.to_screen('[download] ' + reason)
    if self.params.get(break_opt, False):
        raise break_err()
    return reason
fe7e0c98 1532
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every key of extra_info that info_dict does not have yet'''
    for field, fallback in extra_info.items():
        if field not in info_dict:
            info_dict[field] = fallback
1538
409e1828 1539 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
61aa5ba3 1540 process=True, force_generic_extractor=False):
41d1cca3 1541 """
17ffed18 1542 Extract and return the information dictionary of the URL
41d1cca3 1543
1544 Arguments:
17ffed18 1545 @param url URL to extract
41d1cca3 1546
1547 Keyword arguments:
17ffed18 1548 @param download Whether to download videos
1549 @param process Whether to resolve all unresolved references (URLs, playlist items).
1550 Must be True for download to work
1551 @param ie_key Use only the extractor with this key
1552
1553 @param extra_info Dictionary containing the extra values to add to the info (For internal use only)
1554 @force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic')
41d1cca3 1555 """
fe7e0c98 1556
409e1828 1557 if extra_info is None:
1558 extra_info = {}
1559
61aa5ba3 1560 if not ie_key and force_generic_extractor:
d22dec74
S
1561 ie_key = 'Generic'
1562
8222d8de 1563 if ie_key:
fe7866d0 1564 ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
8222d8de
JMF
1565 else:
1566 ies = self._ies
1567
fe7866d0 1568 for key, ie in ies.items():
8222d8de
JMF
1569 if not ie.suitable(url):
1570 continue
1571
1572 if not ie.working():
6febd1c1
PH
1573 self.report_warning('The program functionality for this site has been marked as broken, '
1574 'and will probably not work.')
8222d8de 1575
1151c407 1576 temp_id = ie.get_temp_id(url)
fe7866d0 1577 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
2b029ca0
AK
1578 self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
1579 'has already been recorded in the archive')
5e5be0c0 1580 if self.params.get('break_on_existing', False):
1581 raise ExistingVideoReached()
a0566bbf 1582 break
fe7866d0 1583 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
a0566bbf 1584 else:
fe7866d0 1585 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1586 self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1587 tb=False if extractors_restricted else None)
a0566bbf 1588
def _handle_extraction_exceptions(func):
    """Decorator for methods that run extraction; reports errors instead of propagating them

    - Cancellation and LazyList/PagedList index errors are re-raised untouched
    - ReExtractInfo makes the wrapped function run again
    - GeoRestrictedError and ExtractorError are reported via report_error
    - Any other Exception is reported if the `ignoreerrors` param is set, else re-raised

    When an error was reported, the wrapper returns None.
    """
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        while True:
            try:
                return func(self, *args, **kwargs)
            except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                raise
            except ReExtractInfo as e:
                if e.expected:
                    self.to_screen(f'{e}; Re-extracting data')
                else:
                    self.to_stderr('\r')
                    self.report_warning(f'{e}; Re-extracting data')
                continue
            except GeoRestrictedError as e:
                availability = ''
                if e.countries:
                    country_names = ', '.join(map(ISO3166Utils.short2full, e.countries))
                    availability = '\nThis video is available in %s.' % country_names
                self.report_error(
                    e.msg + availability
                    + '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.')
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(str(e), e.format_traceback())
            except Exception as e:
                if not self.params.get('ignoreerrors'):
                    raise
                self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
            # Reached only after an error was reported
            return None
    return wrapper
1620
def _wait_for_video(self, ie_result=None):
    """Wait for a not-yet-available video and then request re-extraction

    Does nothing unless the `wait_for_video` param is set and ie_result is a
    video result (the default `_type`) that has neither `formats` nor `url`.

    @param ie_result    The extracted info dict. May be omitted, in which case
                        an empty dict is assumed
    @raises ReExtractInfo   Once the wait period has ended, or when the wait
                            is interrupted with Ctrl+C
    """
    # None instead of a `{}` default, which would be one object shared by all calls
    if ie_result is None:
        ie_result = {}

    if (not self.params.get('wait_for_video')
            or ie_result.get('_type', 'video') != 'video'
            or ie_result.get('formats') or ie_result.get('url')):
        return

    def format_dur(dur):
        return '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]

    last_msg = ''

    def progress(msg):
        nonlocal last_msg
        full_msg = f'{msg}\n'
        if not self.params.get('noprogress'):
            # Pad with spaces so that a shorter message fully overwrites the previous one
            full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
        elif last_msg:
            # With progress output disabled, only the first message is printed
            return
        self.to_screen(full_msg, skip_eol=True)
        last_msg = msg

    min_wait, max_wait = self.params.get('wait_for_video')
    diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
    if diff is None and ie_result.get('live_status') == 'is_upcoming':
        diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
        self.report_warning('Release time of video is not known')
    elif ie_result and (diff or 0) <= 0:
        self.report_warning('Video should already be available according to extracted info')
    # Clamp the waiting time to the range [min_wait, max_wait]
    diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
    self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

    wait_till = time.time() + diff
    try:
        while True:
            diff = wait_till - time.time()
            if diff <= 0:
                progress('')
                raise ReExtractInfo('[wait] Wait period ended', expected=True)
            progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
            time.sleep(1)
    except KeyboardInterrupt:
        progress('')
        raise ReExtractInfo('[wait] Interrupted by user', expected=True)
    except BaseException as e:
        if not isinstance(e, ReExtractInfo):
            self.to_screen('')
        raise
1666
def _load_cookies(self, data, *, autoscope=True):
    """Loads cookies from a `Cookie` header

    This tries to work around the security vulnerability of passing cookies to every domain.
    See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj

    @param data         The Cookie header as string to load the cookies from
    @param autoscope    If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains
                        If `True`, save cookies for later to be stored in the jar with a limited scope
                        If a URL, save cookies in the jar with the domain of the URL
    @raises ValueError  If autoscope is set and a cookie carries any attribute
    """
    for morsel in LenientSimpleCookie(data).values():
        if autoscope and any(morsel.values()):
            raise ValueError('Invalid syntax in Cookie Header')

        domain = morsel.get('domain') or ''
        path = morsel.get('path')
        expiry = morsel.get('expires')
        if expiry == '':  # 0 is valid
            expiry = None
        prepared_cookie = http.cookiejar.Cookie(
            version=morsel.get('version') or 0,
            name=morsel.key,
            value=morsel.value,
            port=None,
            port_specified=False,
            domain=domain,
            domain_specified=True,
            domain_initial_dot=True,
            path=path or '',
            path_specified=bool(path),
            secure=morsel.get('secure') or False,
            expires=expiry,
            discard=False,
            comment=None,
            comment_url=None,
            rest={})

        if domain:
            # Explicitly scoped cookies can go straight into the jar
            self.cookiejar.set_cookie(prepared_cookie)
        elif autoscope is True:
            self.deprecated_feature(
                'Passing cookies as a header is a potential security risk; '
                'they will be scoped to the domain of the downloaded urls. '
                'Please consider loading cookies from a file or browser instead.')
            self.__header_cookies.append(prepared_cookie)
        elif autoscope:
            # autoscope is a URL; scope the cookie to its domain
            self.report_warning(
                'The extractor result contains an unscoped cookie as an HTTP header. '
                f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
                only_once=True)
            self._apply_header_cookies(autoscope, [prepared_cookie])
        else:
            self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
                              tb=False, is_error=False)
1708
def _apply_header_cookies(self, url, cookies=None):
    """Applies stray header cookies to the provided url

    Copies of the given cookies (or of the stored header cookies if none
    are given) are scoped to the hostname of `url` and put in the cookiejar.
    While this is not ideal, it helps reduce the risk of them being sent
    to an unintended destination while mostly maintaining compatibility.
    Nothing is done if `url` has no hostname.
    """
    hostname = urllib.parse.urlparse(url).hostname
    if not hostname:
        return

    scoped_domain = f'.{hostname}'
    for stray_cookie in cookies or self.__header_cookies:
        # Work on a copy so that the original cookie stays unscoped
        scoped_cookie = copy.copy(stray_cookie)
        scoped_cookie.domain = scoped_domain
        self.cookiejar.set_cookie(scoped_cookie)
1723
@_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run the extractor `ie` on `url` and optionally process its result

    Returns the result of process_ie_result if `process`, else the raw
    extractor result with the default extra info added.
    Returns None if the extractor returned nothing.
    """
    self._apply_header_cookies(url)

    try:
        ie_result = ie.extract(url)
    except UserNotLive as e:
        if process:
            if self.params.get('wait_for_video'):
                self.report_warning(e)
            self._wait_for_video()
        raise

    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
        return None

    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}

    original_url = extra_info.get('original_url')
    if original_url:
        ie_result.setdefault('original_url', original_url)
    self.add_default_extra_info(ie_result, ie, url)

    if not process:
        return ie_result
    self._wait_for_video(ie_result)
    return self.process_ie_result(ie_result, download, extra_info)
fe7e0c98 1753
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the URL and extractor related fields that ie_result is missing

    @param ie_result    The info dict to update in-place
    @param ie           The extractor that gave the result, or None
    @param url          The URL that was extracted, or None
    """
    if url is not None:
        url_fields = {'webpage_url': url, 'original_url': url}
        self.add_extra_info(ie_result, url_fields)

    # Read back from ie_result, since it may already have had its own webpage_url
    webpage_url = ie_result.get('webpage_url')
    if webpage_url:
        self.add_extra_info(ie_result, {
            'webpage_url_basename': url_basename(webpage_url),
            'webpage_url_domain': get_domain(webpage_url),
        })

    if ie is None:
        return
    self.add_extra_info(ie_result, {'extractor': ie.IE_NAME, 'extractor_key': ie.ie_key()})
ea38e55f 1771
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    @param ie_result    The info dict given by the extractor; handled according to its `_type`
    @param download     Whether to download the videos
    @param extra_info   Dictionary with extra values to add to the resulting info dicts

    Nothing is returned for a playlist whose webpage_url is already being processed.
    An Exception is raised for an unknown `_type`.
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(
            ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
        if ie_result.get('original_url') and not extra_info.get('original_url'):
            extra_info = {'original_url': ie_result['original_url'], **extra_info}

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # The URL is not resolved; printing/archiving is done on a copy
            # and the untouched ie_result is returned
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            if ie and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            info_copy, _ = self.pre_process(info_copy)
            self._fill_common_fields(info_copy, False)
            self.__forced_printings(info_copy)
            self._raise_pending_errors(info_copy)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        self._raise_pending_errors(ie_result)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        exempted_fields = {'_type', 'url', 'ie_key'}
        if not ie_result.get('section_end') and ie_result.get('section_start') is None:
            # For video clips, the id etc of the clip extractor should be used
            exempted_fields |= {'id', 'extractor', 'extractor_key'}

        new_result = info.copy()
        new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result.get('webpage_url')  # Playlists may not have webpage_url
        if webpage_url and webpage_url in self._playlist_urls:
            # The parentheses are required: `%` binds tighter than `or`, so
            # without them the id would never be used in place of a missing title
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._fill_common_fields(ie_result, False)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            # Forget the seen URLs once the outermost playlist is done
            self._playlist_level -= 1
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1905
def _ensure_dir_exists(self, path):
    """Call make_dir on path with report_error as its error callback; returns what make_dir returns"""
    outcome = make_dir(path, self.report_error)
    return outcome
1908
@staticmethod
def _playlist_infodict(ie_result, strict=False, **kwargs):
    """Build the playlist related fields from a playlist's info dict

    @param ie_result    The info dict of the playlist
    @param strict       If set, return only the `playlist*` fields (and kwargs)
    @param kwargs       Extra fields to include; they override the `playlist*` fields

    Unless `strict`, the webpage URL fields (when the playlist has a webpage_url),
    the index fields and the extractor fields are included too.
    """
    playlist_id, playlist_title = ie_result.get('id'), ie_result.get('title')
    info = {
        'playlist_count': ie_result.get('playlist_count'),
        'playlist': playlist_title or playlist_id,
        'playlist_id': playlist_id,
        'playlist_title': playlist_title,
        'playlist_uploader': ie_result.get('uploader'),
        'playlist_uploader_id': ie_result.get('uploader_id'),
    }
    info.update(kwargs)
    if strict:
        return info

    webpage_url = ie_result.get('webpage_url')
    if webpage_url:
        info['webpage_url'] = webpage_url
        info['webpage_url_basename'] = url_basename(webpage_url)
        info['webpage_url_domain'] = get_domain(webpage_url)

    info.update({
        'playlist_index': 0,
        '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
        'extractor': ie_result['extractor'],
        'extractor_key': ie_result['extractor_key'],
    })
    return info
3b603dbd 1935
def __process_playlist(self, ie_result, download):
    """Process each entry in the playlist

    Writes the playlist's own files if allowed, processes every requested
    entry that passes the filters, and stores the processed entries back
    into ie_result.

    Returns the result of the playlist post-processors, or None if the
    playlist is rejected by the filters or one of its files fails to be written.
    """
    assert ie_result['_type'] in ('playlist', 'multi_video')

    common_info = self._playlist_infodict(ie_result, strict=True)
    title = common_info.get('playlist') or '<Untitled>'
    if self._match_entry(common_info, incomplete=True) is not None:
        return None
    self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

    playlist_entries = PlaylistEntries(self, ie_result)
    pending = orderedSet(playlist_entries.get_requested_items(), lazy=True)

    lazy = self.params.get('lazy_playlist')
    if lazy:
        resolved, total = [], 'N/A'
        ie_result['requested_entries'], ie_result['entries'] = None, None
    else:
        # NB: `pending` and `resolved` are one and the same list from here on
        pending = resolved = list(pending)
        total = len(resolved)
        ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved)) or ([], [])
    if not ie_result.get('playlist_count'):
        # Better to do this after potentially exhausting entries
        ie_result['playlist_count'] = playlist_entries.get_full_count()

    extra = self._playlist_infodict(ie_result, n_entries=int_or_none(total))
    playlist_view = collections.ChainMap(ie_result, extra)

    infojson_status = False
    if self.params.get('allow_playlist_files', True):
        if self.params.get('list_thumbnails'):
            self.list_thumbnails(ie_result)
        if not self.params.get('simulate'):
            infojson_status = self._write_info_json(
                'playlist', ie_result, self.prepare_filename(playlist_view, 'pl_infojson'))
            if infojson_status is None:
                return None
            description_status = self._write_description(
                'playlist', ie_result, self.prepare_filename(playlist_view, 'pl_description'))
            if description_status is None:
                return None
            # TODO: This should be passed to ThumbnailsConvertor if necessary
            self._write_thumbnails('playlist', ie_result, self.prepare_filename(playlist_view, 'pl_thumbnail'))

    reverse_order = self.params.get('playlistreverse')
    random_order = self.params.get('playlistrandom')
    if lazy:
        if reverse_order or random_order:
            self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
    elif reverse_order:
        pending.reverse()
    elif random_order:
        random.shuffle(pending)

    self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {total} items'
                   f'{format_field(ie_result, "playlist_count", " of %s")}')

    flat_mode = self.params.get('extract_flat')
    if flat_mode == 'discard_in_playlist':
        retain_results = ie_result['_type'] != 'playlist'
    else:
        retain_results = flat_mode != 'discard'
    if retain_results:
        self.write_debug('The information of all playlist entries will be held in memory')

    failed = 0
    failure_limit = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, (playlist_index, entry) in enumerate(pending):
        autonumber = i + 1
        if lazy:
            resolved.append((playlist_index, entry))
        if not entry:
            continue

        entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
        if not lazy and 'playlist-index' in self.params['compat_opts']:
            playlist_index = ie_result['requested_entries'][i]

        entry_view = collections.ChainMap(entry, {
            **common_info,
            'n_entries': int_or_none(total),
            'playlist_index': playlist_index,
            'playlist_autonumber': autonumber,
        })

        if self._match_entry(entry_view, incomplete=True) is not None:
            # For compatabilty with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
            resolved[i] = (playlist_index, NO_DEFAULT)
            continue

        self.to_screen('[download] Downloading item %s of %s' % (
            self._format_screen(autonumber, self.Styles.ID), self._format_screen(total, self.Styles.EMPHASIS)))

        entry_extra = collections.ChainMap({
            'playlist_index': playlist_index,
            'playlist_autonumber': autonumber,
        }, extra)
        entry_result = self.__process_iterable_entry(entry, download, entry_extra)
        if not entry_result:
            failed += 1
        if failed >= failure_limit:
            self.report_error(
                f'Skipping the remaining entries in playlist "{title}" since {failed} items failed extraction')
            break
        if retain_results:
            resolved[i] = (playlist_index, entry_result)

    # Update with processed data; entries marked with NO_DEFAULT were filtered out
    ie_result['entries'] = [item for _, item in resolved if item is not NO_DEFAULT]
    ie_result['requested_entries'] = [index for index, item in resolved if item is not NO_DEFAULT]
    if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
        # Do not set for full playlist
        ie_result.pop('requested_entries')

    # Write the updated info to json
    if infojson_status is True:
        rewrite_status = self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(playlist_view, 'pl_infojson'), overwrite=True)
        if rewrite_status is None:
            return None

    ie_result = self.run_all_pps('playlist', ie_result)
    self.to_screen(f'[download] Finished downloading playlist: {title}')
    return ie_result
2052
@_handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Run process_ie_result on a single entry, with extraction errors handled by the decorator"""
    processed = self.process_ie_result(entry, download=download, extra_info=extra_info)
    return processed
2057
def _build_format_filter(self, filter_spec):
    """Returns a function to filter the formats according to the filter_spec

    The spec is first tried as a numeric comparison (<, <=, >, >=, =, !=),
    and then as a string comparison (=, ^=, $=, *=, ~=; optionally negated with "!").
    A "?" after the operator makes formats that lack the field pass the filter.

    @raises ValueError  If the value of a numeric comparison cannot be parsed
    @raises SyntaxError If the spec is neither a numeric nor a string comparison
    """
    NUMERIC_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    numeric_rex = re.compile(r'''(?x)\s*
        (?P<key>[\w.-]+)\s*
        (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
        ''' % '|'.join(map(re.escape, NUMERIC_OPERATORS.keys())))
    spec = numeric_rex.fullmatch(filter_spec)
    if spec:
        raw_value = spec.group('value')
        try:
            target = int(raw_value)
        except ValueError:
            # Not a plain integer; try it as a filesize, with and without the "B"
            target = parse_filesize(raw_value)
            if target is None:
                target = parse_filesize(raw_value + 'B')
            if target is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        raw_value, filter_spec))
        compare = NUMERIC_OPERATORS[spec.group('op')]
    else:
        STRING_OPERATORS = {
            '=': operator.eq,
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
            '~=': lambda attr, value: value.search(attr) is not None
        }
        string_rex = re.compile(r'''(?x)\s*
            (?P<key>[a-zA-Z0-9._-]+)\s*
            (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
            (?P<quote>["'])?
            (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
            (?(quote)(?P=quote))\s*
            ''' % '|'.join(map(re.escape, STRING_OPERATORS.keys())))
        spec = string_rex.fullmatch(filter_spec)
        if not spec:
            raise SyntaxError('Invalid filter specification %r' % filter_spec)

        if spec.group('op') == '~=':
            target = re.compile(spec.group('value'))
        else:
            # Unescape backslash-escaped quotes and backslashes
            target = re.sub(r'''\\([\\"'])''', r'\1', spec.group('value'))

        positive_compare = STRING_OPERATORS[spec.group('op')]
        if spec.group('negation'):
            def compare(attr, value):
                return not positive_compare(attr, value)
        else:
            compare = positive_compare

    field, none_inclusive = spec.group('key'), spec.group('none_inclusive')

    def _filter(f):
        actual_value = f.get(field)
        if actual_value is None:
            # Truthy only if "?" was given in the spec
            return none_inclusive
        return compare(actual_value, target)
    return _filter
2124
9f1a1c36 2125 def _check_formats(self, formats):
2126 for f in formats:
2127 self.to_screen('[info] Testing format %s' % f['format_id'])
75689fe5 2128 path = self.get_output_path('temp')
2129 if not self._ensure_dir_exists(f'{path}/'):
2130 continue
2131 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
9f1a1c36 2132 temp_file.close()
2133 try:
2134 success, _ = self.dl(temp_file.name, f, test=True)
8a82af35 2135 except (DownloadError, OSError, ValueError) + network_exceptions:
9f1a1c36 2136 success = False
2137 finally:
2138 if os.path.exists(temp_file.name):
2139 try:
2140 os.remove(temp_file.name)
2141 except OSError:
2142 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
2143 if success:
2144 yield f
2145 else:
2146 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
2147
0017d9ad 2148 def _default_format_spec(self, info_dict, download=True):
0017d9ad 2149
af0f7428
S
2150 def can_merge():
2151 merger = FFmpegMergerPP(self)
2152 return merger.available and merger.can_merge()
2153
91ebc640 2154 prefer_best = (
b7b04c78 2155 not self.params.get('simulate')
91ebc640 2156 and download
2157 and (
2158 not can_merge()
21633673 2159 or info_dict.get('is_live') and not self.params.get('live_from_start')
bf1824b3 2160 or self.params['outtmpl']['default'] == '-'))
53ed7066 2161 compat = (
2162 prefer_best
2163 or self.params.get('allow_multiple_audio_streams', False)
8a82af35 2164 or 'format-spec' in self.params['compat_opts'])
91ebc640 2165
2166 return (
53ed7066 2167 'best/bestvideo+bestaudio' if prefer_best
2168 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 2169 else 'bestvideo+bestaudio/best')
0017d9ad 2170
67134eab
JMF
def build_format_selector(self, format_spec):
    """Compile a format selection expression into a selector function.

    The expression is tokenized with `tokenize` and parsed into a tree of
    FormatSelector nodes:
      * ``a,b``    every selector in the list is applied in turn
      * ``a/b``    PICKFIRST: the first alternative that yields anything
      * ``a+b``    MERGE: every pair from both sides is merged
      * ``(...)``  GROUP
      * ``[...]``  filter, compiled by self._build_format_filter
      * atoms      'all', 'mergeall', best/worst style specs, a file
                   extension, or a format_id

    Returns a function taking a context dict (it reads 'formats' and, for
    some atoms, 'incomplete_formats' / 'has_merged_format') and returning
    an iterable of format dicts.

    Raises SyntaxError when the expression is malformed.
    """
    def syntax_error(note, start):
        # `start` is a (row, column) token position; the caret marks the column
        message = (
            'Invalid format specification: '
            '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    def _parse_filter(tokens):
        # Collect the raw text up to (not including) the closing ']'
        filter_parts = []
        for type_, string_, start, _, _ in tokens:
            if type_ == tokenize.OP and string_ == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string_)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings.
        # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type_, string_, start, end, line in tokens:
            if type_ == tokenize.OP and string_ == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type_, string_, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type_, string_, start, end, line in tokens:
                    yield type_, string_, start, end, line
                    if type_ == tokenize.OP and string_ == ']':
                        break
            elif type_ == tokenize.OP and string_ in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type_, string_, start, end, line
            elif type_ in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string_
                    last_start = start
                    last_end = end
                else:
                    last_string += string_
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive-descent parser; `tokens` must support restore_last_token()
        selectors = []
        current_selector = None
        for type_, string_, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type_ == getattr(tokenize, 'ENCODING', None):
                continue
            elif type_ in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string_, [])
            elif type_ == tokenize.OP:
                if string_ == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string_ in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string_ == ',':
                    tokens.restore_last_token()
                    break
                elif string_ == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string_ == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string_ == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string_ == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string_ == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error(f'Operator not recognized: "{string_}"', start)
            elif type_ == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        # Combine two (possibly already merged) formats into one format dict
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            get_no_more = {'video': False, 'audio': False}
            # Build a new list instead of popping from the list being iterated:
            # popping shifts the remaining items and makes the loop skip the
            # element that follows every removed one
            kept_formats = []
            for fmt_info in formats_info:
                if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                    continue
                for aud_vid in ['audio', 'video']:
                    if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                        if get_no_more[aud_vid]:
                            break
                        get_no_more[aud_vid] = True
                else:
                    kept_formats.append(fmt_info)
            # When nothing carries audio or video, still hand back one format
            # (as merging two such formats did before) rather than an empty merge
            formats_info = kept_formats or formats_info[-1:]

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = get_compatible_ext(
            vcodecs=[f.get('vcodec') for f in video_fmts],
            acodecs=[f.get('acodec') for f in audio_fmts],
            vexts=[f['ext'] for f in video_fmts],
            aexts=[f['ext'] for f in audio_fmts],
            preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
                         or self.params.get('prefer_free_formats') and ('webm', 'mkv')))

        filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(filtered('format')),
            'format_id': '+'.join(filtered('format_id')),
            'ext': output_ext,
            'protocol': '+'.join(map(determine_protocol, formats_info)),
            'language': '+'.join(orderedSet(filtered('language'))) or None,
            'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
            'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
            'tbr': sum(filtered('tbr', 'vbr', 'abr')),
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'dynamic_range': the_only_video.get('dynamic_range'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
                'aspect_ratio': the_only_video.get('aspect_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
                'asr': the_only_audio.get('asr'),
                'audio_channels': the_only_audio.get('audio_channels')
            })

        return new_dict

    def _check_formats(formats):
        # Decide, based on the 'check_formats' param, which formats get test-downloaded
        if self.params.get('check_formats') == 'selected':
            yield from self._check_formats(formats)
            return
        elif (self.params.get('check_formats') is not None
                or self.params.get('allow_unplayable_formats')):
            yield from formats
            return

        for f in formats:
            if f.get('has_drm') or f.get('__needs_testing'):
                yield from self._check_formats([f])
            else:
                yield f

    def _build_selector_function(selector):
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    yield from f(ctx)
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                    yield _merge(pair)

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    yield from _check_formats(ctx['formats'][::-1])
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(_check_formats(
                        f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    _filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else lambda f: True)  # b*, w*
                    filter_f = lambda f: _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                else:
                    if format_spec in self._format_selection_exts['audio']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                    elif format_spec in self._format_selection_exts['video']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
                    elif format_spec in self._format_selection_exts['storyboards']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                    else:
                        filter_f = lambda f: f.get('format_id') == format_spec  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if not matches:
                        if format_fallback and ctx['incomplete_formats']:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = list(filter(lambda f: f.get('vcodec') != 'none' or f.get('acodec') != 'none', formats))
                        elif separate_fallback and not ctx['has_merged_format']:
                            # for compatibility with youtube-dl when there is no pre-merged format
                            matches = list(filter(separate_fallback, formats))
                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                    try:
                        yield matches[format_idx - 1]
                    except LazyList.IndexError:
                        return

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply the [...] filters on a copy so that the caller's context is untouched
            ctx_copy = dict(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    # HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid
    # Prefix numbers with random letters to avoid it being classified as a number
    # See: https://github.com/yt-dlp/yt-dlp/pulls/8797
    # TODO: Implement parser not reliant on tokenize.tokenize
    prefix = ''.join(random.choices(string.ascii_letters, k=32))
    stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
    try:
        tokens = list(_remove_unused_ops(
            token._replace(string=token.string.replace(prefix, ''))
            for token in tokenize.tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator:
        # List-backed iterator that lets the parser push the last token back
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
a9c58ad9 2516
def _calc_headers(self, info_dict, load_cookies=False):
    """Compute the HTTP headers for a request described by `info_dict`.

    Combines self.params['http_headers'] with the info dict's own
    'http_headers' and cleans them. With `load_cookies`, cookies from the
    'Cookie' header and from info_dict['cookies'] are first loaded through
    self._load_cookies. The 'Cookie' header is always dropped from the
    result; cookies that apply to info_dict['url'] are serialised into
    info_dict['cookies'] instead. 'X-Forwarded-For' is filled from
    '__x_forwarded_for_ip' when not already present.

    Returns the resulting header mapping.
    """
    headers = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
    clean_headers(headers)

    if load_cookies:  # For --load-info-json
        self._load_cookies(headers.get('Cookie'), autoscope=info_dict['url'])  # compat
        self._load_cookies(info_dict.get('cookies'), autoscope=False)
    # The `Cookie` header is removed to prevent leaks and unscoped cookies.
    # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
    headers.pop('Cookie', None)

    jar_cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
    if jar_cookies:
        encoder = LenientSimpleCookie()
        # (cookie attribute, serialised prefix); None marks a value-less flag
        optional_attrs = (
            ('domain', 'Domain='),
            ('path', 'Path='),
            ('secure', None),
            ('expires', 'Expires='),
            ('version', 'Version='),
        )
        parts = []
        for cookie in jar_cookies:
            encoded_value = encoder.value_encode(cookie.value)[1]
            parts.append(f'{cookie.name}={encoded_value}')
            for attr, label in optional_attrs:
                attr_value = getattr(cookie, attr)
                if not attr_value:
                    continue
                parts.append('Secure' if label is None else f'{label}{attr_value}')
        info_dict['cookies'] = '; '.join(parts)

    if 'X-Forwarded-For' not in headers and (forwarded_ip := info_dict.get('__x_forwarded_for_ip')):
        headers['X-Forwarded-For'] = forwarded_ip

    return headers
2552
c487cf00 2553 def _calc_cookies(self, url):
b87e01c1 2554 self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
2555 return self.cookiejar.get_cookie_header(url)
e5660ee6 2556
9f1a1c36 2557 def _sort_thumbnails(self, thumbnails):
2558 thumbnails.sort(key=lambda t: (
2559 t.get('preference') if t.get('preference') is not None else -1,
2560 t.get('width') if t.get('width') is not None else -1,
2561 t.get('height') if t.get('height') is not None else -1,
2562 t.get('id') if t.get('id') is not None else '',
2563 t.get('url')))
2564
b0249bca 2565 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2566 thumbnails = info_dict.get('thumbnails')
2567 if thumbnails is None:
2568 thumbnail = info_dict.get('thumbnail')
2569 if thumbnail:
2570 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
9f1a1c36 2571 if not thumbnails:
2572 return
2573
2574 def check_thumbnails(thumbnails):
2575 for t in thumbnails:
2576 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2577 try:
2578 self.urlopen(HEADRequest(t['url']))
2579 except network_exceptions as err:
2580 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2581 continue
2582 yield t
2583
2584 self._sort_thumbnails(thumbnails)
2585 for i, t in enumerate(thumbnails):
2586 if t.get('id') is None:
2587 t['id'] = '%d' % i
2588 if t.get('width') and t.get('height'):
2589 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2590 t['url'] = sanitize_url(t['url'])
2591
2592 if self.params.get('check_formats') is True:
282f5709 2593 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
9f1a1c36 2594 else:
2595 info_dict['thumbnails'] = thumbnails
bc516a3f 2596
94dc8604 2597 def _fill_common_fields(self, info_dict, final=True):
03f83004 2598 # TODO: move sanitization here
94dc8604 2599 if final:
7aefd19a 2600 title = info_dict['fulltitle'] = info_dict.get('title')
d4736fdb 2601 if not title:
2602 if title == '':
2603 self.write_debug('Extractor gave empty title. Creating a generic title')
2604 else:
2605 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
1d485a1a 2606 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
03f83004
LNO
2607
2608 if info_dict.get('duration') is not None:
2609 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2610
2611 for ts_key, date_key in (
2612 ('timestamp', 'upload_date'),
2613 ('release_timestamp', 'release_date'),
2614 ('modified_timestamp', 'modified_date'),
2615 ):
2616 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2617 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2618 # see http://bugs.python.org/issue1646728)
19a03940 2619 with contextlib.suppress(ValueError, OverflowError, OSError):
836e06d2 2620 upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
03f83004 2621 info_dict[date_key] = upload_date.strftime('%Y%m%d')
03f83004 2622
1732eccc 2623 if not info_dict.get('release_year'):
2624 info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])}))
2625
03f83004
LNO
2626 live_keys = ('is_live', 'was_live')
2627 live_status = info_dict.get('live_status')
2628 if live_status is None:
2629 for key in live_keys:
2630 if info_dict.get(key) is False:
2631 continue
2632 if info_dict.get(key):
2633 live_status = key
2634 break
2635 if all(info_dict.get(key) is False for key in live_keys):
2636 live_status = 'not_live'
2637 if live_status:
2638 info_dict['live_status'] = live_status
2639 for key in live_keys:
2640 if info_dict.get(key) is None:
2641 info_dict[key] = (live_status == key)
a057779d 2642 if live_status == 'post_live':
2643 info_dict['was_live'] = True
03f83004
LNO
2644
2645 # Auto generate title fields corresponding to the *_number fields when missing
2646 # in order to always have clean titles. This is very common for TV series.
2647 for field in ('chapter', 'season', 'episode'):
94dc8604 2648 if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
03f83004
LNO
2649 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2650
104a7b5a
L
2651 for old_key, new_key in self._deprecated_multivalue_fields.items():
2652 if new_key in info_dict and old_key in info_dict:
b136e2af 2653 if '_version' not in info_dict: # HACK: Do not warn when using --load-info-json
2654 self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')
104a7b5a
L
2655 elif old_value := info_dict.get(old_key):
2656 info_dict[new_key] = old_value.split(', ')
2657 elif new_value := info_dict.get(new_key):
2658 info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)
2659
415f8d51 2660 def _raise_pending_errors(self, info):
2661 err = info.pop('__pending_error', None)
2662 if err:
2663 self.report_error(err, tb=False)
2664
def sort_formats(self, info_dict):
    """Sort the formats of `info_dict` in place.

    The sort key is FormatSorter(...).calculate_preference, built from
    the info dict's '_format_sort_fields' (or an empty list).
    """
    formats = self._get_formats(info_dict)
    sort_fields = info_dict.get('_format_sort_fields') or []
    sorter = FormatSorter(self, sort_fields)
    formats.sort(key=sorter.calculate_preference)
784320c9 2669
dd82ffea
JMF
2670 def process_video_result(self, info_dict, download=True):
2671 assert info_dict.get('_type', 'video') == 'video'
9c906919 2672 self._num_videos += 1
dd82ffea 2673
bec1fad2 2674 if 'id' not in info_dict:
fc08bdd6 2675 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2676 elif not info_dict.get('id'):
2677 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
455a15e2 2678
c9969434
S
2679 def report_force_conversion(field, field_not, conversion):
2680 self.report_warning(
2681 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2682 % (field, field_not, conversion))
2683
2684 def sanitize_string_field(info, string_field):
2685 field = info.get(string_field)
14f25df2 2686 if field is None or isinstance(field, str):
c9969434
S
2687 return
2688 report_force_conversion(string_field, 'a string', 'string')
14f25df2 2689 info[string_field] = str(field)
c9969434
S
2690
2691 def sanitize_numeric_fields(info):
2692 for numeric_field in self._NUMERIC_FIELDS:
2693 field = info.get(numeric_field)
f9934b96 2694 if field is None or isinstance(field, (int, float)):
c9969434
S
2695 continue
2696 report_force_conversion(numeric_field, 'numeric', 'int')
2697 info[numeric_field] = int_or_none(field)
2698
2699 sanitize_string_field(info_dict, 'id')
2700 sanitize_numeric_fields(info_dict)
3975b4d2 2701 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2702 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
4c3f8c3f 2703 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
50e93e03 2704 self.report_warning('"duration" field is negative, there is an error in extractor')
be6217b2 2705
9eef7c4e 2706 chapters = info_dict.get('chapters') or []
a3976e07 2707 if chapters and chapters[0].get('start_time'):
2708 chapters.insert(0, {'start_time': 0})
2709
9eef7c4e 2710 dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
a3976e07 2711 for idx, (prev, current, next_) in enumerate(zip(
2712 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
9eef7c4e 2713 if current.get('start_time') is None:
2714 current['start_time'] = prev.get('end_time')
2715 if not current.get('end_time'):
2716 current['end_time'] = next_.get('start_time')
a3976e07 2717 if not current.get('title'):
2718 current['title'] = f'<Untitled Chapter {idx}>'
9eef7c4e 2719
dd82ffea
JMF
2720 if 'playlist' not in info_dict:
2721 # It isn't part of a playlist
2722 info_dict['playlist'] = None
2723 info_dict['playlist_index'] = None
2724
bc516a3f 2725 self._sanitize_thumbnails(info_dict)
d5519808 2726
536a55da 2727 thumbnail = info_dict.get('thumbnail')
bc516a3f 2728 thumbnails = info_dict.get('thumbnails')
536a55da
S
2729 if thumbnail:
2730 info_dict['thumbnail'] = sanitize_url(thumbnail)
2731 elif thumbnails:
d5519808
PH
2732 info_dict['thumbnail'] = thumbnails[-1]['url']
2733
ae30b840 2734 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2735 info_dict['display_id'] = info_dict['id']
2736
03f83004 2737 self._fill_common_fields(info_dict)
33d2fc2f 2738
05108a49
S
2739 for cc_kind in ('subtitles', 'automatic_captions'):
2740 cc = info_dict.get(cc_kind)
2741 if cc:
2742 for _, subtitle in cc.items():
2743 for subtitle_format in subtitle:
2744 if subtitle_format.get('url'):
2745 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2746 if subtitle_format.get('ext') is None:
2747 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2748
2749 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2750 subtitles = info_dict.get('subtitles')
4bba3716 2751
360e1ca5 2752 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2753 info_dict['id'], subtitles, automatic_captions)
a504ced0 2754
aebb4f4b 2755 formats = self._get_formats(info_dict)
dd82ffea 2756
c154302c 2757 # Backward compatibility with InfoExtractor._sort_formats
9ebac355 2758 field_preference = (formats or [{}])[0].pop('__sort_fields', None)
c154302c 2759 if field_preference:
2760 info_dict['_format_sort_fields'] = field_preference
2761
bc344cd4 2762 info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it
2763 f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
88acdbc2 2764 if not self.params.get('allow_unplayable_formats'):
bc344cd4 2765 formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']
17ffed18 2766
2767 if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2768 self.report_warning(
2769 f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
2770 'only images are available for download. Use --list-formats to see them'.capitalize())
88acdbc2 2771
319b6059 2772 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2773 if not get_from_start:
2774 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2775 if info_dict.get('is_live') and formats:
adbc4ec4 2776 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
319b6059 2777 if get_from_start and not formats:
a44ca5a4 2778 self.raise_no_formats(info_dict, msg=(
2779 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2780 'If you want to download from the current time, use --no-live-from-start'))
adbc4ec4 2781
73af5cc8
S
2782 def is_wellformed(f):
2783 url = f.get('url')
a5ac0c47 2784 if not url:
73af5cc8
S
2785 self.report_warning(
2786 '"url" field is missing or empty - skipping format, '
2787 'there is an error in extractor')
a5ac0c47
S
2788 return False
2789 if isinstance(url, bytes):
2790 sanitize_string_field(f, 'url')
2791 return True
73af5cc8
S
2792
2793 # Filter out malformed formats for better extraction robustness
1ac7f461 2794 formats = list(filter(is_wellformed, formats or []))
2795
2796 if not formats:
2797 self.raise_no_formats(info_dict)
73af5cc8 2798
39f32f17 2799 for format in formats:
c9969434
S
2800 sanitize_string_field(format, 'format_id')
2801 sanitize_numeric_fields(format)
dcf77cf1 2802 format['url'] = sanitize_url(format['url'])
39f32f17 2803 if format.get('ext') is None:
2804 format['ext'] = determine_ext(format['url']).lower()
2805 if format.get('protocol') is None:
2806 format['protocol'] = determine_protocol(format)
2807 if format.get('resolution') is None:
2808 format['resolution'] = self.format_resolution(format, default=None)
2809 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2810 format['dynamic_range'] = 'SDR'
2811 if format.get('aspect_ratio') is None:
2812 format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
10025b71 2813 # For fragmented formats, "tbr" is often max bitrate and not average
2814 if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url'))
0dff8e4d 2815 and info_dict.get('duration') and format.get('tbr')
39f32f17 2816 and not format.get('filesize') and not format.get('filesize_approx')):
2817 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
6c5211ce 2818 format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)
2819
2820 # Safeguard against old/insecure infojson when using --load-info-json
2821 if info_dict.get('http_headers'):
2822 info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers'])
2823 info_dict['http_headers'].pop('Cookie', None)
39f32f17 2824
2825 # This is copied to http_headers by the above _calc_headers and can now be removed
2826 if '__x_forwarded_for_ip' in info_dict:
2827 del info_dict['__x_forwarded_for_ip']
2828
c154302c 2829 self.sort_formats({
2830 'formats': formats,
2831 '_format_sort_fields': info_dict.get('_format_sort_fields')
2832 })
39f32f17 2833
2834 # Sanitize and group by format_id
2835 formats_dict = {}
2836 for i, format in enumerate(formats):
e74e3b63 2837 if not format.get('format_id'):
14f25df2 2838 format['format_id'] = str(i)
e2effb08
S
2839 else:
2840 # Sanitize format_id from characters used in format selector expression
ec85ded8 2841 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
39f32f17 2842 formats_dict.setdefault(format['format_id'], []).append(format)
181c7053
S
2843
2844 # Make sure all formats have unique format_id
03b4de72 2845 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
181c7053 2846 for format_id, ambiguous_formats in formats_dict.items():
48ee10ee 2847 ambigious_id = len(ambiguous_formats) > 1
2848 for i, format in enumerate(ambiguous_formats):
2849 if ambigious_id:
181c7053 2850 format['format_id'] = '%s-%d' % (format_id, i)
48ee10ee 2851 # Ensure there is no conflict between id and ext in format selection
2852 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2853 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2854 format['format_id'] = 'f%s' % format['format_id']
181c7053 2855
39f32f17 2856 if format.get('format') is None:
2857 format['format'] = '{id} - {res}{note}'.format(
2858 id=format['format_id'],
2859 res=self.format_resolution(format),
2860 note=format_field(format, 'format_note', ' (%s)'),
2861 )
dd82ffea 2862
9f1a1c36 2863 if self.params.get('check_formats') is True:
282f5709 2864 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
9f1a1c36 2865
88acdbc2 2866 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
2867 # only set the 'formats' fields if the original info_dict list them
2868 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 2869 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2870 # which can't be exported to json
b3d9ef88 2871 info_dict['formats'] = formats
4ec82a72 2872
2873 info_dict, _ = self.pre_process(info_dict)
2874
6db9c4d5 2875 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
09b49e1f 2876 return info_dict
2877
2878 self.post_extract(info_dict)
2879 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2880
093a1710 2881 # The pre-processors may have modified the formats
aebb4f4b 2882 formats = self._get_formats(info_dict)
093a1710 2883
e4221b70 2884 list_only = self.params.get('simulate') == 'list_only'
fa9f30b8 2885 interactive_format_selection = not list_only and self.format_selector == '-'
b7b04c78 2886 if self.params.get('list_thumbnails'):
2887 self.list_thumbnails(info_dict)
b7b04c78 2888 if self.params.get('listsubtitles'):
2889 if 'automatic_captions' in info_dict:
2890 self.list_subtitles(
2891 info_dict['id'], automatic_captions, 'automatic captions')
2892 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
fa9f30b8 2893 if self.params.get('listformats') or interactive_format_selection:
b69fd25c 2894 self.list_formats(info_dict)
169dbde9 2895 if list_only:
b7b04c78 2896 # Without this printing, -F --print-json will not work
17060584 2897 self.__forced_printings(info_dict)
c487cf00 2898 return info_dict
bfaae0a7 2899
187986a8 2900 format_selector = self.format_selector
fa9f30b8 2901 while True:
2902 if interactive_format_selection:
372a0f3b
IS
2903 req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
2904 + '(Press ENTER for default, or Ctrl+C to quit)'
2905 + self._format_screen(': ', self.Styles.EMPHASIS))
fa9f30b8 2906 try:
372a0f3b 2907 format_selector = self.build_format_selector(req_format) if req_format else None
fa9f30b8 2908 except SyntaxError as err:
2909 self.report_error(err, tb=False, is_error=False)
2910 continue
2911
372a0f3b
IS
2912 if format_selector is None:
2913 req_format = self._default_format_spec(info_dict, download=download)
2914 self.write_debug(f'Default format spec: {req_format}')
2915 format_selector = self.build_format_selector(req_format)
2916
85e801a9 2917 formats_to_download = list(format_selector({
fa9f30b8 2918 'formats': formats,
85e801a9 2919 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
906c0bdc 2920 'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
2921 or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
85e801a9 2922 }))
fa9f30b8 2923 if interactive_format_selection and not formats_to_download:
2924 self.report_error('Requested format is not available', tb=False, is_error=False)
2925 continue
2926 break
317f7ab6 2927
dd82ffea 2928 if not formats_to_download:
b7da73eb 2929 if not self.params.get('ignore_no_formats_error'):
c0b6e5c7 2930 raise ExtractorError(
2931 'Requested format is not available. Use --list-formats for a list of available formats',
2932 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
b62fa6d7 2933 self.report_warning('Requested format is not available')
2934 # Process what we can, even without any available formats.
2935 formats_to_download = [{}]
a13e6848 2936
0500ee3d 2937 requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
5ec1b6b7 2938 best_format, downloaded_formats = formats_to_download[-1], []
b62fa6d7 2939 if download:
0500ee3d 2940 if best_format and requested_ranges:
5ec1b6b7 2941 def to_screen(*msg):
2942 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2943
2944 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2945 (f['format_id'] for f in formats_to_download))
0500ee3d 2946 if requested_ranges != ({}, ):
5ec1b6b7 2947 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
fc2ba496 2948 (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
a13e6848 2949 max_downloads_reached = False
5ec1b6b7 2950
0500ee3d 2951 for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
5ec1b6b7 2952 new_info = self._copy_infodict(info_dict)
b7da73eb 2953 new_info.update(fmt)
3975b4d2 2954 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
fc2ba496 2955 end_time = offset + min(chapter.get('end_time', duration), duration)
b4e0d758 2956 # duration may not be accurate. So allow deviations <1sec
2957 if end_time == float('inf') or end_time > offset + duration + 1:
2958 end_time = None
3975b4d2 2959 if chapter or offset:
5ec1b6b7 2960 new_info.update({
3975b4d2 2961 'section_start': offset + chapter.get('start_time', 0),
b4e0d758 2962 'section_end': end_time,
5ec1b6b7 2963 'section_title': chapter.get('title'),
2964 'section_number': chapter.get('index'),
2965 })
2966 downloaded_formats.append(new_info)
a13e6848 2967 try:
2968 self.process_info(new_info)
2969 except MaxDownloadsReached:
2970 max_downloads_reached = True
415f8d51 2971 self._raise_pending_errors(new_info)
f46e2f9d 2972 # Remove copied info
2973 for key, val in tuple(new_info.items()):
2974 if info_dict.get(key) == val:
2975 new_info.pop(key)
a13e6848 2976 if max_downloads_reached:
2977 break
ebed8b37 2978
5ec1b6b7 2979 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
a13e6848 2980 assert write_archive.issubset({True, False, 'ignore'})
2981 if True in write_archive and False not in write_archive:
2982 self.record_download_archive(info_dict)
be72c624 2983
5ec1b6b7 2984 info_dict['requested_downloads'] = downloaded_formats
ed5835b4 2985 info_dict = self.run_all_pps('after_video', info_dict)
a13e6848 2986 if max_downloads_reached:
2987 raise MaxDownloadsReached()
ebed8b37 2988
49a57e70 2989 # We update the info dict with the selected best quality format (backwards compatibility)
be72c624 2990 info_dict.update(best_format)
dd82ffea
JMF
2991 return info_dict
2992
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format.

    @param video_id            Identifier used only in log/warning messages
    @param normal_subtitles    Mapping of language -> list of format dicts;
                               considered only if the 'writesubtitles' param is set
    @param automatic_captions  Mapping of language -> list of format dicts;
                               considered only if the 'writeautomaticsub' param is set.
                               Never overrides a language already taken from
                               normal_subtitles
    @returns                   None if no subtitles are available/requested, otherwise
                               a dict mapping each selected language to the single
                               format dict chosen for it
    @raises ValueError         If a 'subtitleslangs' entry is an invalid regex
    """
    available_subs, normal_sub_langs = {}, []
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
        normal_sub_langs = tuple(normal_subtitles.keys())
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            if lang not in available_subs:
                available_subs[lang] = cap_info

    if not available_subs or (
            not self.params.get('writesubtitles')
            and not self.params.get('writeautomaticsub')):
        return None

    all_sub_langs = tuple(available_subs.keys())
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        try:
            requested_langs = orderedSet_from_options(
                self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}') from e
    else:
        # Default: pick exactly one language, preferring (in order) 'en' and
        # 'en*' among normal subtitles, then among all subtitles, then anything
        requested_langs = LazyList(itertools.chain(
            ['en'] if 'en' in normal_sub_langs else [],
            filter(lambda f: f.startswith('en'), normal_sub_langs),
            ['en'] if 'en' in all_sub_langs else [],
            filter(lambda f: f.startswith('en'), all_sub_langs),
            normal_sub_langs, all_sub_langs,
        ))[:1]
    if requested_langs:
        self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty format list is as unusable as a missing language;
        # indexing formats[-1] below would otherwise raise IndexError
        if not formats:
            self.report_warning(f'{lang} subtitles not available for {video_id}')
            continue
        for ext in formats_preference:
            if ext == 'best':
                f = formats[-1]
                break
            matches = list(filter(lambda f: f['ext'] == ext, formats))
            if matches:
                f = matches[-1]
                break
        else:
            # No preference matched: fall back to the last listed format
            f = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, f['ext']))
        subs[lang] = f
    return subs
3052
def _forceprint(self, key, info_dict):
    """Emit the 'forceprint' and 'print_to_file' templates registered under `key`.

    Works on a shallow copy of info_dict that is augmented with 'filename'
    (if not already set), 'urls' and the rendered formats/thumbnails/subtitles
    tables. Returns that copy, or None when info_dict is None.
    """
    if info_dict is None:
        return None

    printable = info_dict.copy()
    default_filename = self.prepare_filename(info_dict)
    if 'filename' not in printable:
        printable['filename'] = default_filename

    requested = info_dict.get('requested_formats')
    if requested is not None:
        # For RTMP URLs, also include the playpath
        printable['urls'] = '\n'.join(
            fmt['url'] + fmt.get('play_path', '') for fmt in requested)
    elif info_dict.get('url'):
        printable['urls'] = info_dict['url'] + info_dict.get('play_path', '')

    printable.update({
        'formats_table': self.render_formats_table(info_dict),
        'thumbnails_table': self.render_thumbnails_table(info_dict),
        'subtitles_table': self.render_subtitles_table(
            info_dict.get('id'), info_dict.get('subtitles')),
        'automatic_captions_table': self.render_subtitles_table(
            info_dict.get('id'), info_dict.get('automatic_captions')),
    })

    def expand_template(spec):
        # Shorthand field lists ("title,id", "title=", "{id,title}") are
        # expanded into output templates; anything else is passed through
        matched = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', spec)
        if matched is None:
            return spec

        field_fmt = '%({})s'
        if spec.startswith('{'):
            spec, field_fmt = f'.{spec}', '%({})j'
        if spec.endswith('='):
            spec, field_fmt = spec[:-1], '{0} = %({0})#j'
        fields = [spec] if matched.group('dict') else spec.split(',')
        return '\n'.join(field_fmt.format(name) for name in fields)

    for spec in self.params['forceprint'].get(key, []):
        self.to_stdout(self.evaluate_outtmpl(expand_template(spec), printable))

    for spec, file_tmpl in self.params['print_to_file'].get(key, []):
        target = self.prepare_filename(info_dict, outtmpl=file_tmpl)
        expanded = expand_template(spec)
        self.to_screen(f'[info] Writing {expanded!r} to: {target}')
        if not self._ensure_dir_exists(target):
            continue
        with open(target, 'a', encoding='utf-8', newline='') as out_file:
            out_file.write(self.evaluate_outtmpl(expanded, printable) + os.linesep)

    return printable
3092
def __forced_printings(self, info_dict, filename=None, incomplete=True):
    """Print the fields requested through the 'force*' params for one video.

    Runs the 'video' forceprint/print_to_file templates via _forceprint, then
    the individual forced fields, and finally the JSON dump if 'forcejson' is set.
    A truthy `filename` is stored into info_dict['filename'] first. When
    `incomplete` is true, forced fields that are missing are silently skipped.
    """
    needs_full_info = (
        self.params.get('forcejson')
        or self.params['forceprint'].get('video')
        or self.params['print_to_file'].get('video'))
    if needs_full_info:
        self.post_extract(info_dict)
    if filename:
        info_dict['filename'] = filename
    info_copy = self._forceprint('video', info_dict)

    def emit(field, source_key=None, optional=False):
        if not self.params.get(f'force{field}'):
            return
        # A missing field is tolerated if it is optional or the info is incomplete
        if info_copy.get(field) is None and (optional or incomplete):
            return
        self.to_stdout(info_copy[field if source_key is None else source_key])

    for field, source_key, optional in (
        ('title', None, False),
        ('id', None, False),
        ('url', 'urls', False),
        ('thumbnail', None, True),
        ('description', None, True),
        ('filename', None, False),
    ):
        emit(field, source_key, optional)

    if self.params.get('forceduration') and info_copy.get('duration') is not None:
        self.to_stdout(formatSeconds(info_copy['duration']))
    emit('format')

    if self.params.get('forcejson'):
        self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 3121
def dl(self, name, info, subtitle=False, test=False):
    """Pick a suitable downloader for `info` and run it, writing to `name`.

    With test=True a fixed set of test parameters is used instead of
    self.params, and no progress hooks are attached. Returns whatever the
    downloader's download() returns.
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    if not test:
        params = self.params
    else:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_cls(self, params)
    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)

        def display_url(url):
            # Only show the part of a data: URL before the first comma
            if url.startswith('data:'):
                return url.split(',')[0] + ',<data>'
            return url

        targets = info.get('requested_formats', []) or [info]
        urls = '", "'.join(display_url(f['url']) for f in targets)
        self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')

    # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
    # But it may contain objects that are not deep-copyable
    info_for_fd = self._copy_infodict(info)
    if info_for_fd.get('http_headers') is None:
        info_for_fd['http_headers'] = self._calc_headers(info_for_fd)
    return fd.download(name, info_for_fd, subtitle)
3156
def existing_file(self, filepaths, *, default_overwrite=True):
    """Return the first of `filepaths` that exists, unless overwriting is enabled.

    When the 'overwrites' param (falling back to `default_overwrite`) is
    truthy, every existing candidate is reported and deleted, and None is
    returned. None is also returned when no candidate exists.
    """
    present = [path for path in orderedSet(filepaths) if os.path.exists(path)]
    if present and not self.params.get('overwrites', default_overwrite):
        return present[0]

    for stale in present:
        self.report_file_delete(stale)
        os.remove(stale)
    return None
3166
def process_info(self, info_dict):
    """Process a single resolved IE result. (Modifies it in-place)

    Steps, in order: entry filtering, 'video' pre-processing, forced
    printings, writing of the auxiliary files (description, subtitles,
    thumbnails, info json, annotations, internet shortcuts), 'before_dl'
    pre-processing, the download itself (possibly several formats that are
    merged afterwards), fixups, post-processing and post hooks.

    Returns None in every case; the outcome is recorded in info_dict,
    notably under '__write_download_archive'.

    @raises MaxDownloadsReached    once the 'max_downloads' param is reached
    @raises UnavailableVideoError  if an OSError occurs while downloading
    """

    assert info_dict.get('_type', 'video') == 'video'
    original_infodict = info_dict

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    if self._match_entry(info_dict) is not None:
        info_dict['__write_download_archive'] = 'ignore'
        return

    # Does nothing under normal operation - for backward compatibility of process_info
    self.post_extract(info_dict)

    def replace_info_dict(new_info):
        # Copy new_info into the existing dict object so that the caller's
        # reference stays valid (checked by the assert at the end)
        nonlocal info_dict
        if new_info == info_dict:
            return
        info_dict.clear()
        info_dict.update(new_info)

    new_info, _ = self.pre_process(info_dict, 'video')
    replace_info_dict(new_info)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    def check_max_downloads():
        if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
            raise MaxDownloadsReached()

    if self.params.get('simulate'):
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
        check_max_downloads()
        return

    if full_filename is None:
        return
    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self._write_description('video', info_dict,
                               self.prepare_filename(info_dict, 'description')) is None:
        return

    sub_files = self._write_subtitles(info_dict, temp_filename)
    if sub_files is None:
        return
    files_to_move.update(dict(sub_files))

    thumb_files = self._write_thumbnails(
        'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
    if thumb_files is None:
        return
    files_to_move.update(dict(thumb_files))

    infofn = self.prepare_filename(info_dict, 'infojson')
    _infojson_written = self._write_info_json('video', info_dict, infofn)
    if _infojson_written:
        info_dict['infojson_filename'] = infofn
        # For backward compatibility, even though it was a private field
        info_dict['__infojson_filename'] = infofn
    elif _infojson_written is None:
        return

    # Note: Annotations are deprecated
    annofn = None
    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
    if annofn:
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except OSError:
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    # Write internet shortcut files
    def _write_link_file(link_type):
        # Returns False only on a failure that must abort processing
        url = try_get(info_dict['webpage_url'], iri_to_uri)
        if not url:
            self.report_warning(
                f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
            return True
        linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
        if not self._ensure_dir_exists(encodeFilename(linkfn)):
            return False
        # Keep an existing shortcut only when overwriting is disabled, like
        # the annotations check above (the condition used to be inverted)
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
            return True
        try:
            self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
            with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                      newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                template_vars = {'url': url}
                if link_type == 'desktop':
                    template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
        except OSError:
            self.report_error(f'Cannot write internet shortcut {linkfn}')
            return False
        return True

    write_links = {
        'url': self.params.get('writeurllink'),
        'webloc': self.params.get('writewebloclink'),
        'desktop': self.params.get('writedesktoplink'),
    }
    if self.params.get('writelink'):
        # 'writelink' selects the shortcut type from the current platform
        link_type = ('webloc' if sys.platform == 'darwin'
                     else 'desktop' if sys.platform.startswith('linux')
                     else 'url')
        write_links[link_type] = True

    if any(should_write and not _write_link_file(link_type)
           for link_type, should_write in write_links.items()):
        return

    new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    replace_info_dict(new_info)

    if self.params.get('skip_download'):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_video_file(*filepaths):
                # For each candidate, its 'final_ext' variant is checked first
                ext = info_dict.get('ext')
                converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
                file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
                                          default_overwrite=False)
                if file:
                    info_dict['ext'] = os.path.splitext(file)[1][1:]
                return file

            fd, success = None, True
            if info_dict.get('protocol') or info_dict.get('url'):
                fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
                        info_dict.get('section_start') or info_dict.get('section_end')):
                    msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
                           else 'You have requested downloading the video partially, but ffmpeg is not installed')
                    self.report_error(f'{msg}. Aborting')
                    return

            if info_dict.get('requested_formats') is not None:
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if (info_dict['ext'] == 'webm'
                            and info_dict.get('thumbnails')
                            # check with type instead of pp_key, __name__, or isinstance
                            # since we don't want any custom PPs to trigger this
                            and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return f'{filename_wo_ext}.{ext}'

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_video_file(full_filename, temp_filename)

                info_dict['__real_download'] = False
                # NOTE: Copy so that original format dicts are not modified
                info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))

                merger = FFmpegMergerPP(self)
                downloaded = []
                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif fd:
                    # A single downloader handles all requested formats at once
                    for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
                        f['filepath'] = fname = prepend_extension(
                            correct_ext(temp_filename, info_dict['ext']),
                            'f%s' % f['format_id'], info_dict['ext'])
                        downloaded.append(fname)
                    info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
                        if not self.params.get('ignoreerrors'):
                            self.report_error(f'{msg}. Aborting due to --abort-on-error')
                            return
                        self.report_warning(f'{msg}. The formats won\'t be merged')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                    fname = temp_filename
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            f['filepath'] = fname
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success

                if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
                    info_dict['__postprocessors'].append(merger)
                    info_dict['__files_to_merge'] = downloaded
                    # Even if there were no downloads, it is being merged only now
                    info_dict['__real_download'] = True
                else:
                    for file in downloaded:
                        files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_video_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except OSError as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
            return

        self._raise_pending_errors(info_dict)
        if success and full_filename != '-':

            def fixup():
                # do_fixup is True (fix), 'warn' (only warn) or False (skip)
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = 'warn'
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    if not (do_fixup and cndn):
                        return
                    elif do_fixup == 'warn':
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(stretched_ratio not in (1, None),
                             f'Non-uniform pixel ratio {stretched_ratio}',
                             FFmpegFixupStretchedPP)

                downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                downloader = downloader.FD_NAME if downloader else None

                ext = info_dict.get('ext')
                postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
                    isinstance(pp, FFmpegVideoConvertorPP)
                    and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
                ) for pp in self._pps['post_process'])

                if not postprocessed_by_ffmpeg:
                    ffmpeg_fixup(fd != FFmpegFD and ext == 'm4a'
                                 and info_dict.get('container') == 'm4a_dash',
                                 'writing DASH m4a. Only some players support this container',
                                 FFmpegFixupM4aPP)
                    ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
                                 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
                                 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                 FFmpegFixupM3u8PP)
                    ffmpeg_fixup(downloader == 'dashsegments'
                                 and (info_dict.get('is_live') or info_dict.get('is_dash_periods')),
                                 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

                ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            info_dict['__write_download_archive'] = True

    assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
    if self.params.get('force_write_download_archive'):
        info_dict['__write_download_archive'] = True
    check_max_downloads()
8222d8de 3528
def __download_wrapper(self, func):
    """Wrap `func` so that download-level errors are reported instead of propagated.

    The returned callable forwards all arguments to `func`. Its own return
    value is always None; when the 'dump_single_json' param is set, the result
    of `func` is post-extracted, sanitized and written to stdout as JSON.
    """
    @functools.wraps(func)
    def guarded(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except UnavailableVideoError as err:
            self.report_error(err)
            return
        except DownloadCancelled as err:
            self.to_screen(f'[info] {err}')
            if self.params.get('break_per_url'):
                # The cancellation only applies to this URL; reset the counter
                self._num_downloads = 0
                return
            raise

        if not self.params.get('dump_single_json', False):
            return
        self.post_extract(result)
        self.to_stdout(json.dumps(self.sanitize_info(result)))
    return guarded
3546
8222d8de
JMF
def download(self, url_list):
    """Download a given list of URLs."""
    urls = variadic(url_list)  # Passing a single URL is a common mistake
    template = self.params['outtmpl']['default']

    # Several URLs written to one fixed (non-templated) filename would collide
    if (len(urls) > 1
            and template != '-'
            and '%' not in template
            and self.params.get('max_downloads') != 1):
        raise SameFileError(template)

    for url in urls:
        extract = self.__download_wrapper(self.extract_info)
        extract(url, force_generic_extractor=self.params.get('force_generic_extractor', False))

    return self._download_retcode
3562
def download_with_info_file(self, info_filename):
    """Download using the info dict(s) stored as JSON in `info_filename`.

    When processing a stored info dict fails and it has a 'webpage_url',
    the download is retried from that URL. Returns the download return code.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        loaded = json.loads('\n'.join(lines))
        infos = [
            self.sanitize_info(entry, self.params.get('clean_infojson', True))
            for entry in variadic(loaded)]

    for info in infos:
        process = self.__download_wrapper(self.process_ie_result)
        try:
            process(info, download=True)
        except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as err:
            if not isinstance(err, EntryNotInPlaylist):
                self.to_stderr('\r')
            fallback_url = info.get('webpage_url')
            if fallback_url is None:
                raise
            self.report_warning(f'The info failed to download: {err}; trying with URL {fallback_url}')
            self.download([fallback_url])
        except ExtractorError as err:
            self.report_error(err)
    return self._download_retcode
1dcc4c0c 3584
@staticmethod
def sanitize_info(info_dict, remove_private_keys=False):
    ''' Sanitize the infodict for converting to json

    Missing 'epoch', '_type' and '_version' entries are filled in on the passed
    dict itself; the returned structure is a new, filtered copy of it.
    With remove_private_keys, entries that are None, start with '__' or are
    listed below are dropped at every dict level.
    '''
    if info_dict is None:
        return info_dict

    info_dict.setdefault('epoch', int(time.time()))
    info_dict.setdefault('_type', 'video')
    info_dict.setdefault('_version', {
        'version': __version__,
        'current_git_head': current_git_head(),
        'release_git_head': RELEASE_GIT_HEAD,
        'repository': ORIGIN,
    })

    private_keys = {
        'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
        'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
        'playlist_autonumber',
    }

    def is_rejected(key, value):
        if not remove_private_keys:
            return False
        return value is None or key.startswith('__') or key in private_keys

    def to_jsonable(obj):
        if isinstance(obj, dict):
            return {key: to_jsonable(value) for key, value in obj.items() if not is_rejected(key, value)}
        if isinstance(obj, (list, tuple, set, LazyList)):
            return [to_jsonable(item) for item in obj]
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        # Anything else is stored as its repr()
        return repr(obj)

    return to_jsonable(info_dict)
cb202fd2 3619
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    ''' Alias of sanitize_info for backward compatibility '''
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)
3624
43d7f5a5 3625 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3626 for filename in set(filter(None, files_to_delete)):
3627 if msg:
3628 self.to_screen(msg % filename)
3629 try:
3630 os.remove(filename)
3631 except OSError:
3632 self.report_warning(f'Unable to delete file {filename}')
3633 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3634 del info['__files_to_move'][filename]
3635
@staticmethod
def post_extract(info_dict):
    """Run and merge the deferred '__post_extractor' callbacks in place.

    For 'playlist' and 'multi_video' results only the entries are visited;
    for anything else the callback (if any) is popped, called, and its
    result merged into the dict.
    """
    def resolve(node):
        if node.get('_type') not in ('playlist', 'multi_video'):
            pending = node.pop('__post_extractor', None)
            node.update(pending() if pending else {})
            return
        for entry in node.get('entries', {}):
            resolve(entry or {})

    resolve(info_dict or {})
3648
def run_pp(self, pp, infodict):
    """Run a single postprocessor and handle the files it wants deleted.

    Returns the info dict produced by `pp.run`, or the input dict when a
    PostProcessingError is swallowed because 'ignoreerrors' is True.
    """
    infodict.setdefault('__files_to_move', {})
    try:
        obsolete_files, infodict = pp.run(infodict)
    except PostProcessingError as err:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is not True:
            raise
        self.report_error(err)
        return infodict

    if obsolete_files:
        if self.params.get('keepvideo', False):
            # Kept files still need to be moved along with the final file
            for path in obsolete_files:
                infodict['__files_to_move'].setdefault(path, '')
        else:
            self._delete_downloaded_files(
                *obsolete_files, info=infodict, msg='Deleting original file %s (pass -k to keep)')
    return infodict
5bfa4862 3671
def run_all_pps(self, key, info, *, additional_pps=None):
    """Run `additional_pps` followed by the postprocessors registered under `key`.

    For every key except 'video', `_forceprint` is invoked first.
    Returns the info dict as returned by the last postprocessor.
    """
    if key != 'video':
        self._forceprint(key, info)
    processors = (additional_pps or []) + self._pps[key]
    for processor in processors:
        info = self.run_pp(processor, info)
    return info
277d6ff5 3678
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the `key` postprocessors on a shallow copy of `ie_info`.

    A PostProcessingError is reported and recorded under '__pending_error'
    rather than raised. Returns a tuple (info, files_to_move).
    """
    info = dict(ie_info)
    info['__files_to_move'] = files_to_move or {}
    try:
        info = self.run_all_pps(key, info)
    except PostProcessingError as err:
        message = f'Preprocessing: {err}'
        info.setdefault('__pending_error', message)
        self.report_error(message, is_error=False)
    pending_moves = info.pop('__files_to_move', None)
    return info, pending_moves
5bfa4862 3689
def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file."""
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move or {}

    extra_pps = info.get('__postprocessors')
    info = self.run_all_pps('post_process', info, additional_pps=extra_pps)

    # Move the files into place, then drop the bookkeeping entry
    mover = MoveFilesAfterDownloadPP(self)
    info = self.run_pp(mover, info)
    del info['__files_to_move']

    return self.run_all_pps('after_move', info)
c1c9a79c 3698
5db07df6 3699 def _make_archive_id(self, info_dict):
e9fef7ee
S
3700 video_id = info_dict.get('id')
3701 if not video_id:
3702 return
5db07df6
PH
3703 # Future-proof against any change in case
3704 # and backwards compatibility with prior versions
e9fef7ee 3705 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3706 if extractor is None:
1211bb6d
S
3707 url = str_or_none(info_dict.get('url'))
3708 if not url:
3709 return
e9fef7ee 3710 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3711 for ie_key, ie in self._ies.items():
1211bb6d 3712 if ie.suitable(url):
8b7491c8 3713 extractor = ie_key
e9fef7ee
S
3714 break
3715 else:
3716 return
0647d925 3717 return make_archive_id(extractor, video_id)
5db07df6
PH
3718
def in_download_archive(self, info_dict):
    """Return whether the entry's archive ID, or any of its '_old_archive_ids', is in the archive."""
    if not self.archive:
        return False

    candidates = [
        self._make_archive_id(info_dict),
        *(info_dict.get('_old_archive_ids') or []),
    ]
    return any(archive_id in self.archive for archive_id in candidates)
c1c9a79c
PH
3726
def record_download_archive(self, info_dict):
    """Add the entry's archive ID to the in-memory archive and, for path-like archives, to the file.

    Does nothing when the 'download_archive' param is not set.
    """
    archive_target = self.params.get('download_archive')
    if archive_target is None:
        return
    archive_id = self._make_archive_id(info_dict)
    assert archive_id

    self.write_debug(f'Adding to archive: {archive_id}')
    if is_path_like(archive_target):
        with locked_file(archive_target, 'a', encoding='utf-8') as archive_file:
            archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
dd82ffea 3739
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict.

    Precedence: 'audio only' for formats without video but with audio, then
    an explicit 'resolution', then whatever can be built from 'width' and
    'height', and finally `default`.
    """
    if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
        return 'audio only'

    explicit = format.get('resolution')
    if explicit is not None:
        return explicit

    width, height = format.get('width'), format.get('height')
    if width and height:
        return '%dx%d' % (width, height)
    if height:
        return '%sp' % height
    if width:
        return '%dx?' % width
    return default
8c51aa65 3753
8130779d 3754 def _list_format_headers(self, *headers):
3755 if self.params.get('listformats_table', True) is not False:
591bb9d3 3756 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
8130779d 3757 return headers
3758
c57f7757
PH
3759 def _format_note(self, fdict):
3760 res = ''
3761 if fdict.get('ext') in ['f4f', 'f4m']:
f304da8a 3762 res += '(unsupported)'
32f90364
PH
3763 if fdict.get('language'):
3764 if res:
3765 res += ' '
f304da8a 3766 res += '[%s]' % fdict['language']
c57f7757 3767 if fdict.get('format_note') is not None:
f304da8a 3768 if res:
3769 res += ' '
3770 res += fdict['format_note']
c57f7757 3771 if fdict.get('tbr') is not None:
f304da8a 3772 if res:
3773 res += ', '
3774 res += '%4dk' % fdict['tbr']
c57f7757
PH
3775 if fdict.get('container') is not None:
3776 if res:
3777 res += ', '
3778 res += '%s container' % fdict['container']
3089bc74
S
3779 if (fdict.get('vcodec') is not None
3780 and fdict.get('vcodec') != 'none'):
c57f7757
PH
3781 if res:
3782 res += ', '
3783 res += fdict['vcodec']
91c7271a 3784 if fdict.get('vbr') is not None:
c57f7757
PH
3785 res += '@'
3786 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3787 res += 'video@'
3788 if fdict.get('vbr') is not None:
3789 res += '%4dk' % fdict['vbr']
fbb21cf5 3790 if fdict.get('fps') is not None:
5d583bdf
S
3791 if res:
3792 res += ', '
3793 res += '%sfps' % fdict['fps']
c57f7757
PH
3794 if fdict.get('acodec') is not None:
3795 if res:
3796 res += ', '
3797 if fdict['acodec'] == 'none':
3798 res += 'video only'
3799 else:
3800 res += '%-5s' % fdict['acodec']
3801 elif fdict.get('abr') is not None:
3802 if res:
3803 res += ', '
3804 res += 'audio'
3805 if fdict.get('abr') is not None:
3806 res += '@%3dk' % fdict['abr']
3807 if fdict.get('asr') is not None:
3808 res += ' (%5dHz)' % fdict['asr']
3809 if fdict.get('filesize') is not None:
3810 if res:
3811 res += ', '
3812 res += format_bytes(fdict['filesize'])
9732d77e
PH
3813 elif fdict.get('filesize_approx') is not None:
3814 if res:
3815 res += ', '
3816 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3817 return res
91c7271a 3818
aebb4f4b 3819 def _get_formats(self, info_dict):
3820 if info_dict.get('formats') is None:
3821 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3822 return [info_dict]
3823 return []
3824 return info_dict['formats']
b69fd25c 3825
def render_formats_table(self, info_dict):
    """Render the table of available formats, or return None when there are none.

    When the 'listformats_table' param is False, a plain four-column table is
    produced instead of the detailed one. Formats with a preference below
    -1000 are left out of both.
    """
    formats = self._get_formats(info_dict)
    if not formats:
        return

    if self.params.get('listformats_table', True) is False:
        plain_rows = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)
            ] for f in formats if (f.get('preference') or 0) >= -1000]
        return render_table(['format code', 'extension', 'resolution', 'note'], plain_rows, extra_gap=1)

    def simplified_codec(f, field):
        assert field in ('acodec', 'vcodec')
        codec = f.get(field)
        if not codec:
            return 'unknown'
        if codec != 'none':
            # Keep at most the first four dot-separated parts of the codec string
            return '.'.join(codec.split('.')[:4])

        if field == 'vcodec' and f.get('acodec') == 'none':
            return 'images'
        if field == 'acodec' and f.get('vcodec') == 'none':
            return ''
        label = 'audio only' if field == 'vcodec' else 'video only'
        return self._format_out(label, self.Styles.SUPPRESS)

    delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)

    def size_column(f):
        # Exact size, else approximate size, else an estimate from duration and bitrate
        return (
            format_field(f, 'filesize', ' \t%s', func=format_bytes)
            or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
            or format_field(
                try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
                None, self._format_out('~\t%s', self.Styles.SUPPRESS)))

    def more_info_column(f):
        flags = join_nonempty(
            self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
            (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
             else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
            format_field(f, 'format_note'),
            format_field(f, 'container', ignore=(None, f.get('ext'))),
            delim=', ')
        return join_nonempty(format_field(f, 'language', '[%s]'), flags, delim=' ')

    def build_row(f):
        return [
            self._format_out(format_field(f, 'format_id'), self.Styles.ID),
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d', func=round),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            format_field(f, 'audio_channels', '\t%s'),
            delim,
            size_column(f),
            format_field(f, 'tbr', '\t%dk', func=round),
            shorten_protocol_name(f.get('protocol', '')),
            delim,
            simplified_codec(f, 'vcodec'),
            format_field(f, 'vbr', '\t%dk', func=round),
            simplified_codec(f, 'acodec'),
            format_field(f, 'abr', '\t%dk', func=round),
            format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
            more_info_column(f),
        ]

    rows = [
        build_row(f) for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
        delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    return render_table(
        header_line, rows, hide_empty=True,
        delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
8130779d 3892
def render_thumbnails_table(self, info_dict):
    """Render the table of thumbnails, or return None when there are none."""
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        return None

    rows = [
        [thumb.get('id'), thumb.get('width') or 'unknown', thumb.get('height') or 'unknown', thumb['url']]
        for thumb in thumbnails]
    return render_table(self._list_format_headers('ID', 'Width', 'Height', 'URL'), rows)
2412044c 3900
def render_subtitles_table(self, video_id, subtitles):
    """Render the table of available subtitles, or return None when there are none.

    @param video_id   Unused here; accepted so all table renderers share a call shape
    @param subtitles  Mapping of language -> list of subtitle format dicts
                      (each with 'ext' and optionally 'name')
    """
    def _row(lang, formats):
        if not formats:
            # zip(*...) over nothing yields no tuples to unpack and would raise
            # ValueError; list the language with empty name/format columns instead
            return [lang, '', '']
        exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
        if len(set(names)) == 1:
            # All formats share one name: show it once, or not at all if unknown
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    if not subtitles:
        return None
    return render_table(
        self._list_format_headers('Language', 'Name', 'Formats'),
        [_row(lang, formats) for lang, formats in subtitles.items()],
        hide_empty=True)
3914
3915 def __list_table(self, video_id, name, func, *args):
3916 table = func(*args)
3917 if not table:
3918 self.to_screen(f'{video_id} has no {name}')
3919 return
3920 self.to_screen(f'[info] Available {name} for {video_id}:')
3921 self.to_stdout(table)
3922
def list_formats(self, info_dict):
    """Print the formats table for `info_dict`, or a notice when it has no formats."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'formats', self.render_formats_table, info_dict)
3925
def list_thumbnails(self, info_dict):
    """Print the thumbnails table for `info_dict`, or a notice when it has no thumbnails."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'thumbnails', self.render_thumbnails_table, info_dict)
3928
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the subtitles table for `video_id`, labelled `name`, or a notice when empty."""
    renderer = self.render_subtitles_table
    self.__list_table(video_id, name, renderer, video_id, subtitles)
a504ced0 3931
dca08720
PH
def print_debug_header(self):
    """Write the verbose-mode debug header; does nothing unless the 'verbose' param is set.

    The header covers encodings, version/variant, compatibility options,
    external program versions, optional libraries, proxies, request handlers
    and loaded plugins.
    """
    if not self.params.get('verbose'):
        return

    from . import _IN_CLI  # Must be delayed import

    # These imports can be slow. So import them only as needed
    from .extractor.extractors import _LAZY_LOADER
    from .extractor.extractors import (
        _PLUGIN_CLASSES as plugin_ies,
        _PLUGIN_OVERRIDES as plugin_ie_overrides
    )

    def get_encoding(stream):
        described = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
        remarks = []
        if os.environ.get('TERM', '').lower() == 'dumb':
            remarks.append('dumb')
        if not supports_terminal_sequences(stream):
            from .utils import WINDOWS_VT_MODE  # Must be imported locally
            remarks.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
        if remarks:
            described = f'{described} ({",".join(remarks)})'
        return described

    stream_encodings = ', '.join(
        f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
        if stream is not None and key != 'console')
    encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        self.get_encoding(),
        stream_encodings,
    )

    logger = self.params.get('logger')
    if logger:
        def write_debug(msg):
            return logger.debug(f'[debug] {msg}')

        write_debug(encoding_str)
    else:
        # The encodings line goes through write_string with encoding=None;
        # everything after it uses self._write_string
        write_string(f'[debug] {encoding_str}\n', encoding=None)

        def write_debug(msg):
            return self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    if VARIANT not in (None, 'pip'):
        source += '*'
    klass = type(self)
    write_debug(join_nonempty(
        f'{REPOSITORY.rpartition("/")[2]} version',
        _make_label(ORIGIN, CHANNEL.partition('@')[2] or __version__, __version__),
        f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
        delim=' '))

    if not _IN_CLI:
        write_debug(f'params: {self.params}')

    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if self.params['compat_opts']:
        write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

    if current_git_head():
        write_debug(f'Git HEAD: {current_git_head()}')
    write_debug(system_identifier())

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    ffmpeg_features = {feature for feature, enabled in ffmpeg_features.items() if enabled}
    if ffmpeg_features:
        exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {version}' for exe, version in sorted(exe_versions.items()) if version
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .compat.compat_utils import get_package_info
    from .dependencies import available_dependencies

    libraries = sorted({
        join_nonempty(*get_package_info(module)) for module in available_dependencies.values()
    })
    write_debug('Optional libraries: %s' % (', '.join(libraries) or 'none'))

    write_debug(f'Proxy map: {self.proxies}')
    write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
    for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
        display_list = ['%s%s' % (
            plugin_cls.__name__, '' if plugin_cls.__name__ == name else f' as {name}')
            for name, plugin_cls in plugins.items()]
        if plugin_type == 'Extractor':
            display_list.extend(f'{overrides[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
                                for parent, overrides in plugin_ie_overrides.items())
        if not display_list:
            continue
        write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')

    plugin_dirs = plugin_directories()
    if plugin_dirs:
        write_debug(f'Plugin directories: {plugin_dirs}')

    # Not implemented
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
        write_debug('Public IP address: %s' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode()
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
4048
@functools.cached_property
def proxies(self):
    """Global proxy configuration"""
    configured = self.params.get('proxy')
    if configured is not None:
        # An explicitly empty proxy setting means "use no proxy"
        return {'all': '__noproxy__' if configured == '' else configured}

    detected = urllib.request.getproxies()
    # compat. Set HTTPS_PROXY to __noproxy__ to revert
    if 'http' in detected and 'https' not in detected:
        detected['https'] = detected['http']
    return detected
62fec3b2 4064
@functools.cached_property
def cookiejar(self):
    """Global cookiejar instance"""
    cookie_file = self.params.get('cookiefile')
    browser_spec = self.params.get('cookiesfrombrowser')
    return load_cookies(cookie_file, browser_spec, self)
4070
227bf1a3 4071 @property
4072 def _opener(self):
4073 """
4074 Get a urllib OpenerDirector from the Urllib handler (deprecated).
4075 """
62b5c94c 4076 self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
227bf1a3 4077 handler = self._request_director.handlers['Urllib']
4078 return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
4079
def urlopen(self, req):
    """ Start an HTTP download

    `req` may be a URL string, a Request, or (deprecated) a
    urllib.request.Request. Some NoSupportingHandlers and SSLError failures
    are re-raised as RequestError with a hint on how to resolve them.
    """
    if isinstance(req, str):
        req = Request(req)
    elif isinstance(req, urllib.request.Request):
        self.deprecation_warning(
            'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
            'Use yt_dlp.networking.common.Request instead.')
        req = urllib_req_to_req(req)
    assert isinstance(req, Request)

    # compat: Assume user:pass url params are basic auth
    bare_url, basic_auth_header = extract_basic_auth(req.url)
    if basic_auth_header:
        req.headers['Authorization'] = basic_auth_header
    req.url = sanitize_url(bare_url)

    clean_proxies(proxies=req.proxies, headers=req.headers)
    clean_headers(req.headers)

    try:
        return self._request_director.send(req)
    except NoSupportingHandlers as e:
        for ue in e.unsupported_errors:
            # FIXME: This depends on the order of errors.
            if not (ue.handler and ue.msg):
                continue
            reason = ue.msg.lower()
            if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in reason:
                raise RequestError(
                    'file:// URLs are disabled by default in yt-dlp for security reasons. '
                    'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
            if 'unsupported proxy type: "https"' in reason:
                raise RequestError(
                    'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests')

            if (
                re.match(r'unsupported url scheme: "wss?"', reason)
                and 'websockets' not in self._request_director.handlers
            ):
                raise RequestError(
                    'This request requires WebSocket support. '
                    'Ensure one of the following dependencies are installed: websockets',
                    cause=ue) from ue
        raise
    except SSLError as e:
        details = str(e)
        if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in details:
            raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
        if 'SSLV3_ALERT_HANDSHAKE_FAILURE' in details:
            raise RequestError(
                'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
                'Try using --legacy-server-connect', cause=e) from e
        raise
227bf1a3 4132
def build_request_director(self, handlers, preferences=None):
    """Create a RequestDirector holding one instance of each class in `handlers`.

    All handlers are constructed from this object's params, cookiejar and
    proxies. `preferences` are added to the director's preferences.
    """
    logger = _YDLLogger(self)
    headers = self.params['http_headers'].copy()
    proxies = self.proxies.copy()
    clean_headers(headers)
    clean_proxies(proxies, headers)

    # Mapping of handler keyword -> key (or nested mapping of keys) in self.params
    param_mapping = {
        'verbose': 'debug_printtraffic',
        'source_address': 'source_address',
        'timeout': 'socket_timeout',
        'legacy_ssl_support': 'legacyserverconnect',
        'enable_file_urls': 'enable_file_urls',
        'client_cert': {
            'client_certificate': 'client_certificate',
            'client_certificate_key': 'client_certificate_key',
            'client_certificate_password': 'client_certificate_password',
        },
    }

    director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
    for handler_cls in handlers:
        instance = handler_cls(
            logger=logger,
            headers=headers,
            cookiejar=self.cookiejar,
            proxies=proxies,
            prefer_system_certs='no-certifi' in self.params['compat_opts'],
            verify=not self.params.get('nocheckcertificate'),
            **traverse_obj(self.params, param_mapping),
        )
        director.add_handler(instance)

    director.preferences.update(preferences or [])
    if 'prefer-legacy-http-handler' in self.params['compat_opts']:
        director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
    return director
c365dba8 4166
@functools.cached_property
def _request_director(self):
    """Request director built from the registered request handlers and preferences."""
    handler_classes = _REQUEST_HANDLERS.values()
    return self.build_request_director(handler_classes, _RH_PREFERENCES)
4170
62fec3b2
PH
def encode(self, s):
    """Encode `s` with the configured encoding; bytes are returned unchanged.

    A UnicodeEncodeError is re-raised with a configuration hint appended to its reason.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        return s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason += '. Check your system encoding configuration or use the --encoding option.'
        raise
4180
def get_encoding(self):
    """Return the 'encoding' param, falling back to preferredencoding() when it is unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a 4186
e08a85d8 4187 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
cb96c5be 4188 ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
e08a85d8 4189 if overwrite is None:
4190 overwrite = self.params.get('overwrites', True)
80c03fa9 4191 if not self.params.get('writeinfojson'):
4192 return False
4193 elif not infofn:
4194 self.write_debug(f'Skipping writing {label} infojson')
4195 return False
4196 elif not self._ensure_dir_exists(infofn):
4197 return None
e08a85d8 4198 elif not overwrite and os.path.exists(infofn):
80c03fa9 4199 self.to_screen(f'[info] {label.title()} metadata is already present')
cb96c5be 4200 return 'exists'
4201
4202 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
4203 try:
4204 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
4205 return True
86e5f3ed 4206 except OSError:
cb96c5be 4207 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
4208 return None
80c03fa9 4209
4210 def _write_description(self, label, ie_result, descfn):
4211 ''' Write description and returns True = written, False = skip, None = error '''
4212 if not self.params.get('writedescription'):
4213 return False
4214 elif not descfn:
4215 self.write_debug(f'Skipping writing {label} description')
4216 return False
4217 elif not self._ensure_dir_exists(descfn):
4218 return None
4219 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
4220 self.to_screen(f'[info] {label.title()} description is already present')
4221 elif ie_result.get('description') is None:
88fb9425 4222 self.to_screen(f'[info] There\'s no {label} description to write')
80c03fa9 4223 return False
4224 else:
4225 try:
4226 self.to_screen(f'[info] Writing {label} description to: {descfn}')
86e5f3ed 4227 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
80c03fa9 4228 descfile.write(ie_result['description'])
86e5f3ed 4229 except OSError:
80c03fa9 4230 self.report_error(f'Cannot write {label} description file {descfn}')
4231 return None
4232 return True
4233
def _write_subtitles(self, info_dict, filename):
    ''' Write the requested subtitles of info_dict to file.

    Returns a list of (sub_filename, final_sub_filename) with one entry per
    subtitle that was written, downloaded or already present; or None if a
    subtitle file could not be written.
    Raises DownloadError if a download fails and 'ignoreerrors' is not True.
    '''
    written = []
    requested = info_dict.get('requested_subtitles')
    wants_subs = self.params.get('writesubtitles') or self.params.get('writeautomaticsub')
    if not wants_subs:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        return written
    if not requested:
        self.to_screen('[info] There are no subtitles for the requested languages')
        return written

    final_base = self.prepare_filename(info_dict, 'subtitle')
    if not final_base:
        self.to_screen('[info] Skipping writing video subtitles')
        return written

    for lang, sub in requested.items():
        fmt = sub['ext']
        video_ext = info_dict.get('ext')
        tmp_name = subtitles_filename(filename, lang, fmt, video_ext)
        final_name = subtitles_filename(final_base, lang, fmt, video_ext)

        found = self.existing_file((final_name, tmp_name))
        if found:
            self.to_screen(f'[info] Video subtitle {lang}.{fmt} is already present')
            sub['filepath'] = found
            written.append((found, final_name))
            continue

        self.to_screen(f'[info] Writing video subtitles to: {tmp_name}')

        # Subtitle content already available in memory: dump it directly
        if sub.get('data') is not None:
            try:
                # Use newline='' to prevent conversion of newline characters
                # See https://github.com/ytdl-org/youtube-dl/issues/10268
                with open(tmp_name, 'w', encoding='utf-8', newline='') as subfile:
                    subfile.write(sub['data'])
                sub['filepath'] = tmp_name
                written.append((tmp_name, final_name))
            except OSError:
                self.report_error(f'Cannot write video subtitles file {tmp_name}')
                return None
            continue

        # Otherwise hand a copy of the subtitle info to the downloader
        try:
            dl_info = sub.copy()
            dl_info.setdefault('http_headers', info_dict.get('http_headers'))
            self.dl(tmp_name, dl_info, subtitle=True)
            sub['filepath'] = tmp_name
            written.append((tmp_name, final_name))
        except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
            msg = f'Unable to download video subtitles for {lang!r}: {err}'
            ignore = self.params.get('ignoreerrors')
            if ignore is True:
                self.report_warning(msg)
                continue
            # False or 'only_download'; the error is only reported for a falsy value
            if not ignore:
                self.report_error(msg)
            raise DownloadError(msg)
    return written
80c03fa9 4289
def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
    ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error

    label is only used to build the messages shown to the user.
    thumb_filename_base defaults to filename when it is None.
    Thumbnails are tried from the last entry of info_dict['thumbnails'] to the
    first; unless 'write_all_thumbnails' is set, the loop stops after the
    first thumbnail that is present or successfully downloaded.
    Entries that fail to download are removed from info_dict['thumbnails'];
    entries that are present or downloaded get a 'filepath' key.
    '''
    write_all = self.params.get('write_all_thumbnails', False)
    ret = []
    if not (write_all or self.params.get('writethumbnail', False)):
        # Thumbnail writing was not requested: nothing to do
        return ret

    thumbnails = info_dict.get('thumbnails') or []
    if not thumbnails:
        self.to_screen(f'[info] There are no {label} thumbnails to download')
        return ret
    multiple = write_all and len(thumbnails) > 1

    if thumb_filename_base is None:
        thumb_filename_base = filename
    if not thumb_filename_base:
        self.write_debug(f'Skipping writing {label} thumbnail')
        return ret

    if not self._ensure_dir_exists(filename):
        return None

    # Walk backwards over a snapshot so that thumbnails.pop(idx) below
    # does not shift the indices of the entries still to be visited
    for idx, t in reversed(list(enumerate(thumbnails))):
        thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
        thumb_display_id = f'{label} thumbnail {t["id"]}'
        thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
        thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

        existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
        if existing_thumb:
            self.to_screen('[info] %s is already present' % (
                thumb_display_id if multiple else f'{label} thumbnail').capitalize())
            t['filepath'] = existing_thumb
            ret.append((existing_thumb, thumb_filename_final))
        else:
            self.to_screen(f'[info] Downloading {thumb_display_id} ...')
            try:
                request = Request(t['url'], headers=t.get('http_headers', {}))
                # Close the response explicitly instead of leaving the
                # connection open until the object is garbage-collected
                with contextlib.closing(self.urlopen(request)) as uf:
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                ret.append((thumb_filename, thumb_filename_final))
                t['filepath'] = thumb_filename
            except network_exceptions as err:
                if isinstance(err, HTTPError) and err.status == 404:
                    self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
                else:
                    self.report_warning(f'Unable to download {thumb_display_id}: {err}')
                thumbnails.pop(idx)
        if ret and not write_all:
            break
    return ret