]> jfr.im git - yt-dlp.git/blame - yt_dlp/YoutubeDL.py
[outtmpl] Support multiplication
[yt-dlp.git] / yt_dlp / YoutubeDL.py
CommitLineData
26e63931 1import collections
31bd3925 2import contextlib
31215122 3import copy
9d2ecdbc 4import datetime
c1c9a79c 5import errno
31bd3925 6import fileinput
31215122 7import http.cookiejar
8222d8de 8import io
b82f815f 9import itertools
8694c600 10import json
62fec3b2 11import locale
083c9df9 12import operator
8222d8de 13import os
f8271158 14import random
8222d8de
JMF
15import re
16import shutil
6f2287cb 17import string
dca08720 18import subprocess
8222d8de 19import sys
21cd8fae 20import tempfile
8222d8de 21import time
67134eab 22import tokenize
8222d8de 23import traceback
524e2e4f 24import unicodedata
961ea474 25
f8271158 26from .cache import Cache
227bf1a3 27from .compat import functools, urllib # isort: split
28from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
31215122 29from .cookies import LenientSimpleCookie, load_cookies
f8271158 30from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
31from .downloader.rtmp import rtmpdump_version
f8271158 32from .extractor import gen_extractor_classes, get_info_extractor
fe7866d0 33from .extractor.common import UnsupportedURLIE
f8271158 34from .extractor.openload import PhantomJSwrapper
35from .minicurses import format_text
3d2623a8 36from .networking import HEADRequest, Request, RequestDirector
db7b054a 37from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES
227bf1a3 38from .networking.exceptions import (
39 HTTPError,
40 NoSupportingHandlers,
41 RequestError,
42 SSLError,
43 _CompatHTTPError,
3d2623a8 44 network_exceptions,
227bf1a3 45)
8e40b9d1 46from .plugins import directories as plugin_directories
e756f45b 47from .postprocessor import _PLUGIN_CLASSES as plugin_pps
f8271158 48from .postprocessor import (
49 EmbedThumbnailPP,
50 FFmpegFixupDuplicateMoovPP,
51 FFmpegFixupDurationPP,
52 FFmpegFixupM3u8PP,
53 FFmpegFixupM4aPP,
54 FFmpegFixupStretchedPP,
55 FFmpegFixupTimestampPP,
56 FFmpegMergerPP,
57 FFmpegPostProcessor,
ca9def71 58 FFmpegVideoConvertorPP,
f8271158 59 MoveFilesAfterDownloadPP,
60 get_postprocessor,
61)
ca9def71 62from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
0b6ad22e 63from .update import REPOSITORY, _get_system_deprecation, _make_label, current_git_head, detect_variant
8c25f81b 64from .utils import (
f8271158 65 DEFAULT_OUTTMPL,
7b2c3f47 66 IDENTITY,
f8271158 67 LINK_TEMPLATES,
8dc59305 68 MEDIA_EXTENSIONS,
f8271158 69 NO_DEFAULT,
1d485a1a 70 NUMBER_RE,
f8271158 71 OUTTMPL_TYPES,
72 POSTPROCESS_WHEN,
73 STR_FORMAT_RE_TMPL,
74 STR_FORMAT_TYPES,
75 ContentTooShortError,
76 DateRange,
77 DownloadCancelled,
78 DownloadError,
79 EntryNotInPlaylist,
80 ExistingVideoReached,
81 ExtractorError,
784320c9 82 FormatSorter,
f8271158 83 GeoRestrictedError,
f8271158 84 ISO3166Utils,
85 LazyList,
86 MaxDownloadsReached,
19a03940 87 Namespace,
f8271158 88 PagedList,
7e88d7d7 89 PlaylistEntries,
f8271158 90 Popen,
91 PostProcessingError,
92 ReExtractInfo,
93 RejectedVideoReached,
94 SameFileError,
95 UnavailableVideoError,
693f0600 96 UserNotLive,
eedb7ba5
S
97 age_restricted,
98 args_to_str,
cb794ee0 99 bug_reports_message,
ce02ed60 100 date_from_str,
da4db748 101 deprecation_warning,
ce02ed60 102 determine_ext,
b5559424 103 determine_protocol,
c0384f22 104 encode_compat_str,
ce02ed60 105 encodeFilename,
a06916d9 106 error_to_compat_str,
47cdc68e 107 escapeHTML,
590bc6f6 108 expand_path,
227bf1a3 109 extract_basic_auth,
90137ca4 110 filter_dict,
e29663c6 111 float_or_none,
02dbf93f 112 format_bytes,
e0fd9573 113 format_decimal_suffix,
f8271158 114 format_field,
525ef922 115 formatSeconds,
fc61aff4 116 get_compatible_ext,
0bb322b9 117 get_domain,
c9969434 118 int_or_none,
732044af 119 iri_to_uri,
941e881e 120 is_path_like,
34921b43 121 join_nonempty,
ce02ed60 122 locked_file,
0647d925 123 make_archive_id,
0202b52a 124 make_dir,
ec11a9f4 125 number_of_digits,
cd6fc19e 126 orderedSet,
5314b521 127 orderedSet_from_options,
083c9df9 128 parse_filesize,
ce02ed60 129 preferredencoding,
eedb7ba5 130 prepend_extension,
3efb96a6 131 remove_terminal_sequences,
cfb56d1a 132 render_table,
eedb7ba5 133 replace_extension,
ce02ed60 134 sanitize_filename,
1bb5c511 135 sanitize_path,
dcf77cf1 136 sanitize_url,
1211bb6d 137 str_or_none,
e29663c6 138 strftime_or_none,
ce02ed60 139 subtitles_filename,
819e0531 140 supports_terminal_sequences,
b1f94422 141 system_identifier,
f2ebc5c7 142 timetuple_from_msec,
732044af 143 to_high_limit_path,
324ad820 144 traverse_obj,
fc61aff4 145 try_call,
6033d980 146 try_get,
29eb5174 147 url_basename,
7d1eb38a 148 variadic,
58b1f00d 149 version_tuple,
53973b4d 150 windows_enable_vt_mode,
ce02ed60
PH
151 write_json_file,
152 write_string,
4f026faf 153)
227bf1a3 154from .utils._utils import _YDLLogger
155from .utils.networking import (
156 HTTPHeaderDict,
157 clean_headers,
158 clean_proxies,
3d2623a8 159 std_headers,
227bf1a3 160)
20314dd4 161from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__
8222d8de 162
e9c0cdd3
YCH
163if compat_os_name == 'nt':
164 import ctypes
165
2459b6e1 166
86e5f3ed 167class YoutubeDL:
8222d8de
JMF
168 """YoutubeDL class.
169
170 YoutubeDL objects are the ones responsible for downloading the
171 actual video file and writing it to disk if the user has requested
172 it, among some other tasks. In most cases there should be one per
173 program. As, given a video URL, the downloader doesn't know how to
174 extract all the needed information, task that InfoExtractors do, it
175 has to pass the URL to one of them.
176
177 For this, YoutubeDL objects have a method that allows
178 InfoExtractors to be registered in a given order. When it is passed
179 a URL, the YoutubeDL object hands it to the first InfoExtractor it
180 finds that reports being able to handle it. The InfoExtractor extracts
181 all the information about the video or videos the URL refers to, and
182 YoutubeDL processes the extracted information, possibly using a File
183 Downloader to download the video.
184
185 YoutubeDL objects accept a lot of parameters. In order not to saturate
186 the object constructor with arguments, it receives a dictionary of
187 options instead. These options are available through the params
188 attribute for the InfoExtractors to use. The YoutubeDL also
189 registers itself as the downloader in charge for the InfoExtractors
190 that are added to it, so this is a "mutual registration".
191
192 Available options:
193
194 username: Username for authentication purposes.
195 password: Password for authentication purposes.
180940e0 196 videopassword: Password for accessing a video.
1da50aa3
S
197 ap_mso: Adobe Pass multiple-system operator identifier.
198 ap_username: Multiple-system operator account username.
199 ap_password: Multiple-system operator account password.
8222d8de 200 usenetrc: Use netrc for authentication instead.
c8bc203f 201 netrc_location: Location of the netrc file. Defaults to ~/.netrc.
db3ad8a6 202 netrc_cmd: Use a shell command to get credentials
8222d8de
JMF
203 verbose: Print additional info to stdout.
204 quiet: Do not print messages to stdout.
ad8915b7 205 no_warnings: Do not print out anything for warnings.
bb66c247 206 forceprint: A dict with keys WHEN mapped to a list of templates to
207 print to stdout. The allowed keys are video or any of the
208 items in utils.POSTPROCESS_WHEN.
ca30f449 209 For compatibility, a single list is also accepted
bb66c247 210 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
211 a list of tuples with (template, filename)
8694c600 212 forcejson: Force printing info_dict as JSON.
63e0be34
PH
213 dump_single_json: Force printing the info_dict of the whole playlist
214 (or video) as a single JSON line.
c25228e5 215 force_write_download_archive: Force writing download archive regardless
216 of 'skip_download' or 'simulate'.
b7b04c78 217 simulate: Do not download the video files. If unset (or None),
218 simulate only if listsubtitles, listformats or list_thumbnails is used
eb8a4433 219 format: Video format code. see "FORMAT SELECTION" for more details.
093a1710 220 You can also pass a function. The function takes 'ctx' as
221 argument and returns the formats to download.
222 See "build_format_selector" for an implementation
63ad4d43 223 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 224 ignore_no_formats_error: Ignore "No video formats" error. Useful for
225 extracting metadata even if the video is not actually
226 available for download (experimental)
0930b11f 227 format_sort: A list of fields by which to sort the video formats.
228 See "Sorting Formats" for more details.
c25228e5 229 format_sort_force: Force the given format_sort. see "Sorting Formats"
230 for more details.
08d30158 231 prefer_free_formats: Whether to prefer video formats with free containers
232 over non-free ones of same quality.
c25228e5 233 allow_multiple_video_streams: Allow multiple video streams to be merged
234 into a single file
235 allow_multiple_audio_streams: Allow multiple audio streams to be merged
236 into a single file
0ba692ac 237 check_formats: Whether to test if the formats are downloadable.
9f1a1c36 238 Can be True (check all), False (check none),
239 'selected' (check selected formats),
0ba692ac 240 or None (check only if requested by extractor)
4524baf0 241 paths: Dictionary of output paths. The allowed keys are 'home'
5ca095cb 242 'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
de6000d9 243 outtmpl: Dictionary of templates for output names. Allowed keys
5ca095cb 244 are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).
34488702 245 For compatibility with youtube-dl, a single string can also be used
a820dc72
RA
246 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
247 restrictfilenames: Do not allow "&" and spaces in file names
248 trim_file_name: Limit length of filename (extension excluded)
4524baf0 249 windowsfilenames: Force the filenames to be windows compatible
b1940459 250 ignoreerrors: Do not stop on download/postprocessing errors.
251 Can be 'only_download' to ignore only download errors.
252 Default is 'only_download' for CLI, but False for API
26e2805c 253 skip_playlist_after_errors: Number of allowed failures until the rest of
254 the playlist is skipped
fe7866d0 255 allowed_extractors: List of regexes to match against extractor names that are allowed
0c3d0f51 256 overwrites: Overwrite all video and metadata files if True,
257 overwrite only non-video files if None
258 and don't overwrite any file if False
c14e88f0 259 playlist_items: Specific indices of playlist to download.
75822ca7 260 playlistrandom: Download playlist items in random order.
7e9a6125 261 lazy_playlist: Process playlist entries as they are received.
8222d8de
JMF
262 matchtitle: Download only matching titles.
263 rejecttitle: Reject downloads for matching titles.
8bf9319e 264 logger: Log messages to a logging.Logger instance.
17ffed18 265 logtostderr: Print everything to stderr instead of stdout.
266 consoletitle: Display progress in console window's titlebar.
8222d8de
JMF
267 writedescription: Write the video description to a .description file
268 writeinfojson: Write the video metadata to a .info.json file
ad54c913 269 clean_infojson: Remove internal metadata from the infojson
34488702 270 getcomments: Extract video comments. This will not be written to disk
06167fbb 271 unless writeinfojson is also given
1fb07d10 272 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 273 writethumbnail: Write the thumbnail image to a file
c25228e5 274 allow_playlist_files: Whether to write playlists' description, infojson etc
275 also to disk when using the 'write*' options
ec82d85a 276 write_all_thumbnails: Write all thumbnail formats to files
732044af 277 writelink: Write an internet shortcut file, depending on the
278 current platform (.url/.webloc/.desktop)
279 writeurllink: Write a Windows internet shortcut file (.url)
280 writewebloclink: Write a macOS internet shortcut file (.webloc)
281 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 282 writesubtitles: Write the video subtitles to a file
741dd8ea 283 writeautomaticsub: Write the automatically generated subtitles to a file
8222d8de 284 listsubtitles: Lists all available subtitles for the video
a504ced0 285 subtitlesformat: The format code for subtitles
c32b0aab 286 subtitleslangs: List of languages of the subtitles to download (can be regex).
287 The list may contain "all" to refer to all the available
288 subtitles. The language can be prefixed with a "-" to
62b58c09 289 exclude it from the requested languages, e.g. ['all', '-live_chat']
8222d8de 290 keepvideo: Keep the video file after post-processing
46f1370e 291 daterange: A utils.DateRange object, download only if the upload_date is in the range.
8222d8de 292 skip_download: Skip the actual download of the video file
c35f9e72 293 cachedir: Location of the cache files in the filesystem.
a0e07d31 294 False to disable filesystem cache.
47192f92 295 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
296 age_limit: An integer representing the user's age in years.
297 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
298 min_views: An integer representing the minimum view count the video
299 must have in order to not be skipped.
300 Videos without view count information are always
301 downloaded. None for no limit.
302 max_views: An integer representing the maximum view count.
303 Videos that are more popular than that are not
304 downloaded.
305 Videos without view count information are always
306 downloaded. None for no limit.
ae103564 307 download_archive: A set, or the name of a file where all downloads are recorded.
308 Videos already present in the file are not downloaded again.
8a51f564 309 break_on_existing: Stop the download process after attempting to download a
310 file that is in the archive.
b222c271 311 break_per_url: Whether break_on_reject and break_on_existing
312 should act on each input URL as opposed to for the entire queue
d76fa1f3 313 cookiefile: File name or text stream from where cookies should be read and dumped to
f59f5ef8 314 cookiesfrombrowser: A tuple containing the name of the browser, the profile
9bd13fe5 315 name/path from where cookies are loaded, the name of the keyring,
316 and the container name, e.g. ('chrome', ) or
317 ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
f81c62a6 318 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
319 support RFC 5746 secure renegotiation
f59f5ef8 320 nocheckcertificate: Do not verify SSL certificates
bb58c9ed 321 client_certificate: Path to client certificate file in PEM format. May include the private key
322 client_certificate_key: Path to private key file for client certificate
323 client_certificate_password: Password for client certificate private key, if encrypted.
324 If not provided and the key is encrypted, yt-dlp will ask interactively
7e8c0af0 325 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
c6e07cf1 326 (Only supported by some extractors)
8300774c 327 enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons.
8b7539d2 328 http_headers: A dictionary of custom headers to be used for all requests
a1ee09e8 329 proxy: URL of the proxy server to use
38cce791 330 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 331 on geo-restricted sites.
e344693b 332 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
333 bidi_workaround: Work around buggy terminals without bidirectional text
334 support, using fribidi
a0ddb8a2 335 debug_printtraffic:Print out sent and received HTTP traffic
04b4d394
PH
336 default_search: Prepend this string if an input url is not valid.
337 'auto' for elaborate guessing
62fec3b2 338 encoding: Use this encoding instead of the system-specified.
134c913c 339 extract_flat: Whether to resolve and process url_results further
46f1370e 340 * False: Always process. Default for API
134c913c 341 * True: Never process
342 * 'in_playlist': Do not process inside playlist/multi_video
343 * 'discard': Always process, but don't return the result
344 from inside playlist/multi_video
345 * 'discard_in_playlist': Same as "discard", but only for
46f1370e 346 playlists (not multi_video). Default for CLI
f2ebc5c7 347 wait_for_video: If given, wait for scheduled streams to become available.
348 The value should be a tuple containing the range
349 (min_secs, max_secs) to wait between retries
4f026faf 350 postprocessors: A list of dictionaries, each with an entry
71b640cc 351 * key: The name of the postprocessor. See
7a5c1cfe 352 yt_dlp/postprocessor/__init__.py for a list.
bb66c247 353 * when: When to run the postprocessor. Allowed values are
354 the entries of utils.POSTPROCESS_WHEN
56d868db 355 Assumed to be 'post_process' if not given
71b640cc
PH
356 progress_hooks: A list of functions that get called on download
357 progress, with a dictionary with the entries
5cda4eda 358 * status: One of "downloading", "error", or "finished".
ee69b99a 359 Check this first and ignore unknown values.
3ba7740d 360 * info_dict: The extracted info_dict
71b640cc 361
5cda4eda 362 If status is one of "downloading", or "finished", the
ee69b99a
PH
363 following properties may also be present:
364 * filename: The final filename (always present)
5cda4eda 365 * tmpfilename: The filename we're currently writing to
71b640cc
PH
366 * downloaded_bytes: Bytes on disk
367 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
368 * total_bytes_estimate: Guess of the eventual file size,
369 None if unavailable.
370 * elapsed: The number of seconds since download started.
71b640cc
PH
371 * eta: The estimated time in seconds, None if unknown
372 * speed: The download speed in bytes/second, None if
373 unknown
5cda4eda
PH
374 * fragment_index: The counter of the currently
375 downloaded video fragment.
376 * fragment_count: The number of fragments (= individual
377 files that will be merged)
71b640cc
PH
378
379 Progress hooks are guaranteed to be called at least once
380 (with status "finished") if the download is successful.
819e0531 381 postprocessor_hooks: A list of functions that get called on postprocessing
382 progress, with a dictionary with the entries
383 * status: One of "started", "processing", or "finished".
384 Check this first and ignore unknown values.
385 * postprocessor: Name of the postprocessor
386 * info_dict: The extracted info_dict
387
388 Postprocessor hooks are guaranteed to be called at least twice
389 (with status "started" and "finished") if the processing is successful.
fc61aff4 390 merge_output_format: "/" separated list of extensions to use when merging formats.
6b591b29 391 final_ext: Expected final extension; used to detect when the file was
59a7a13e 392 already downloaded and converted
6271f1ca
PH
393 fixup: Automatically correct known faults of the file.
394 One of:
395 - "never": do nothing
396 - "warn": only emit a warning
397 - "detect_or_warn": check whether we can do anything
62cd676c 398 about it, warn otherwise (default)
504f20dd 399 source_address: Client-side IP address to bind to.
1cf376f5 400 sleep_interval_requests: Number of seconds to sleep between requests
401 during extraction
7aa589a5
S
402 sleep_interval: Number of seconds to sleep before each download when
403 used alone or a lower bound of a range for randomized
404 sleep before each download (minimum possible number
405 of seconds to sleep) when used along with
406 max_sleep_interval.
407 max_sleep_interval:Upper bound of a range for randomized sleep before each
408 download (maximum possible number of seconds to sleep).
409 Must only be used along with sleep_interval.
410 Actual sleep time will be a random float from range
411 [sleep_interval; max_sleep_interval].
1cf376f5 412 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a
PH
413 listformats: Print an overview of available video formats and exit.
414 list_thumbnails: Print a table of all thumbnails and exit.
0a41f331 415 match_filter: A function that gets called for every video with the signature
416 (info_dict, *, incomplete: bool) -> Optional[str]
417 For backward compatibility with youtube-dl, the signature
418 (info_dict) -> Optional[str] is also allowed.
419 - If it returns a message, the video is ignored.
420 - If it returns None, the video is downloaded.
421 - If it returns utils.NO_DEFAULT, the user is interactively
422 asked whether to download the video.
fe2ce85a 423 - Raise utils.DownloadCancelled(msg) to abort remaining
424 downloads when a video is rejected.
5ca095cb 425 match_filter_func in utils/_utils.py is one example for this.
8417f26b
SS
426 color: A Dictionary with output stream names as keys
427 and their respective color policy as values.
428 Can also just be a single color policy,
429 in which case it applies to all outputs.
430 Valid stream names are 'stdout' and 'stderr'.
431 Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
0a840f58 432 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 433 HTTP header
0a840f58 434 geo_bypass_country:
773f291d
S
435 Two-letter ISO 3166-1 alpha-2 country code that will be used for
436 explicit geographic restriction bypassing via faking
504f20dd 437 X-Forwarded-For HTTP header
5f95927a
S
438 geo_bypass_ip_block:
439 IP range in CIDR notation that will be used similarly to
504f20dd 440 geo_bypass_country
52a8a1e1 441 external_downloader: A dictionary of protocol keys and the executable of the
442 external downloader to use for it. The allowed protocols
443 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
444 Set the value to 'native' to use the native downloader
53ed7066 445 compat_opts: Compatibility options. See "Differences in default behavior".
3acf6d38 446 The following options do not work when used through the API:
b5ae35ee 447 filename, abort-on-error, multistreams, no-live-chat, format-sort
dac5df5a 448 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
e4f02757 449 Refer __init__.py for their implementation
819e0531 450 progress_template: Dictionary of templates for progress outputs.
451 Allowed keys are 'download', 'postprocess',
452 'download-title' (console title) and 'postprocess-title'.
453 The template is mapped on a dictionary with keys 'progress' and 'info'
23326151 454 retry_sleep_functions: Dictionary of functions that takes the number of attempts
455 as argument and returns the time to sleep in seconds.
456 Allowed keys are 'http', 'fragment', 'file_access'
0f446365
SW
457 download_ranges: A callback function that gets called for every video with
458 the signature (info_dict, ydl) -> Iterable[Section].
459 Only the returned sections will be downloaded.
460 Each Section is a dict with the following keys:
5ec1b6b7 461 * start_time: Start time of the section in seconds
462 * end_time: End time of the section in seconds
463 * title: Section title (Optional)
464 * index: Section number (Optional)
0f446365 465 force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
a7dc6a89 466 noprogress: Do not print the progress bar
a831c2ea 467 live_from_start: Whether to download livestream videos from the start
fe7e0c98 468
8222d8de 469 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 470 the downloader (see yt_dlp/downloader/common.py):
51d9739f 471 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
205a0654 472 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
a7dc6a89 473 continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
59a7a13e 474 external_downloader_args, concurrent_fragment_downloads.
76b1bd67
JMF
475
476 The following options are used by the post processors:
c0b7d117
S
477 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
478 to the binary or its containing directory.
43820c03 479 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
34488702 480 and a list of additional command-line arguments for the
481 postprocessor/executable. The dict can also have "PP+EXE" keys
482 which are used when the given exe is used by the given PP.
483 Use 'default' as the name for arguments to be passed to all PP
484 For compatibility with youtube-dl, a single list of args
485 can also be used
e409895f 486
487 The following options are used by the extractors:
46f1370e 488 extractor_retries: Number of times to retry for known errors (default: 3)
62bff2c1 489 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 490 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 491 discontinuities such as ad breaks (default: False)
5d3a0e79 492 extractor_args: A dictionary of arguments to be passed to the extractors.
493 See "EXTRACTOR ARGUMENTS" for details.
62b58c09 494 E.g. {'youtube': {'skip': ['dash', 'hls']}}
88f23a18 495 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
1890fc63 496
497 The following options are deprecated and may be removed in the future:
498
fe2ce85a 499 break_on_reject: Stop the download process when encountering a video that
500 has been filtered out.
501 - `raise DownloadCancelled(msg)` in match_filter instead
fe7866d0 502 force_generic_extractor: Force downloader to use the generic extractor
503 - Use allowed_extractors = ['generic', 'default']
7e9a6125 504 playliststart: - Use playlist_items
505 Playlist item to start at.
506 playlistend: - Use playlist_items
507 Playlist item to end at.
508 playlistreverse: - Use playlist_items
509 Download playlist items in reverse order.
1890fc63 510 forceurl: - Use forceprint
511 Force printing final URL.
512 forcetitle: - Use forceprint
513 Force printing title.
514 forceid: - Use forceprint
515 Force printing ID.
516 forcethumbnail: - Use forceprint
517 Force printing thumbnail URL.
518 forcedescription: - Use forceprint
519 Force printing description.
520 forcefilename: - Use forceprint
521 Force printing final filename.
522 forceduration: - Use forceprint
523 Force printing duration.
524 allsubtitles: - Use subtitleslangs = ['all']
525 Downloads all the subtitles of the video
526 (requires writesubtitles or writeautomaticsub)
527 include_ads: - Doesn't work
528 Download ads as well
529 call_home: - Not implemented
530 Boolean, true iff we are allowed to contact the
531 yt-dlp servers for debugging.
532 post_hooks: - Register a custom postprocessor
533 A list of functions that get called as the final step
534 for each video file, after all postprocessors have been
535 called. The filename will be passed as the only argument.
536 hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
537 Use the native HLS downloader instead of ffmpeg/avconv
538 if True, otherwise use ffmpeg/avconv if False, otherwise
539 use downloader suggested by extractor if None.
540 prefer_ffmpeg: - avconv support is deprecated
541 If False, use avconv instead of ffmpeg if both are available,
542 otherwise prefer ffmpeg.
543 youtube_include_dash_manifest: - Use extractor_args
5d3a0e79 544 If True (default), DASH manifests and related
62bff2c1 545 data will be downloaded and processed by extractor.
546 You can reduce network I/O by disabling it if you don't
547 care about DASH. (only for youtube)
1890fc63 548 youtube_include_hls_manifest: - Use extractor_args
5d3a0e79 549 If True (default), HLS manifests and related
62bff2c1 550 data will be downloaded and processed by extractor.
551 You can reduce network I/O by disabling it if you don't
552 care about HLS. (only for youtube)
8417f26b 553 no_color: Same as `color='no_color'`
6148833f 554 no_overwrites: Same as `overwrites=False`
8222d8de
JMF
555 """
556
86e5f3ed 557 _NUMERIC_FIELDS = {
b8ed0f15 558 'width', 'height', 'asr', 'audio_channels', 'fps',
559 'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
e6f21b3d 560 'timestamp', 'release_timestamp',
c9969434
S
561 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
562 'average_rating', 'comment_count', 'age_limit',
563 'start_time', 'end_time',
564 'chapter_number', 'season_number', 'episode_number',
565 'track_number', 'disc_number', 'release_year',
86e5f3ed 566 }
c9969434 567
6db9c4d5 568 _format_fields = {
569 # NB: Keep in sync with the docstring of extractor/common.py
a44ca5a4 570 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
105bfd90 571 'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
d5d1df8a 572 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
6db9c4d5 573 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
62b5c94c 574 'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
7e68567e 575 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
6db9c4d5 576 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
577 }
48ee10ee 578 _format_selection_exts = {
8dc59305 579 'audio': set(MEDIA_EXTENSIONS.common_audio),
580 'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
581 'storyboards': set(MEDIA_EXTENSIONS.storyboards),
48ee10ee 582 }
583
3511266b 584 def __init__(self, params=None, auto_init=True):
883d4b1e 585 """Create a YoutubeDL object with the given options.
586 @param auto_init Whether to load the default extractors and print header (if verbose).
49a57e70 587 Set to 'no_verbose_header' to not print the header
883d4b1e 588 """
e9f9a10f
JMF
589 if params is None:
590 params = {}
592b7485 591 self.params = params
8b7491c8 592 self._ies = {}
56c73665 593 self._ies_instances = {}
1e43a6f7 594 self._pps = {k: [] for k in POSTPROCESS_WHEN}
b35496d8 595 self._printed_messages = set()
1cf376f5 596 self._first_webpage_request = True
ab8e5e51 597 self._post_hooks = []
933605d7 598 self._progress_hooks = []
819e0531 599 self._postprocessor_hooks = []
8222d8de
JMF
600 self._download_retcode = 0
601 self._num_downloads = 0
9c906919 602 self._num_videos = 0
592b7485 603 self._playlist_level = 0
604 self._playlist_urls = set()
a0e07d31 605 self.cache = Cache(self)
6148833f 606 self.__header_cookies = []
34308b30 607
591bb9d3 608 stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
609 self._out_files = Namespace(
610 out=stdout,
611 error=sys.stderr,
612 screen=sys.stderr if self.params.get('quiet') else stdout,
613 console=None if compat_os_name == 'nt' else next(
cf4f42cb 614 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
591bb9d3 615 )
f0795149 616
617 try:
618 windows_enable_vt_mode()
619 except Exception as e:
620 self.write_debug(f'Failed to enable VT mode: {e}')
621
8417f26b
SS
622 if self.params.get('no_color'):
623 if self.params.get('color') is not None:
62b5c94c 624 self.params.setdefault('_warnings', []).append(
625 'Overwriting params from "color" with "no_color"')
8417f26b
SS
626 self.params['color'] = 'no_color'
627
a0b19d31 628 term_allow_color = os.getenv('TERM', '').lower() != 'dumb'
629 no_color = bool(os.getenv('NO_COLOR'))
8417f26b
SS
630
def process_color_policy(stream):
    """Resolve the color setting to use for one output stream.

    The policy is read from ``self.params['color']``, which is documented
    as either a dict keyed by stream name ('stdout'/'stderr') or a single
    policy applying to all outputs.

    Returns True (colors allowed), False (colors disabled) or the string
    'no_color'.
    """
    names_by_stream = {sys.stdout: 'stdout', sys.stderr: 'stderr'}
    stream_name = names_by_stream[stream]
    policy = traverse_obj(
        self.params, ('color', (stream_name, None), {str}), get_all=False)

    # An explicit policy is mapped directly, without probing the terminal
    if policy not in ('auto', None):
        assert policy in ('always', 'never', 'no_color'), policy
        if policy == 'always':
            return True
        if policy == 'never':
            return False
        return policy

    # 'auto' / unset: decide from the terminal and the environment flags
    # computed by the enclosing scope (TERM != 'dumb', NO_COLOR)
    if not (term_allow_color and supports_terminal_sequences(stream)):
        return False
    if no_color:
        return 'no_color'
    return True
640
591bb9d3 641 self._allow_colors = Namespace(**{
8417f26b
SS
642 name: process_color_policy(stream)
643 for name, stream in self._out_files.items_ if name != 'console'
591bb9d3 644 })
819e0531 645
61bdf15f
SS
646 system_deprecation = _get_system_deprecation()
647 if system_deprecation:
648 self.deprecated_feature(system_deprecation.replace('\n', '\n '))
a61f4b28 649
88acdbc2 650 if self.params.get('allow_unplayable_formats'):
651 self.report_warning(
ec11a9f4 652 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
819e0531 653 'This is a developer option intended for debugging. \n'
654 ' If you experience any issues while using this option, '
ec11a9f4 655 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
88acdbc2 656
497074f0 657 if self.params.get('bidi_workaround', False):
658 try:
659 import pty
660 master, slave = pty.openpty()
661 width = shutil.get_terminal_size().columns
662 width_args = [] if width is None else ['-w', str(width)]
663 sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
664 try:
665 self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
666 except OSError:
667 self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
668 self._output_channel = os.fdopen(master, 'rb')
669 except OSError as ose:
670 if ose.errno == errno.ENOENT:
671 self.report_warning(
672 'Could not find fribidi executable, ignoring --bidi-workaround. '
673 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
674 else:
675 raise
676
677 self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
227bf1a3 678 self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
6c5211ce 679 self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
680 self.params['http_headers'].pop('Cookie', None)
db7b054a 681 self._request_director = self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
6c5211ce 682
497074f0 683 if auto_init and auto_init != 'no_verbose_header':
684 self.print_debug_header()
685
be5df5ee
S
686 def check_deprecated(param, option, suggestion):
687 if self.params.get(param) is not None:
86e5f3ed 688 self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
be5df5ee
S
689 return True
690 return False
691
692 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
38cce791
YCH
693 if self.params.get('geo_verification_proxy') is None:
694 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
695
0d1bb027 696 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
697 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
53ed7066 698 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
0d1bb027 699
49a57e70 700 for msg in self.params.get('_warnings', []):
0d1bb027 701 self.report_warning(msg)
ee8dd27a 702 for msg in self.params.get('_deprecation_warnings', []):
da4db748 703 self.deprecated_feature(msg)
0d1bb027 704
8a82af35 705 if 'list-formats' in self.params['compat_opts']:
ec11a9f4 706 self.params['listformats_table'] = False
707
b5ae35ee 708 if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
b868936c 709 # nooverwrites was unnecessarily changed to overwrites
710 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
711 # This ensures compatibility with both keys
712 self.params['overwrites'] = not self.params['nooverwrites']
b5ae35ee 713 elif self.params.get('overwrites') is None:
714 self.params.pop('overwrites', None)
b868936c 715 else:
716 self.params['nooverwrites'] = not self.params['overwrites']
b9d973be 717
e4221b70 718 if self.params.get('simulate') is None and any((
719 self.params.get('list_thumbnails'),
720 self.params.get('listformats'),
721 self.params.get('listsubtitles'),
722 )):
723 self.params['simulate'] = 'list_only'
724
455a15e2 725 self.params.setdefault('forceprint', {})
726 self.params.setdefault('print_to_file', {})
bb66c247 727
728 # Compatibility with older syntax
ca30f449 729 if not isinstance(params['forceprint'], dict):
455a15e2 730 self.params['forceprint'] = {'video': params['forceprint']}
ca30f449 731
97ec5bc5 732 if auto_init:
97ec5bc5 733 self.add_default_info_extractors()
734
3089bc74
S
735 if (sys.platform != 'win32'
736 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
455a15e2 737 and not self.params.get('restrictfilenames', False)):
e9137224 738 # Unicode filesystem API will throw errors (#1474, #13027)
34308b30 739 self.report_warning(
6febd1c1 740 'Assuming --restrict-filenames since file system encoding '
1b725173 741 'cannot encode all characters. '
6febd1c1 742 'Set the LC_ALL environment variable to fix this.')
4a98cdbf 743 self.params['restrictfilenames'] = True
34308b30 744
bf1824b3 745 self._parse_outtmpl()
486dd09e 746
187986a8 747 # Creating format selector here allows us to catch syntax errors before the extraction
748 self.format_selector = (
fa9f30b8 749 self.params.get('format') if self.params.get('format') in (None, '-')
093a1710 750 else self.params['format'] if callable(self.params['format'])
187986a8 751 else self.build_format_selector(self.params['format']))
752
013b50b7 753 hooks = {
754 'post_hooks': self.add_post_hook,
755 'progress_hooks': self.add_progress_hook,
756 'postprocessor_hooks': self.add_postprocessor_hook,
757 }
758 for opt, fn in hooks.items():
759 for ph in self.params.get(opt, []):
760 fn(ph)
71b640cc 761
5bfc8bee 762 for pp_def_raw in self.params.get('postprocessors', []):
763 pp_def = dict(pp_def_raw)
764 when = pp_def.pop('when', 'post_process')
765 self.add_post_processor(
f9934b96 766 get_postprocessor(pp_def.pop('key'))(self, **pp_def),
5bfc8bee 767 when=when)
768
ed39cac5 769 def preload_download_archive(fn):
770 """Preload the archive, if any is specified"""
ae103564 771 archive = set()
ed39cac5 772 if fn is None:
ae103564 773 return archive
941e881e 774 elif not is_path_like(fn):
ae103564 775 return fn
776
49a57e70 777 self.write_debug(f'Loading archive file {fn!r}')
ed39cac5 778 try:
779 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
780 for line in archive_file:
ae103564 781 archive.add(line.strip())
86e5f3ed 782 except OSError as ioe:
ed39cac5 783 if ioe.errno != errno.ENOENT:
784 raise
ae103564 785 return archive
ed39cac5 786
ae103564 787 self.archive = preload_download_archive(self.params.get('download_archive'))
ed39cac5 788
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn when an argument looks like an 11-character ID that starts with a dash.

    Such arguments match ``-`` followed by exactly ten ID characters. When any
    are found, a warning is reported that shows the command line rearranged
    with those arguments moved after a ``--`` separator.
    """
    dashed_positions = [
        pos for pos, arg in enumerate(argv)
        if re.match(r'^-[0-9A-Za-z_-]{10}$', arg)]
    if not dashed_positions:
        return

    regular_args = [arg for pos, arg in enumerate(argv) if pos not in dashed_positions]
    dashed_args = [argv[pos] for pos in dashed_positions]
    suggestion = ['yt-dlp'] + regular_args + ['--'] + dashed_args
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s' %
        args_to_str(suggestion))
804
8222d8de
JMF
def add_info_extractor(self, ie):
    """Register an InfoExtractor (class or instance) under its ie_key.

    Instances are additionally stored in the instance cache and bound to this
    downloader; classes are only recorded in the extractor mapping.
    """
    key = ie.ie_key()
    self._ies[key] = ie
    if isinstance(ie, type):
        return
    self._ies_instances[key] = ie
    ie.set_downloader(self)
8222d8de 812
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the extractor instance registered as ie_key.

    A cached instance is returned when one exists; otherwise the extractor
    class is looked up, instantiated, registered through add_info_extractor
    and then returned.
    """
    cached = self._ies_instances.get(ie_key)
    if cached is not None:
        return cached
    instance = get_info_extractor(ie_key)()
    self.add_info_extractor(instance)
    return instance
824
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register the extractors selected by the "allowed_extractors" param
    (defaulting to ['default']) at the end of the extractor list.

    Raises ValueError when an entry of "allowed_extractors" is an invalid regex.
    """
    extractors = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
    extractors['end'] = UnsupportedURLIE()

    requested = self.params.get('allowed_extractors', ['default'])
    # Alias names that may appear in the option, mapped to the names they expand to
    aliases = {
        'all': list(extractors),
        'default': [name for name, ie in extractors.items() if ie._ENABLED],
    }
    try:
        ie_names = orderedSet_from_options(requested, aliases, use_regex=True)
    except re.error as e:
        raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')

    for ie_name in ie_names:
        self.add_info_extractor(extractors[ie_name])
    self.write_debug(f'Loaded {len(ie_names)} extractors')
023fa8c4 842
def add_post_processor(self, pp, when='post_process'):
    """Append a PostProcessor to the chain for stage `when` and bind it to this downloader."""
    assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
    stage_chain = self._pps[when]
    stage_chain.append(pp)
    pp.set_downloader(self)
848
ab8e5e51
AM
def add_post_hook(self, ph):
    """Add the post hook

    @param ph  Hook object appended to the end of self._post_hooks
    """
    self._post_hooks.append(ph)
852
def add_progress_hook(self, ph):
    """Add the download progress hook

    @param ph  Hook object appended to the end of self._progress_hooks
    """
    self._progress_hooks.append(ph)
8ab470f1 856
def add_postprocessor_hook(self, ph):
    """Record a postprocessing progress hook and attach it to every registered postprocessor."""
    self._postprocessor_hooks.append(ph)
    # Flatten the per-stage chains so each already-registered postprocessor gets the hook
    for registered_pp in itertools.chain.from_iterable(self._pps.values()):
        registered_pp.add_progress_hook(ph)
819e0531 863
1c088fa8 864 def _bidi_workaround(self, message):
5d681e96 865 if not hasattr(self, '_output_channel'):
1c088fa8
PH
866 return message
867
5d681e96 868 assert hasattr(self, '_output_process')
14f25df2 869 assert isinstance(message, str)
6febd1c1 870 line_count = message.count('\n') + 1
0f06bcd7 871 self._output_process.stdin.write((message + '\n').encode())
5d681e96 872 self._output_process.stdin.flush()
0f06bcd7 873 res = ''.join(self._output_channel.readline().decode()
9e1a5b84 874 for _ in range(line_count))
6febd1c1 875 return res[:-len('\n')]
1c088fa8 876
b35496d8 877 def _write_string(self, message, out=None, only_once=False):
878 if only_once:
879 if message in self._printed_messages:
880 return
881 self._printed_messages.add(message)
882 write_string(message, out=out, encoding=self.params.get('encoding'))
734f90bb 883
def to_stdout(self, message, skip_eol=False, quiet=None):
    """Print message followed by a newline to the stdout handle.

    Passing `quiet` or a non-False `skip_eol` only triggers a deprecation
    warning; neither changes what is written.
    """
    passed_quiet = quiet is not None
    passed_skip_eol = skip_eol is not False
    if passed_quiet:
        self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                 'Use "YoutubeDL.to_screen" instead')
    if passed_skip_eol:
        self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                 'Use "YoutubeDL.to_screen" instead')
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.out)
cf4f42cb 893
def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
    """Print message to the screen handle unless quiet mode applies.

    A configured logger takes precedence and receives the message at debug
    level. `quiet` overrides the "quiet" param when given; "verbose" disables
    the suppression. A newline is appended unless skip_eol is set.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return

    suppress = self.params.get('quiet') if quiet is None else quiet
    if suppress and not self.params.get('verbose'):
        return

    terminator = '' if skip_eol else '\n'
    self._write_string(
        '%s%s' % (self._bidi_workaround(message), terminator),
        self._out_files.screen, only_once=only_once)
8222d8de 904
def to_stderr(self, message, only_once=False):
    """Print message to the error handle, or send it to the logger's error() if one is configured"""
    assert isinstance(message, str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._out_files.error, only_once=only_once)
cf4f42cb 912
def _send_console_code(self, code):
    """Write a terminal control sequence to the console handle.

    Nothing is written on Windows ('nt') or when no console handle is available.
    """
    if compat_os_name != 'nt' and self._out_files.console:
        self._write_string(code, self._out_files.console)
8222d8de 917
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console window title to `message` when the "consoletitle" param is enabled.

    Terminal sequences are stripped from the message first. On Windows ('nt')
    the title is set through the kernel32 console API, and only if a console
    window exists; elsewhere an OSC title sequence is sent to the console.
    """
    if not self.params.get('consoletitle', False):
        return
    message = remove_terminal_sequences(message)
    if compat_os_name == 'nt':
        # ctypes is not among this module's top-level stdlib imports, so import
        # it here; otherwise this branch fails with NameError on Windows
        import ctypes

        if ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    else:
        self._send_console_code(f'\033]0;{message}\007')
1e5b9a95 929
def save_console_title(self):
    """Push the current console title onto the terminal's title stack.

    Only done when "consoletitle" is enabled and "simulate" is not set.
    """
    title_enabled = self.params.get('consoletitle') and not self.params.get('simulate')
    if title_enabled:
        self._send_console_code('\033[22;0t')  # Save the title on stack
bdde425c
PH
934
def restore_console_title(self):
    """Pop the previously saved console title from the terminal's title stack.

    Only done when "consoletitle" is enabled and "simulate" is not set.
    """
    title_enabled = self.params.get('consoletitle') and not self.params.get('simulate')
    if title_enabled:
        self._send_console_code('\033[23;0t')  # Restore the title from stack
bdde425c
PH
939
def __enter__(self):
    """Enter the context manager: save the console title and return this object."""
    self.save_console_title()
    return self
943
def save_cookies(self):
    """Save the cookie jar, but only when the "cookiefile" param is set."""
    if self.params.get('cookiefile') is not None:
        self.cookiejar.save()
bdde425c 947
def __exit__(self, *args):
    """Leave the context manager: restore the console title, then close().

    The exception details in `args` are not inspected and nothing is returned,
    so exceptions raised inside the `with` body are not suppressed.
    """
    self.restore_console_title()
    self.close()
951
def close(self):
    """Save cookies (see save_cookies) and close the request director."""
    self.save_cookies()
    self._request_director.close()
c365dba8 955
def trouble(self, message=None, tb=None, is_error=True):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    @param message   Text printed via to_stderr; nothing is printed when None
    @param tb        If given, is additional traceback information
    @param is_error  Whether to raise error according to ignorerrors
    @raises DownloadError  When is_error is true and the "ignoreerrors" param is not set
    """
    if message is not None:
        self.to_stderr(message)
    # Traceback details are only printed in verbose mode
    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                tb = ''
                # Exceptions carrying their own `exc_info` attribute get that
                # traceback printed first, ahead of the current one
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += encode_compat_str(traceback.format_exc())
            else:
                # No active exception: fall back to the current call stack
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = ''.join(tb_data)
        if tb:
            self.to_stderr(tb)
    if not is_error:
        return
    if not self.params.get('ignoreerrors'):
        # Prefer the wrapped exc_info carried by the active exception, if any
        if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
            exc_info = sys.exc_info()[1].exc_info
        else:
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    # Errors are being ignored: remember the failure in the return code instead
    self._download_retcode = 1
989
# Named text styles; members are passed as the format argument of the
# _format_out/_format_screen/_format_err helpers (e.g. self.Styles.WARNING)
Styles = Namespace(
    HEADERS='yellow',
    EMPHASIS='light blue',
    FILENAME='green',
    ID='green',
    DELIM='blue',
    ERROR='red',
    BAD_FORMAT='light red',
    WARNING='yellow',
    SUPPRESS='light black',
)
ec11a9f4 1001
7578d77d 1002 def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
e5a998f3 1003 text = str(text)
ec11a9f4 1004 if test_encoding:
1005 original_text = text
5c104538 1006 # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
1007 encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
ec11a9f4 1008 text = text.encode(encoding, 'ignore').decode(encoding)
1009 if fallback is not None and text != original_text:
1010 text = fallback
8417f26b 1011 return format_text(text, f) if allow_colors is True else text if fallback is None else fallback
ec11a9f4 1012
def _format_out(self, *args, **kwargs):
    """Call _format_text with the `out` handle and its colour policy; other arguments pass through."""
    return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
1015
def _format_screen(self, *args, **kwargs):
    """Call _format_text with the `screen` handle and its colour policy; other arguments pass through."""
    return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
ec11a9f4 1018
def _format_err(self, *args, **kwargs):
    """Call _format_text with the `error` handle and its colour policy; other arguments pass through."""
    return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
819e0531 1021
def report_warning(self, message, only_once=False):
    '''
    Emit a warning. With a logger configured the raw message goes to its
    warning(); otherwise it is printed to stderr prefixed with 'WARNING:'
    (styled via _format_err), unless the "no_warnings" param is set.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    prefix = self._format_err("WARNING:", self.Styles.WARNING)
    self.to_stderr(f'{prefix} {message}', only_once)
8222d8de 1033
def deprecation_warning(self, message, *, stacklevel=0):
    """Forward `message` to the module-level deprecation_warning helper.

    The helper is given self.report_error as its printer together with
    is_error=False; stacklevel is incremented by one to account for this
    wrapper's own frame.
    """
    deprecation_warning(
        message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)
1037
def deprecated_feature(self, message):
    """Report the use of a deprecated feature.

    With a logger configured, the message is sent once to its warning() and
    nothing else is emitted. Previously execution fell through to to_stderr,
    which forwards to the logger's error(), so the same notice was logged
    twice and at error level. Without a logger, the message is printed to
    stderr once (only_once=True) with a styled 'Deprecated Feature:' prefix.
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(f'Deprecated Feature: {message}')
        return
    self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
ee8dd27a 1042
def report_error(self, message, *args, **kwargs):
    '''
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file.

    Any extra positional and keyword arguments are passed on to trouble unchanged.
    '''
    self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
8222d8de 1049
def write_debug(self, message, only_once=False):
    '''Emit a '[debug] '-prefixed message, but only when the "verbose" param is set.

    The message goes to the logger's debug() when a logger is configured,
    otherwise to stderr.
    '''
    if not self.params.get('verbose', False):
        return
    message = f'[debug] {message}'
    logger = self.params.get('logger')
    if not logger:
        self.to_stderr(message, only_once)
        return
    logger.debug(message)
0760b0a7 1059
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user the file is already fully downloaded.

    Falls back to a message without the file name if printing it raises
    UnicodeEncodeError.
    """
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
8222d8de 1066
def report_file_delete(self, file_name):
    """Tell the user an existing file is about to be deleted.

    Falls back to a message without the file name if printing it raises
    UnicodeEncodeError.
    """
    try:
        notice = 'Deleting existing file %s' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen('Deleting existing file')
0c3d0f51 1073
def raise_no_formats(self, info, forced=False, *, msg=None):
    """Raise or warn that `info` has no usable formats.

    @param info    Info dict; '_has_drm', 'id' and 'extractor' are read from it
    @param forced  Raise even when the "ignore_no_formats_error" param is set
    @param msg     Custom message; when given, the raised error is marked expected
    @raises ExtractorError  Unless the error is ignored and not forced, in which
                            case the message is reported as a warning instead
    """
    has_drm = info.get('_has_drm')
    ignored = self.params.get('ignore_no_formats_error')
    expected = bool(msg)
    if not msg:
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'

    if ignored and not forced:
        self.report_warning(msg)
        return
    raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                         expected=has_drm or ignored or expected)
1083
def parse_outtmpl(self):
    """Deprecated public wrapper around _parse_outtmpl.

    Emits a deprecation warning, runs _parse_outtmpl and returns the
    "outtmpl" param.
    """
    self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
    self._parse_outtmpl()
    return self.params['outtmpl']
1088
def _parse_outtmpl(self):
    """Normalise the "outtmpl" param into a dict and fill in missing defaults.

    A non-dict value becomes {'default': value}. Every DEFAULT_OUTTMPL key that
    is missing or None gets the default template, with spaces removed from it
    when "restrictfilenames" is set.
    """
    if self.params.get('restrictfilenames'):  # Remove spaces in the default template
        def sanitize(tmpl):
            return tmpl.replace(' - ', ' ').replace(' ', '-')
    else:
        sanitize = IDENTITY

    outtmpl = self.params.setdefault('outtmpl', {})
    if not isinstance(outtmpl, dict):
        outtmpl = {'default': outtmpl}
        self.params['outtmpl'] = outtmpl

    for tmpl_type, default_tmpl in DEFAULT_OUTTMPL.items():
        if outtmpl.get(tmpl_type) is None:
            outtmpl[tmpl_type] = sanitize(default_tmpl)
de6000d9 1098
def get_output_path(self, dir_type='', filename=None):
    """Join the configured 'home' path, the path for `dir_type` and `filename`.

    Paths come from the "paths" param (which must be a dict), are stripped and
    expanded, and the joined result is passed through sanitize_path with the
    "windowsfilenames" param as `force`.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict), '"paths" parameter must be a dictionary'

    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    joined = os.path.join(home_dir, type_dir, filename or '')
    return sanitize_path(joined, force=self.params.get('windowsfilenames'))
1107
@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Run expand_path over an output template while keeping '%%' and '$$' intact."""
    # expand_path would collapse '%%' to '%' and '$$' to '$', but '%%' has to
    # survive until the template dict substitution step. Split each pair with
    # a random separator beforehand and strip the separator afterwards.
    marker = ''.join(random.choices(string.ascii_letters, k=32))
    protected = outtmpl.replace('%%', f'%{marker}%').replace('$$', f'${marker}$')

    # The template is expanded before substitution so that env variables
    # appearing in meta fields are never expanded. E.g. for outtmpl
    # "%(title)s.%(ext)s" and title "Hello $PATH", `$PATH` must stay as is.
    expanded = expand_path(protected)
    return expanded.replace(marker, '')
1122
@staticmethod
def escape_outtmpl(outtmpl):
    ''' Escape any remaining strings like %s, %abc% etc. '''
    stray_format_re = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')

    def _escape(mobj):
        # Matches that carry a key are left untouched; the others get an
        # extra '%' prepended so the later %-substitution does not consume them
        lead = '' if mobj.group('has_key') else '%'
        return lead + mobj.group(0)

    return re.sub(stray_format_re, _escape, outtmpl)
1130
@classmethod
def validate_outtmpl(cls, outtmpl):
    ''' @return None or Exception object '''
    custom_conversions_re = STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]')

    def _as_plain_string(mobj):
        # Swap the custom conversion character for a plain 's' so that
        # Python's own %-formatting can parse the template
        return f'{mobj.group(0)[:-1]}s'

    normalized = re.sub(
        custom_conversions_re, _as_plain_string, cls._outtmpl_expandpath(outtmpl))
    try:
        # Trial substitution; the defaultdict supplies 0 for every key
        cls.escape_outtmpl(normalized) % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None
1143
03b4de72 1144 @staticmethod
1145 def _copy_infodict(info_dict):
1146 info_dict = dict(info_dict)
09b49e1f 1147 info_dict.pop('__postprocessors', None)
415f8d51 1148 info_dict.pop('__pending_error', None)
03b4de72 1149 return info_dict
1150
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
    @param sanitize    Whether to sanitize the output as a filename.
                       For backward compatibility, a function can also be passed
    @return            Tuple (rewritten template, dict of computed values). Each
                       templated field in outtmpl is replaced by a reference to a
                       generated key whose value is stored in the returned dict
    """

    # NOTE: this setdefault mutates the caller's dict; everything after it
    # works on a copy
    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    info_dict = self._copy_infodict(info_dict)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
    info_dict['video_autonumber'] = self._num_videos
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
        'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    # Filled by create_key below: generated key -> value to substitute
    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
    # Arithmetic supported inside a field expression; operands are floats
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
        '*': float.__mul__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int, slice or "{field, ...}"
    FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
    FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
        'inner': FIELD_INNER_RE,
        'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
    }
    MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
        (?P<negate>-)?
        (?P<fields>{FIELD_RE})
        (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
        (?:>(?P<strf_format>.+?))?
        (?P<remaining>
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
        )$''')

    def _traverse_infodict(fields):
        # Split the dotted path into its parts, keeping every "{...}" group whole
        fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                  for f in ([x] if x.startswith('{') else x.split('.'))]
        # Drop an empty leading and/or trailing part left over from the split
        for i in (0, -1):
            if fields and not fields[i]:
                fields.pop(i)

        # Turn each "{a,b.c}" group into a dict mapping the name to its split path
        for i, f in enumerate(fields):
            if not f.startswith('{'):
                continue
            assert f.endswith('}'), f'No closing brace for {f} in {fields}'
            fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}

        return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            # NOTE: this local name shadows the `operator` module inside get_value
            operator = None
            # Consume the maths string token by token, alternating between an
            # operator and its operand
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if operator is None:
                    operator = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                offset = float_or_none(item)
                if offset is None:
                    # Not a number literal: treat the operand as another field
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = operator(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                operator = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
        if sanitize and value == '':
            value = None
        return value

    # Placeholder used when a field has no value and no explicit default
    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
        return sanitize_filename(str(value), restricted=restricted, is_id=(
            bool(re.search(r'(^|[_.])id(\.|$)', key))
            if 'filename-sanitization' in self.params['compat_opts']
            else NO_DEFAULT))

    # `sanitize` may be a callable (legacy API): use it as the sanitizer and
    # reduce `sanitize` itself to a plain flag for the closures in this function
    sanitizer = sanitize if callable(sanitize) else filename_sanitizer
    sanitize = bool(sanitize)

    def _dumpjson_default(obj):
        # json.dumps fallback: sets and LazyLists become lists, anything else its repr
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        return repr(obj)

    class _ReplacementFormatter(string.Formatter):
        # Accepts only numeric field names, all of which resolve to the first
        # positional argument; any other field name is rejected
        def get_field(self, field_name, args, kwargs):
            if field_name.isdigit():
                return args[0], -1
            raise ValueError('Unsupported field')

    replacement_formatter = _ReplacementFormatter()

    def create_key(outer_mobj):
        # Substitution callback for EXTERNAL_FORMAT_RE: computes the value of
        # one templated field and returns the rewritten format specifier
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        value, replacement, default, last_field = None, None, na, ''
        # Try the field, then each ","-separated alternate, until one has a value
        while mobj:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else default
            value = get_value(mobj)
            last_field, replacement = mobj['fields'], mobj['replacement']
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
            else:
                break

        # Apply the "&replacement" text when both a value and a replacement exist
        if None not in (value, replacement):
            try:
                value = replacement_formatter.format(replacement, value)
            except ValueError:
                value, default = None, na

        fmt = outer_mobj.group('format')
        if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int):
            fmt = f'0{field_size_compat_map[last_field]:d}d'

        flags = outer_mobj.group('conversion') or ''
        str_fmt = f'{fmt[:-1]}s'
        # Dispatch on the conversion character (the last character of fmt)
        if value is None:
            value, fmt = default, 's'
        elif fmt[-1] == 'l':  # list
            delim = '\n' if '#' in flags else ', '
            value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
        elif fmt[-1] == 'j':  # json
            value, fmt = json.dumps(
                value, default=_dumpjson_default,
                indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
        elif fmt[-1] == 'h':  # html
            value, fmt = escapeHTML(str(value)), str_fmt
        elif fmt[-1] == 'q':  # quoted
            value = map(str, variadic(value) if '#' in flags else [value])
            value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
        elif fmt[-1] == 'B':  # bytes
            value = f'%{str_fmt}'.encode() % str(value).encode()
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif fmt[-1] == 'U':  # unicode normalized
            value, fmt = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                value), str_fmt
        elif fmt[-1] == 'D':  # decimal suffix
            num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
            value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                          factor=1024 if '#' in flags else 1000)
        elif fmt[-1] == 'S':  # filename sanitization
            value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
        elif fmt[-1] == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif fmt[-1] not in 'rsa':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            # If value is an object, sanitize might convert it to a string
            # So we convert it to repr first
            if fmt[-1] == 'r':
                value, fmt = repr(value), str_fmt
            elif fmt[-1] == 'a':
                value, fmt = ascii(value), str_fmt
            if fmt[-1] in 'csra':
                value = sanitizer(last_field, value)

        # Build the key for TMPL_DICT from the original expression and its format
        key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1363
def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Evaluate `outtmpl` against `info_dict` and return the resulting string.

    Runs prepare_outtmpl (extra arguments are passed through to it), escapes
    the prepared template with escape_outtmpl and %-substitutes the prepared
    dict into it.
    """
    outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    return self.escape_outtmpl(outtmpl) % info_dict
1367
def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
    """Evaluate an output template into a filename (without the output directory).

    @param info_dict  Fields used to fill in the template
    @param outtmpl    Explicit template; mutually exclusive with `tmpl_type`
    @param tmpl_type  Key into params['outtmpl'] (falls back to 'default')

    Returns the filename, or None if the template evaluates to an empty value
    or a ValueError occurs while evaluating it (the error is reported).
    """
    assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
    if outtmpl is None:
        templates = self.params['outtmpl']
        outtmpl = templates.get(tmpl_type or 'default', templates['default'])
    try:
        expanded = self._outtmpl_expandpath(outtmpl)
        filename = self.evaluate_outtmpl(expanded, info_dict, True)
        if not filename:
            return None

        if tmpl_type in ('', 'temp'):
            final_ext = self.params.get('final_ext')
            current_ext = info_dict.get('ext')
            # The template already rendered the final extension; swap it back
            # to the extension of the file that is actually being written
            if (final_ext and current_ext and final_ext != current_ext
                    and filename.endswith(f'.{final_ext}')):
                filename = replace_extension(filename, current_ext, final_ext)
        elif tmpl_type:
            forced_ext = OUTTMPL_TYPES[tmpl_type]
            if forced_ext:
                filename = replace_extension(filename, forced_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        max_length = self.params.get('trim_file_name', False)
        if max_length:
            # Only the stem is trimmed; up to two trailing extensions are kept
            stem, *suffixes = filename.rsplit('.', 2)
            filename = join_nonempty(stem[:max_length], *suffixes, delim='.')

        return filename
    except ValueError as err:
        self.report_error(f'Error in output template: {err} (encoding: {preferredencoding()!r})')
        return None
1397
def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
    """Generate the output filename, including the output path.

    @param info_dict  Fields used to fill in the template
    @param dir_type   Template/path type; must be empty when `outtmpl` is given
    @param outtmpl    Explicit template to use instead of the configured one
    @param warn       Warn when the 'paths' param is set but cannot be applied

    Returns '' when nothing was generated for a `dir_type` other than ''/'temp';
    otherwise the unmodified value for '-' or an empty result, or the result
    of `get_output_path` for a regular filename.
    """
    if outtmpl:
        assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
        dir_type = None
    filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
    if dir_type not in ('', 'temp') and not filename:
        return ''

    if warn and self.params.get('paths'):
        if filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        elif os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)

    if not filename or filename == '-':
        return filename
    return self.get_output_path(dir_type, filename)
0202b52a 1418
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """Decide whether an entry should be skipped.

    Returns None if the file should be downloaded, otherwise a string with
    the reason it is skipped. Unless `silent`, the reason is printed. If the
    param named by the applicable break option ('break_on_existing',
    'break_on_reject' or 'match_filter') is set, the matching exception is
    raised instead of returning.
    """
    if 'playlist-match-filter' in self.params['compat_opts']:
        entry_type = 'video'
    else:
        entry_type = info_dict.get('_type', 'video')
    assert incomplete or entry_type == 'video', 'Only video result can be considered complete'

    video_title = info_dict.get('title', info_dict.get('id', 'entry'))

    def check_filter():
        # Playlists themselves are never rejected here
        if entry_type in ('playlist', 'multi_video'):
            return None
        if entry_type in ('url', 'url_transparent') and not try_call(
                lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
            return None

        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

        upload_date = info_dict.get('upload_date')
        if upload_date is not None:
            date_range = self.params.get('daterange', DateRange())
            if upload_date not in date_range:
                return f'{date_from_str(upload_date).isoformat()} upload date is not in range {date_range}'

        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)

        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        match_filter = self.params.get('match_filter')
        if match_filter is None:
            return None

        cancelled = None
        try:
            try:
                verdict = match_filter(info_dict, incomplete=incomplete)
            except TypeError:
                # For backward compatibility
                verdict = None if incomplete else match_filter(info_dict)
        except DownloadCancelled as err:
            if err.msg is not NO_DEFAULT:
                raise
            verdict, cancelled = err.msg, err

        if verdict is not NO_DEFAULT:
            return verdict

        # NO_DEFAULT means the user has to be asked interactively
        while True:
            filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
            prompt = self._format_screen(f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)
            reply = input(prompt).lower().strip()
            if reply in {'y', ''}:
                return None
            elif reply == 'n':
                if cancelled:
                    raise type(cancelled)(f'Skipping {video_title}')
                return f'Skipping {video_title}'

    if self.in_download_archive(info_dict):
        id_prefix = format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: ')
        title_prefix = format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} ')
        reason = ''.join((id_prefix, title_prefix, 'has already been recorded in the archive'))
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        try:
            reason = check_filter()
        except DownloadCancelled as err:
            reason, break_opt, break_err = err.msg, 'match_filter', type(err)
        else:
            break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return reason
    if not silent:
        self.to_screen('[download] ' + reason)
    if self.params.get(break_opt, False):
        raise break_err()
    return reason
fe7e0c98 1509
b6c45014
JMF
@staticmethod
def add_extra_info(info_dict, extra_info):
    """Copy each key of `extra_info` into `info_dict` unless it is already present"""
    for key in extra_info:
        info_dict.setdefault(key, extra_info[key])
1515
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract and return the information dictionary of the URL

    Arguments:
    @param url          URL to extract

    Keyword arguments:
    @param download     Whether to download videos
    @param process      Whether to resolve all unresolved references (URLs, playlist items).
                        Must be True for download to work
    @param ie_key       Use only the extractor with this key

    @param extra_info   Dictionary containing the extra values to add to the info (For internal use only)
    @force_generic_extractor  Force using the generic extractor (Deprecated; use ie_key='Generic')

    Returns None (after reporting it) when the URL is already recorded in the
    download archive or when no suitable extractor is found.
    """
    extra_info = {} if extra_info is None else extra_info

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    # Restrict the candidates to the requested extractor, if any
    if not ie_key:
        candidates = self._ies
    elif ie_key in self._ies:
        candidates = {ie_key: self._ies[ie_key]}
    else:
        candidates = {}

    for key, ie in candidates.items():
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = ie.get_temp_id(url)
        if temp_id is None or not self.in_download_archive({'id': temp_id, 'ie_key': key}):
            return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)

        self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
                       'has already been recorded in the archive')
        if self.params.get('break_on_existing', False):
            raise ExistingVideoReached()
        return None

    # Only reached when no extractor was suitable for the URL
    extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
    self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
                      tb=False if extractors_restricted else None)
a0566bbf 1565
def _handle_extraction_exceptions(func):
    """Decorator that applies the common error handling to an extraction method.

    The wrapped method is called repeatedly for as long as it raises
    ReExtractInfo. DownloadCancelled and the LazyList/PagedList IndexErrors
    are propagated untouched. GeoRestrictedError and ExtractorError are
    reported through `report_error`, after which the wrapper returns None.
    Any other exception is reported the same way if the 'ignoreerrors' param
    is set and re-raised otherwise.
    """
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        while True:
            try:
                return func(self, *args, **kwargs)
            except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                raise
            except ReExtractInfo as err:
                if err.expected:
                    self.to_screen(f'{err}; Re-extracting data')
                else:
                    self.to_stderr('\r')
                    self.report_warning(f'{err}; Re-extracting data')
                # Loop around and call func again
            except GeoRestrictedError as err:
                message = err.msg
                if err.countries:
                    available_in = ', '.join(map(ISO3166Utils.short2full, err.countries))
                    message += '\nThis video is available in %s.' % available_in
                message += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(message)
                return None
            except ExtractorError as err:  # An error we somewhat expected
                self.report_error(str(err), err.format_traceback())
                return None
            except Exception as err:
                if not self.params.get('ignoreerrors'):
                    raise
                self.report_error(str(err), tb=encode_compat_str(traceback.format_exc()))
                return None
    return wrapper
1597
def _wait_for_video(self, ie_result=None):
    """Wait until a not-yet-available video is expected to be released.

    Does nothing unless the 'wait_for_video' param is set and `ie_result`
    is a 'video' result that has neither 'formats' nor 'url'.

    @param ie_result  The extracted info dict; None (the default) is treated
                      as an empty result

    The wait time is taken from 'release_timestamp' when available, and is
    clamped to the (min, max) pair given by the 'wait_for_video' param.
    Raises ReExtractInfo (expected=True) once the wait is over or when the
    user interrupts it with Ctrl+C, so that extraction is attempted again.
    """
    # Use a None sentinel rather than a shared mutable default argument
    if ie_result is None:
        ie_result = {}

    if (not self.params.get('wait_for_video')
            or ie_result.get('_type', 'video') != 'video'
            or ie_result.get('formats') or ie_result.get('url')):
        return

    def format_dur(dur):
        # Drop the last element of the time tuple; only three fields are printed
        return '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]

    last_msg = ''

    def progress(msg):
        nonlocal last_msg
        full_msg = f'{msg}\n'
        if not self.params.get('noprogress'):
            # Pad with spaces so that a shorter message fully overwrites the previous one
            full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
        elif last_msg:
            # With 'noprogress', only the first message is printed
            return
        self.to_screen(full_msg, skip_eol=True)
        last_msg = msg

    min_wait, max_wait = self.params.get('wait_for_video')
    diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
    if diff is None and ie_result.get('live_status') == 'is_upcoming':
        diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
        self.report_warning('Release time of video is not known')
    elif ie_result and (diff or 0) <= 0:
        self.report_warning('Video should already be available according to extracted info')
    diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
    self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

    wait_till = time.time() + diff
    try:
        while True:
            diff = wait_till - time.time()
            if diff <= 0:
                progress('')
                raise ReExtractInfo('[wait] Wait period ended', expected=True)
            progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
            time.sleep(1)
    except KeyboardInterrupt:
        progress('')
        raise ReExtractInfo('[wait] Interrupted by user', expected=True)
    except BaseException as e:
        # Terminate the progress line before propagating unrelated errors
        if not isinstance(e, ReExtractInfo):
            self.to_screen('')
        raise
1643
def _load_cookies(self, data, *, autoscope=True):
    """Loads cookies from a `Cookie` header

    This tries to work around the security vulnerability of passing cookies to every domain.
    See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj

    @param data         The Cookie header as string to load the cookies from
    @param autoscope    If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains
                        If `True`, save cookies for later to be stored in the jar with a limited scope
                        If a URL, save cookies in the jar with the domain of the URL

    Raises ValueError if `autoscope` is truthy and a cookie carries any attribute.
    """
    for morsel in LenientSimpleCookie(data).values():
        # A plain Cookie header has no attributes (domain, path, ...)
        if autoscope and any(morsel.values()):
            raise ValueError('Invalid syntax in Cookie Header')

        domain = morsel.get('domain') or ''
        expiry = morsel.get('expires')
        if expiry == '':  # 0 is valid
            expiry = None
        prepared_cookie = http.cookiejar.Cookie(
            version=morsel.get('version') or 0,
            name=morsel.key,
            value=morsel.value,
            port=None,
            port_specified=False,
            domain=domain,
            domain_specified=True,
            domain_initial_dot=True,
            path=morsel.get('path') or '',
            path_specified=bool(morsel.get('path')),
            secure=morsel.get('secure') or False,
            expires=expiry,
            discard=False,
            comment=None,
            comment_url=None,
            rest={})

        if domain:
            self.cookiejar.set_cookie(prepared_cookie)
            continue

        if autoscope is True:
            self.deprecated_feature(
                'Passing cookies as a header is a potential security risk; '
                'they will be scoped to the domain of the downloaded urls. '
                'Please consider loading cookies from a file or browser instead.')
            self.__header_cookies.append(prepared_cookie)
        elif autoscope:
            # Here `autoscope` is the URL whose domain the cookie gets scoped to
            self.report_warning(
                'The extractor result contains an unscoped cookie as an HTTP header. '
                f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
                only_once=True)
            self._apply_header_cookies(autoscope, [prepared_cookie])
        else:
            self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
                              tb=False, is_error=False)
1685
def _apply_header_cookies(self, url, cookies=None):
    """Applies stray header cookies to the provided url

    This loads header cookies and scopes them to the domain provided in `url`.
    While this is not ideal, it helps reduce the risk of them being sent
    to an unintended destination while mostly maintaining compatibility.

    @param url      URL whose hostname is used as the cookie domain;
                    nothing is done if it has no hostname
    @param cookies  Cookies to apply; when empty or None, the stored
                    header cookies are used
    """
    hostname = urllib.parse.urlparse(url).hostname
    if not hostname:
        return

    scoped_domain = f'.{hostname}'
    for original in cookies or self.__header_cookies:
        # Work on a copy so that the given cookie object is left untouched
        scoped = copy.copy(original)
        scoped.domain = scoped_domain
        self.cookiejar.set_cookie(scoped)
1700
@_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run extractor `ie` on `url` and optionally process its result.

    Returns the processed result if `process` is set, otherwise the raw
    extractor result with the default extra info added. Returns None if the
    extractor returned nothing.
    """
    self._apply_header_cookies(url)

    try:
        ie_result = ie.extract(url)
    except UserNotLive as err:
        if process and self.params.get('wait_for_video'):
            self.report_warning(err)
            self._wait_for_video()
        raise

    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
        return None

    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}

    original_url = extra_info.get('original_url')
    if original_url:
        ie_result.setdefault('original_url', original_url)
    self.add_default_extra_info(ie_result, ie, url)

    if not process:
        return ie_result
    self._wait_for_video(ie_result)
    return self.process_ie_result(ie_result, download, extra_info)
fe7e0c98 1730
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the URL- and extractor-related fields missing from `ie_result`.

    Existing values are never overwritten (see `add_extra_info`).

    @param ie_result  The info dict to update in place
    @param ie         The extractor that produced it, or None
    @param url        The URL that was extracted, or None
    """
    if url is not None:
        url_fields = {'webpage_url': url, 'original_url': url}
        self.add_extra_info(ie_result, url_fields)

    # Derived from the final webpage_url, which the result may have set itself
    webpage_url = ie_result.get('webpage_url')
    if webpage_url:
        derived_fields = {
            'webpage_url_basename': url_basename(webpage_url),
            'webpage_url_domain': get_domain(webpage_url),
        }
        self.add_extra_info(ie_result, derived_fields)

    if ie is not None:
        extractor_fields = {
            'extractor': ie.IE_NAME,
            'extractor_key': ie.ie_key(),
        }
        self.add_extra_info(ie_result, extractor_fields)
ea38e55f 1748
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    @param ie_result   The extractor result; dispatched on its '_type'
                       ('video' if missing)
    @param download    Whether to download the videos
    @param extra_info  Extra values to add to the resulting info dict(s)

    Returns None for a playlist whose 'webpage_url' is already being
    processed. Raises Exception for an unknown '_type'.
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(
            ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
        if ie_result.get('original_url') and not extra_info.get('original_url'):
            extra_info = {'original_url': ie_result['original_url'], **extra_info}

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: the URL is not resolved. A copy of the result
            # is used for printing/archiving, and the original is returned
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            if ie and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            info_copy, _ = self.pre_process(info_copy)
            self._fill_common_fields(info_copy, False)
            self.__forced_printings(info_copy)
            self._raise_pending_errors(info_copy)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        self._raise_pending_errors(ie_result)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        exempted_fields = {'_type', 'url', 'ie_key'}
        if not ie_result.get('section_end') and ie_result.get('section_start') is None:
            # For video clips, the id etc of the clip extractor should be used
            exempted_fields |= {'id', 'extractor', 'extractor_key'}

        new_result = info.copy()
        new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result.get('webpage_url')  # Playlists may not have webpage_url
        if webpage_url and webpage_url in self._playlist_urls:
            # The parentheses are required: `%` binds tighter than `or`, so
            # without them the fallback to the id would never be used
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._fill_common_fields(ie_result, False)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Forget the visited URLs once the outermost playlist is done
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1882
def _ensure_dir_exists(self, path):
    """Call `make_dir` for `path`, passing `report_error` to it, and return its result"""
    outcome = make_dir(path, self.report_error)
    return outcome
1885
@staticmethod
def _playlist_infodict(ie_result, strict=False, **kwargs):
    """Build the playlist-level fields that are shared with playlist entries.

    @param ie_result  The playlist result
    @param strict     If set, return only the `playlist*` fields (and `kwargs`)
    @param kwargs     Additional fields to include

    Unless `strict`, the webpage URL fields (if the playlist has a
    'webpage_url'), 'playlist_index', '__last_playlist_index' and the
    extractor fields are included too. 'extractor' and 'extractor_key'
    must then be present in `ie_result`.
    """
    playlist_id, playlist_title = ie_result.get('id'), ie_result.get('title')
    info = {
        'playlist_count': ie_result.get('playlist_count'),
        'playlist': playlist_title or playlist_id,
        'playlist_id': playlist_id,
        'playlist_title': playlist_title,
        'playlist_uploader': ie_result.get('uploader'),
        'playlist_uploader_id': ie_result.get('uploader_id'),
    }
    info.update(kwargs)
    if strict:
        return info

    webpage_url = ie_result.get('webpage_url')
    if webpage_url:
        info.update({
            'webpage_url': webpage_url,
            'webpage_url_basename': url_basename(webpage_url),
            'webpage_url_domain': get_domain(webpage_url),
        })
    info.update({
        'playlist_index': 0,
        '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
        'extractor': ie_result['extractor'],
        'extractor_key': ie_result['extractor_key'],
    })
    return info
3b603dbd 1912
30a074c2 1913 def __process_playlist(self, ie_result, download):
7e88d7d7 1914 """Process each entry in the playlist"""
f5ea4748 1915 assert ie_result['_type'] in ('playlist', 'multi_video')
1916
3bec830a 1917 common_info = self._playlist_infodict(ie_result, strict=True)
3955b207 1918 title = common_info.get('playlist') or '<Untitled>'
3bec830a 1919 if self._match_entry(common_info, incomplete=True) is not None:
1920 return
c6e07cf1 1921 self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
f0d785d3 1922
7e88d7d7 1923 all_entries = PlaylistEntries(self, ie_result)
7e9a6125 1924 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1925
1926 lazy = self.params.get('lazy_playlist')
1927 if lazy:
1928 resolved_entries, n_entries = [], 'N/A'
1929 ie_result['requested_entries'], ie_result['entries'] = None, None
1930 else:
1931 entries = resolved_entries = list(entries)
1932 n_entries = len(resolved_entries)
1933 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1934 if not ie_result.get('playlist_count'):
1935 # Better to do this after potentially exhausting entries
1936 ie_result['playlist_count'] = all_entries.get_full_count()
498f5606 1937
0647d925 1938 extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1939 ie_copy = collections.ChainMap(ie_result, extra)
3bec830a 1940
e08a85d8 1941 _infojson_written = False
0bfc53d0 1942 write_playlist_files = self.params.get('allow_playlist_files', True)
1943 if write_playlist_files and self.params.get('list_thumbnails'):
1944 self.list_thumbnails(ie_result)
1945 if write_playlist_files and not self.params.get('simulate'):
e08a85d8 1946 _infojson_written = self._write_info_json(
1947 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1948 if _infojson_written is None:
80c03fa9 1949 return
1950 if self._write_description('playlist', ie_result,
1951 self.prepare_filename(ie_copy, 'pl_description')) is None:
1952 return
681de68e 1953 # TODO: This should be passed to ThumbnailsConvertor if necessary
3bec830a 1954 self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
30a074c2 1955
7e9a6125 1956 if lazy:
1957 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1958 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1959 elif self.params.get('playlistreverse'):
1960 entries.reverse()
1961 elif self.params.get('playlistrandom'):
30a074c2 1962 random.shuffle(entries)
1963
bc5c2f8a 1964 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
7e88d7d7 1965 f'{format_field(ie_result, "playlist_count", " of %s")}')
30a074c2 1966
134c913c 1967 keep_resolved_entries = self.params.get('extract_flat') != 'discard'
1968 if self.params.get('extract_flat') == 'discard_in_playlist':
1969 keep_resolved_entries = ie_result['_type'] != 'playlist'
1970 if keep_resolved_entries:
1971 self.write_debug('The information of all playlist entries will be held in memory')
1972
26e2805c 1973 failures = 0
1974 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
7e9a6125 1975 for i, (playlist_index, entry) in enumerate(entries):
1976 if lazy:
1977 resolved_entries.append((playlist_index, entry))
3bec830a 1978 if not entry:
7e88d7d7 1979 continue
1980
7e88d7d7 1981 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
ad54c913 1982 if not lazy and 'playlist-index' in self.params['compat_opts']:
7e9a6125 1983 playlist_index = ie_result['requested_entries'][i]
1984
0647d925 1985 entry_copy = collections.ChainMap(entry, {
3bec830a 1986 **common_info,
3955b207 1987 'n_entries': int_or_none(n_entries),
71729754 1988 'playlist_index': playlist_index,
7e9a6125 1989 'playlist_autonumber': i + 1,
0647d925 1990 })
3bec830a 1991
0647d925 1992 if self._match_entry(entry_copy, incomplete=True) is not None:
f0ad6f8c 1993 # For compatabilty with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
1994 resolved_entries[i] = (playlist_index, NO_DEFAULT)
3bec830a 1995 continue
1996
bc5c2f8a 1997 self.to_screen('[download] Downloading item %s of %s' % (
3bec830a 1998 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
1999
ec54bd43 2000 entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
a6ca61d4 2001 'playlist_index': playlist_index,
2002 'playlist_autonumber': i + 1,
ec54bd43 2003 }, extra))
26e2805c 2004 if not entry_result:
2005 failures += 1
2006 if failures >= max_failures:
2007 self.report_error(
7e88d7d7 2008 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
26e2805c 2009 break
134c913c 2010 if keep_resolved_entries:
2011 resolved_entries[i] = (playlist_index, entry_result)
7e88d7d7 2012
2013 # Update with processed data
f0ad6f8c 2014 ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
bc5c2f8a 2015 ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
2016 if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
2017 # Do not set for full playlist
2018 ie_result.pop('requested_entries')
e08a85d8 2019
2020 # Write the updated info to json
cb96c5be 2021 if _infojson_written is True and self._write_info_json(
e08a85d8 2022 'updated playlist', ie_result,
2023 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
2024 return
ca30f449 2025
ed5835b4 2026 ie_result = self.run_all_pps('playlist', ie_result)
7e88d7d7 2027 self.to_screen(f'[download] Finished downloading playlist: {title}')
30a074c2 2028 return ie_result
2029
@_handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process a single playlist entry with the extraction error handling applied"""
    processed = self.process_ie_result(entry, download=download, extra_info=extra_info)
    return processed
2034
67134eab
JMF
def _build_format_filter(self, filter_spec):
    """Returns a function to filter the formats according to the filter_spec

    The spec is first tried as a numeric comparison and then as a string
    comparison (optionally negated with "!"). The returned function takes a
    format dict; when the field is missing (None), it returns the matched
    "?" marker of the spec, which is None if no marker was given.

    Raises SyntaxError if the spec matches neither form, and ValueError if
    the value of a numeric comparison cannot be parsed.
    """
    NUMERIC_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    numeric_rex = re.compile(r'''(?x)\s*
        (?P<key>[\w.-]+)\s*
        (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
        ''' % '|'.join(map(re.escape, NUMERIC_OPERATORS)))
    match = numeric_rex.fullmatch(filter_spec)

    if match:
        raw_value = match.group('value')
        try:
            comparison_value = int(raw_value)
        except ValueError:
            # Not a plain integer; try it as a file size, with and without the "B"
            comparison_value = parse_filesize(raw_value)
            if comparison_value is None:
                comparison_value = parse_filesize(raw_value + 'B')
            if comparison_value is None:
                raise ValueError(
                    'Invalid value %r in format specification %r' % (
                        raw_value, filter_spec))
        op = NUMERIC_OPERATORS[match.group('op')]
    else:
        STR_OPERATORS = {
            '=': operator.eq,
            '^=': lambda attr, value: attr.startswith(value),
            '$=': lambda attr, value: attr.endswith(value),
            '*=': lambda attr, value: value in attr,
            '~=': lambda attr, value: value.search(attr) is not None
        }
        str_rex = re.compile(r'''(?x)\s*
            (?P<key>[a-zA-Z0-9._-]+)\s*
            (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
            (?P<quote>["'])?
            (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
            (?(quote)(?P=quote))\s*
            ''' % '|'.join(map(re.escape, STR_OPERATORS)))
        match = str_rex.fullmatch(filter_spec)
        if not match:
            raise SyntaxError('Invalid filter specification %r' % filter_spec)

        if match.group('op') == '~=':
            comparison_value = re.compile(match.group('value'))
        else:
            # Unescape backslash-escaped quotes and backslashes
            comparison_value = re.sub(r'''\\([\\"'])''', r'\1', match.group('value'))

        str_op = STR_OPERATORS[match.group('op')]
        if match.group('negation'):
            def op(attr, value):
                return not str_op(attr, value)
        else:
            op = str_op

    key = match.group('key')
    none_inclusive = match.group('none_inclusive')

    def _filter(f):
        actual_value = f.get(key)
        if actual_value is None:
            return none_inclusive
        return op(actual_value, comparison_value)
    return _filter
2101
9f1a1c36 2102 def _check_formats(self, formats):
2103 for f in formats:
2104 self.to_screen('[info] Testing format %s' % f['format_id'])
75689fe5 2105 path = self.get_output_path('temp')
2106 if not self._ensure_dir_exists(f'{path}/'):
2107 continue
2108 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
9f1a1c36 2109 temp_file.close()
2110 try:
2111 success, _ = self.dl(temp_file.name, f, test=True)
8a82af35 2112 except (DownloadError, OSError, ValueError) + network_exceptions:
9f1a1c36 2113 success = False
2114 finally:
2115 if os.path.exists(temp_file.name):
2116 try:
2117 os.remove(temp_file.name)
2118 except OSError:
2119 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
2120 if success:
2121 yield f
2122 else:
2123 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
2124
0017d9ad 2125 def _default_format_spec(self, info_dict, download=True):
0017d9ad 2126
af0f7428
S
2127 def can_merge():
2128 merger = FFmpegMergerPP(self)
2129 return merger.available and merger.can_merge()
2130
91ebc640 2131 prefer_best = (
b7b04c78 2132 not self.params.get('simulate')
91ebc640 2133 and download
2134 and (
2135 not can_merge()
21633673 2136 or info_dict.get('is_live') and not self.params.get('live_from_start')
bf1824b3 2137 or self.params['outtmpl']['default'] == '-'))
53ed7066 2138 compat = (
2139 prefer_best
2140 or self.params.get('allow_multiple_audio_streams', False)
8a82af35 2141 or 'format-spec' in self.params['compat_opts'])
91ebc640 2142
2143 return (
53ed7066 2144 'best/bestvideo+bestaudio' if prefer_best
2145 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 2146 else 'bestvideo+bestaudio/best')
0017d9ad 2147
67134eab
JMF
def build_format_selector(self, format_spec):
    """Compile a format specification string into a selector function.

    The specification is tokenized with Python's `tokenize` module, parsed
    into a tree of `FormatSelector` tuples (SINGLE, GROUP `()`,
    PICKFIRST `/`, MERGE `+`; a top-level list represents `,`) and then
    turned into a function that takes a context dict and returns an
    iterable of the selected formats. The context dict is read for the
    keys 'formats', 'incomplete_formats' and 'has_merged_format'.

    Raises SyntaxError if the specification cannot be parsed.
    """
    def syntax_error(note, start):
        # Build (not raise) a SyntaxError whose message shows the spec with
        # a caret under column start[1]
        message = (
            'Invalid format specification: '
            '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    # Node types of the parsed selector tree
    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    def _parse_filter(tokens):
        # Concatenate the token strings up to (excluding) the closing ']'.
        # Falls through and returns None if the tokens end before a ']'
        filter_parts = []
        for type, string_, start, _, _ in tokens:
            if type == tokenize.OP and string_ == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string_)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings.
        # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string_, start, end, line in tokens:
            if type == tokenize.OP and string_ == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string_, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string_, start, end, line in tokens:
                    yield type, string_, start, end, line
                    if type == tokenize.OP and string_ == ']':
                        break
            elif type == tokenize.OP and string_ in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string_, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string_
                    last_start = start
                    last_end = end
                else:
                    last_string += string_
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive parser. Returns a list of FormatSelector nodes.
        # The inside_* flags tell a nested call which operators belong to
        # its caller; such tokens are pushed back via restore_last_token()
        selectors = []
        current_selector = None
        for type, string_, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string_, [])
            elif type == tokenize.OP:
                if string_ == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string_ in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string_ == ',':
                    tokens.restore_last_token()
                    break
                elif string_ == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string_ == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string_ == '[':
                    # A filter without a preceding selector applies to 'best'
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string_ == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string_ == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error(f'Operator not recognized: "{string_}"', start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        # Combine two formats (either of which may itself be the result of a
        # previous merge, carrying 'requested_formats') into one format dict
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            get_no_more = {'video': False, 'audio': False}
            # NOTE(review): formats_info is popped from while being enumerated,
            # so the element following each popped one is never examined
            # (e.g. of three video formats, the third would be kept).
            # Confirm whether this is intended before changing it
            for (i, fmt_info) in enumerate(formats_info):
                if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                    formats_info.pop(i)
                    continue
                for aud_vid in ['audio', 'video']:
                    if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                        if get_no_more[aud_vid]:
                            formats_info.pop(i)
                            break
                        get_no_more[aud_vid] = True

        # Nothing left to merge with
        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = get_compatible_ext(
            vcodecs=[f.get('vcodec') for f in video_fmts],
            acodecs=[f.get('acodec') for f in audio_fmts],
            vexts=[f['ext'] for f in video_fmts],
            aexts=[f['ext'] for f in audio_fmts],
            preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
                         or self.params.get('prefer_free_formats') and ('webm', 'mkv')))

        # Truthy values of the given key(s), looked up in every merged format
        filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(filtered('format')),
            'format_id': '+'.join(filtered('format_id')),
            'ext': output_ext,
            'protocol': '+'.join(map(determine_protocol, formats_info)),
            'language': '+'.join(orderedSet(filtered('language'))) or None,
            'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
            'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
            'tbr': sum(filtered('tbr', 'vbr', 'abr')),
        }

        # Video/audio specific fields are only copied when they are unambiguous
        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'dynamic_range': the_only_video.get('dynamic_range'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
                'aspect_ratio': the_only_video.get('aspect_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
                'asr': the_only_audio.get('asr'),
                'audio_channels': the_only_audio.get('audio_channels')
            })

        return new_dict

    def _check_formats(formats):
        # check_formats == 'selected': test every candidate format.
        # Any other non-None check_formats value, or allow_unplayable_formats:
        # pass all formats through untested.
        # Otherwise: test only formats flagged 'has_drm' or '__needs_testing'
        if self.params.get('check_formats') == 'selected':
            yield from self._check_formats(formats)
            return
        elif (self.params.get('check_formats') is not None
                or self.params.get('allow_unplayable_formats')):
            yield from formats
            return

        for f in formats:
            if f.get('has_drm') or f.get('__needs_testing'):
                yield from self._check_formats([f])
            else:
                yield f

    def _build_selector_function(selector):
        # Turn a parsed selector (a list or a FormatSelector node) into a
        # function mapping a context dict to an iterable of formats
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    yield from f(ctx)
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                # The first alternative that yields anything wins
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                    yield _merge(pair)

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    yield from _check_formats(ctx['formats'][::-1])
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(_check_formats(
                        f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
                    if not formats:
                        return
                    # Fold from the last format backwards into a single merged format
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    _filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else lambda f: True)  # b*, w*
                    # Formats with neither video nor audio are never matched here
                    filter_f = lambda f: _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                else:
                    # Not a best/worst expression: an extension name or a format id
                    if format_spec in self._format_selection_exts['audio']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                    elif format_spec in self._format_selection_exts['video']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
                    elif format_spec in self._format_selection_exts['storyboards']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                    else:
                        filter_f = lambda f: f.get('format_id') == format_spec  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if not matches:
                        if format_fallback and ctx['incomplete_formats']:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = formats
                        elif seperate_fallback and not ctx['has_merged_format']:
                            # for compatibility with youtube-dl when there is no pre-merged format
                            matches = list(filter(seperate_fallback, formats))
                    # Reverse the candidates for "best", keep their order for
                    # "worst", then pick the format_idx-th one (1-based)
                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                    try:
                        yield matches[format_idx - 1]
                    except LazyList.IndexError:
                        return

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply the selector's [filters] to a copy of the context so the
            # caller's dict is not modified
            ctx_copy = dict(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode())
    try:
        tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator:
        # Iterator over the token list that can step back by one token
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
a9c58ad9 2486
def _calc_headers(self, info_dict, load_cookies=False):
    """Compute the HTTP headers to use for `info_dict`.

    The global 'http_headers' param is combined with the headers of
    `info_dict`. Any `Cookie` header is removed from the result; instead,
    the cookies applicable to `info_dict['url']` are serialized into
    `info_dict['cookies']`. When `load_cookies` is true, cookies found in
    the `Cookie` header and in `info_dict['cookies']` are first loaded into
    the cookiejar. An `X-Forwarded-For` header is added from
    `info_dict['__x_forwarded_for_ip']` when not already present.

    Returns the resulting header mapping.
    """
    headers = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
    clean_headers(headers)

    if load_cookies:  # For --load-info-json
        self._load_cookies(headers.get('Cookie'), autoscope=info_dict['url'])  # compat
        self._load_cookies(info_dict.get('cookies'), autoscope=False)

    # The `Cookie` header is removed to prevent leaks and unscoped cookies.
    # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
    headers.pop('Cookie', None)

    jar_cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
    if jar_cookies:
        encoder = LenientSimpleCookie()
        parts = []
        for cookie in jar_cookies:
            _, encoded = encoder.value_encode(cookie.value)
            parts.append(f'{cookie.name}={encoded}')
            # Attributes are emitted only when set, in a fixed order
            for label, attr in (('Domain', cookie.domain), ('Path', cookie.path)):
                if attr:
                    parts.append(f'{label}={attr}')
            if cookie.secure:
                parts.append('Secure')
            for label, attr in (('Expires', cookie.expires), ('Version', cookie.version)):
                if attr:
                    parts.append(f'{label}={attr}')
        info_dict['cookies'] = '; '.join(parts)

    if 'X-Forwarded-For' not in headers:
        forwarded_ip = info_dict.get('__x_forwarded_for_ip')
        if forwarded_ip:
            headers['X-Forwarded-For'] = forwarded_ip

    return headers
2522
c487cf00 2523 def _calc_cookies(self, url):
b87e01c1 2524 self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
2525 return self.cookiejar.get_cookie_header(url)
e5660ee6 2526
9f1a1c36 2527 def _sort_thumbnails(self, thumbnails):
2528 thumbnails.sort(key=lambda t: (
2529 t.get('preference') if t.get('preference') is not None else -1,
2530 t.get('width') if t.get('width') is not None else -1,
2531 t.get('height') if t.get('height') is not None else -1,
2532 t.get('id') if t.get('id') is not None else '',
2533 t.get('url')))
2534
b0249bca 2535 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2536 thumbnails = info_dict.get('thumbnails')
2537 if thumbnails is None:
2538 thumbnail = info_dict.get('thumbnail')
2539 if thumbnail:
2540 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
9f1a1c36 2541 if not thumbnails:
2542 return
2543
2544 def check_thumbnails(thumbnails):
2545 for t in thumbnails:
2546 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2547 try:
2548 self.urlopen(HEADRequest(t['url']))
2549 except network_exceptions as err:
2550 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2551 continue
2552 yield t
2553
2554 self._sort_thumbnails(thumbnails)
2555 for i, t in enumerate(thumbnails):
2556 if t.get('id') is None:
2557 t['id'] = '%d' % i
2558 if t.get('width') and t.get('height'):
2559 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2560 t['url'] = sanitize_url(t['url'])
2561
2562 if self.params.get('check_formats') is True:
282f5709 2563 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
9f1a1c36 2564 else:
2565 info_dict['thumbnails'] = thumbnails
bc516a3f 2566
def _fill_common_fields(self, info_dict, final=True):
    """Fill in derived fields of `info_dict` in place.

    Sets (when applicable) 'fulltitle' and a generic 'title', 'duration_string',
    the '*_date' fields derived from their timestamps, 'release_year',
    'live_status' / 'is_live' / 'was_live', and 'chapter' / 'season' / 'episode'
    titles derived from the corresponding '*_number' fields.

    The title handling and the chapter/season/episode titles are only
    applied when `final` is true.
    """
    # TODO: move sanitization here
    if final:
        title = info_dict['fulltitle'] = info_dict.get('title')
        if not title:
            # An empty string is only logged at debug level; a missing title warns
            if title == '':
                self.write_debug('Extractor gave empty title. Creating a generic title')
            else:
                self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
            info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'

    if info_dict.get('duration') is not None:
        info_dict['duration_string'] = formatSeconds(info_dict['duration'])

    for ts_key, date_key in (
            ('timestamp', 'upload_date'),
            ('release_timestamp', 'release_date'),
            ('modified_timestamp', 'modified_date'),
    ):
        if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
            # see http://bugs.python.org/issue1646728)
            with contextlib.suppress(ValueError, OverflowError, OSError):
                upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
                info_dict[date_key] = upload_date.strftime('%Y%m%d')

    # Derive the year from the first four characters of 'release_date'
    if not info_dict.get('release_year'):
        info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])}))

    live_keys = ('is_live', 'was_live')
    live_status = info_dict.get('live_status')
    if live_status is None:
        # Skip keys that are explicitly False; the first key that is not
        # False decides (it sets live_status only if truthy) and ends the loop
        for key in live_keys:
            if info_dict.get(key) is False:
                continue
            if info_dict.get(key):
                live_status = key
            break
        if all(info_dict.get(key) is False for key in live_keys):
            live_status = 'not_live'
    if live_status:
        info_dict['live_status'] = live_status
        # Fill in only the flags that the extractor left unset
        for key in live_keys:
            if info_dict.get(key) is None:
                info_dict[key] = (live_status == key)
    if live_status == 'post_live':
        info_dict['was_live'] = True

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2620
415f8d51 2621 def _raise_pending_errors(self, info):
2622 err = info.pop('__pending_error', None)
2623 if err:
2624 self.report_error(err, tb=False)
2625
def sort_formats(self, info_dict):
    """Sort the formats of `info_dict` in place.

    The formats are obtained through `_get_formats` and ordered by the
    preference that `FormatSorter` calculates, using the fields listed in
    `info_dict['_format_sort_fields']` (or none if unset).
    """
    formats = self._get_formats(info_dict)
    sorter = FormatSorter(self, info_dict.get('_format_sort_fields') or [])
    formats.sort(key=sorter.calculate_preference)
784320c9 2630
dd82ffea
JMF
2631 def process_video_result(self, info_dict, download=True):
2632 assert info_dict.get('_type', 'video') == 'video'
9c906919 2633 self._num_videos += 1
dd82ffea 2634
bec1fad2 2635 if 'id' not in info_dict:
fc08bdd6 2636 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2637 elif not info_dict.get('id'):
2638 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
455a15e2 2639
c9969434
S
2640 def report_force_conversion(field, field_not, conversion):
2641 self.report_warning(
2642 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2643 % (field, field_not, conversion))
2644
2645 def sanitize_string_field(info, string_field):
2646 field = info.get(string_field)
14f25df2 2647 if field is None or isinstance(field, str):
c9969434
S
2648 return
2649 report_force_conversion(string_field, 'a string', 'string')
14f25df2 2650 info[string_field] = str(field)
c9969434
S
2651
2652 def sanitize_numeric_fields(info):
2653 for numeric_field in self._NUMERIC_FIELDS:
2654 field = info.get(numeric_field)
f9934b96 2655 if field is None or isinstance(field, (int, float)):
c9969434
S
2656 continue
2657 report_force_conversion(numeric_field, 'numeric', 'int')
2658 info[numeric_field] = int_or_none(field)
2659
2660 sanitize_string_field(info_dict, 'id')
2661 sanitize_numeric_fields(info_dict)
3975b4d2 2662 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2663 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
4c3f8c3f 2664 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
50e93e03 2665 self.report_warning('"duration" field is negative, there is an error in extractor')
be6217b2 2666
9eef7c4e 2667 chapters = info_dict.get('chapters') or []
a3976e07 2668 if chapters and chapters[0].get('start_time'):
2669 chapters.insert(0, {'start_time': 0})
2670
9eef7c4e 2671 dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
a3976e07 2672 for idx, (prev, current, next_) in enumerate(zip(
2673 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
9eef7c4e 2674 if current.get('start_time') is None:
2675 current['start_time'] = prev.get('end_time')
2676 if not current.get('end_time'):
2677 current['end_time'] = next_.get('start_time')
a3976e07 2678 if not current.get('title'):
2679 current['title'] = f'<Untitled Chapter {idx}>'
9eef7c4e 2680
dd82ffea
JMF
2681 if 'playlist' not in info_dict:
2682 # It isn't part of a playlist
2683 info_dict['playlist'] = None
2684 info_dict['playlist_index'] = None
2685
bc516a3f 2686 self._sanitize_thumbnails(info_dict)
d5519808 2687
536a55da 2688 thumbnail = info_dict.get('thumbnail')
bc516a3f 2689 thumbnails = info_dict.get('thumbnails')
536a55da
S
2690 if thumbnail:
2691 info_dict['thumbnail'] = sanitize_url(thumbnail)
2692 elif thumbnails:
d5519808
PH
2693 info_dict['thumbnail'] = thumbnails[-1]['url']
2694
ae30b840 2695 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2696 info_dict['display_id'] = info_dict['id']
2697
03f83004 2698 self._fill_common_fields(info_dict)
33d2fc2f 2699
05108a49
S
2700 for cc_kind in ('subtitles', 'automatic_captions'):
2701 cc = info_dict.get(cc_kind)
2702 if cc:
2703 for _, subtitle in cc.items():
2704 for subtitle_format in subtitle:
2705 if subtitle_format.get('url'):
2706 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2707 if subtitle_format.get('ext') is None:
2708 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2709
2710 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2711 subtitles = info_dict.get('subtitles')
4bba3716 2712
360e1ca5 2713 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2714 info_dict['id'], subtitles, automatic_captions)
a504ced0 2715
aebb4f4b 2716 formats = self._get_formats(info_dict)
dd82ffea 2717
c154302c 2718 # Backward compatibility with InfoExtractor._sort_formats
9ebac355 2719 field_preference = (formats or [{}])[0].pop('__sort_fields', None)
c154302c 2720 if field_preference:
2721 info_dict['_format_sort_fields'] = field_preference
2722
bc344cd4 2723 info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it
2724 f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
88acdbc2 2725 if not self.params.get('allow_unplayable_formats'):
bc344cd4 2726 formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']
17ffed18 2727
2728 if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2729 self.report_warning(
2730 f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
2731 'only images are available for download. Use --list-formats to see them'.capitalize())
88acdbc2 2732
319b6059 2733 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2734 if not get_from_start:
2735 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2736 if info_dict.get('is_live') and formats:
adbc4ec4 2737 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
319b6059 2738 if get_from_start and not formats:
a44ca5a4 2739 self.raise_no_formats(info_dict, msg=(
2740 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2741 'If you want to download from the current time, use --no-live-from-start'))
adbc4ec4 2742
73af5cc8
S
2743 def is_wellformed(f):
2744 url = f.get('url')
a5ac0c47 2745 if not url:
73af5cc8
S
2746 self.report_warning(
2747 '"url" field is missing or empty - skipping format, '
2748 'there is an error in extractor')
a5ac0c47
S
2749 return False
2750 if isinstance(url, bytes):
2751 sanitize_string_field(f, 'url')
2752 return True
73af5cc8
S
2753
2754 # Filter out malformed formats for better extraction robustness
1ac7f461 2755 formats = list(filter(is_wellformed, formats or []))
2756
2757 if not formats:
2758 self.raise_no_formats(info_dict)
73af5cc8 2759
39f32f17 2760 for format in formats:
c9969434
S
2761 sanitize_string_field(format, 'format_id')
2762 sanitize_numeric_fields(format)
dcf77cf1 2763 format['url'] = sanitize_url(format['url'])
39f32f17 2764 if format.get('ext') is None:
2765 format['ext'] = determine_ext(format['url']).lower()
2766 if format.get('protocol') is None:
2767 format['protocol'] = determine_protocol(format)
2768 if format.get('resolution') is None:
2769 format['resolution'] = self.format_resolution(format, default=None)
2770 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2771 format['dynamic_range'] = 'SDR'
2772 if format.get('aspect_ratio') is None:
2773 format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
10025b71 2774 # For fragmented formats, "tbr" is often max bitrate and not average
2775 if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url'))
0dff8e4d 2776 and info_dict.get('duration') and format.get('tbr')
39f32f17 2777 and not format.get('filesize') and not format.get('filesize_approx')):
2778 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
6c5211ce 2779 format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)
2780
2781 # Safeguard against old/insecure infojson when using --load-info-json
2782 if info_dict.get('http_headers'):
2783 info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers'])
2784 info_dict['http_headers'].pop('Cookie', None)
39f32f17 2785
2786 # This is copied to http_headers by the above _calc_headers and can now be removed
2787 if '__x_forwarded_for_ip' in info_dict:
2788 del info_dict['__x_forwarded_for_ip']
2789
c154302c 2790 self.sort_formats({
2791 'formats': formats,
2792 '_format_sort_fields': info_dict.get('_format_sort_fields')
2793 })
39f32f17 2794
2795 # Sanitize and group by format_id
2796 formats_dict = {}
2797 for i, format in enumerate(formats):
e74e3b63 2798 if not format.get('format_id'):
14f25df2 2799 format['format_id'] = str(i)
e2effb08
S
2800 else:
2801 # Sanitize format_id from characters used in format selector expression
ec85ded8 2802 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
39f32f17 2803 formats_dict.setdefault(format['format_id'], []).append(format)
181c7053
S
2804
2805 # Make sure all formats have unique format_id
03b4de72 2806 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
181c7053 2807 for format_id, ambiguous_formats in formats_dict.items():
48ee10ee 2808 ambigious_id = len(ambiguous_formats) > 1
2809 for i, format in enumerate(ambiguous_formats):
2810 if ambigious_id:
181c7053 2811 format['format_id'] = '%s-%d' % (format_id, i)
48ee10ee 2812 # Ensure there is no conflict between id and ext in format selection
2813 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2814 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2815 format['format_id'] = 'f%s' % format['format_id']
181c7053 2816
39f32f17 2817 if format.get('format') is None:
2818 format['format'] = '{id} - {res}{note}'.format(
2819 id=format['format_id'],
2820 res=self.format_resolution(format),
2821 note=format_field(format, 'format_note', ' (%s)'),
2822 )
dd82ffea 2823
9f1a1c36 2824 if self.params.get('check_formats') is True:
282f5709 2825 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
9f1a1c36 2826
88acdbc2 2827 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
2828 # only set the 'formats' fields if the original info_dict list them
2829 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 2830 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2831 # which can't be exported to json
b3d9ef88 2832 info_dict['formats'] = formats
4ec82a72 2833
2834 info_dict, _ = self.pre_process(info_dict)
2835
6db9c4d5 2836 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
09b49e1f 2837 return info_dict
2838
2839 self.post_extract(info_dict)
2840 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2841
093a1710 2842 # The pre-processors may have modified the formats
aebb4f4b 2843 formats = self._get_formats(info_dict)
093a1710 2844
e4221b70 2845 list_only = self.params.get('simulate') == 'list_only'
fa9f30b8 2846 interactive_format_selection = not list_only and self.format_selector == '-'
b7b04c78 2847 if self.params.get('list_thumbnails'):
2848 self.list_thumbnails(info_dict)
b7b04c78 2849 if self.params.get('listsubtitles'):
2850 if 'automatic_captions' in info_dict:
2851 self.list_subtitles(
2852 info_dict['id'], automatic_captions, 'automatic captions')
2853 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
fa9f30b8 2854 if self.params.get('listformats') or interactive_format_selection:
b69fd25c 2855 self.list_formats(info_dict)
169dbde9 2856 if list_only:
b7b04c78 2857 # Without this printing, -F --print-json will not work
17060584 2858 self.__forced_printings(info_dict)
c487cf00 2859 return info_dict
bfaae0a7 2860
187986a8 2861 format_selector = self.format_selector
fa9f30b8 2862 while True:
2863 if interactive_format_selection:
372a0f3b
IS
2864 req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
2865 + '(Press ENTER for default, or Ctrl+C to quit)'
2866 + self._format_screen(': ', self.Styles.EMPHASIS))
fa9f30b8 2867 try:
372a0f3b 2868 format_selector = self.build_format_selector(req_format) if req_format else None
fa9f30b8 2869 except SyntaxError as err:
2870 self.report_error(err, tb=False, is_error=False)
2871 continue
2872
372a0f3b
IS
2873 if format_selector is None:
2874 req_format = self._default_format_spec(info_dict, download=download)
2875 self.write_debug(f'Default format spec: {req_format}')
2876 format_selector = self.build_format_selector(req_format)
2877
85e801a9 2878 formats_to_download = list(format_selector({
fa9f30b8 2879 'formats': formats,
85e801a9 2880 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
906c0bdc 2881 'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
2882 or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
85e801a9 2883 }))
fa9f30b8 2884 if interactive_format_selection and not formats_to_download:
2885 self.report_error('Requested format is not available', tb=False, is_error=False)
2886 continue
2887 break
317f7ab6 2888
dd82ffea 2889 if not formats_to_download:
b7da73eb 2890 if not self.params.get('ignore_no_formats_error'):
c0b6e5c7 2891 raise ExtractorError(
2892 'Requested format is not available. Use --list-formats for a list of available formats',
2893 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
b62fa6d7 2894 self.report_warning('Requested format is not available')
2895 # Process what we can, even without any available formats.
2896 formats_to_download = [{}]
a13e6848 2897
0500ee3d 2898 requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
5ec1b6b7 2899 best_format, downloaded_formats = formats_to_download[-1], []
b62fa6d7 2900 if download:
0500ee3d 2901 if best_format and requested_ranges:
5ec1b6b7 2902 def to_screen(*msg):
2903 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2904
2905 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2906 (f['format_id'] for f in formats_to_download))
0500ee3d 2907 if requested_ranges != ({}, ):
5ec1b6b7 2908 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
fc2ba496 2909 (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
a13e6848 2910 max_downloads_reached = False
5ec1b6b7 2911
0500ee3d 2912 for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
5ec1b6b7 2913 new_info = self._copy_infodict(info_dict)
b7da73eb 2914 new_info.update(fmt)
3975b4d2 2915 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
fc2ba496 2916 end_time = offset + min(chapter.get('end_time', duration), duration)
b4e0d758 2917 # duration may not be accurate. So allow deviations <1sec
2918 if end_time == float('inf') or end_time > offset + duration + 1:
2919 end_time = None
3975b4d2 2920 if chapter or offset:
5ec1b6b7 2921 new_info.update({
3975b4d2 2922 'section_start': offset + chapter.get('start_time', 0),
b4e0d758 2923 'section_end': end_time,
5ec1b6b7 2924 'section_title': chapter.get('title'),
2925 'section_number': chapter.get('index'),
2926 })
2927 downloaded_formats.append(new_info)
a13e6848 2928 try:
2929 self.process_info(new_info)
2930 except MaxDownloadsReached:
2931 max_downloads_reached = True
415f8d51 2932 self._raise_pending_errors(new_info)
f46e2f9d 2933 # Remove copied info
2934 for key, val in tuple(new_info.items()):
2935 if info_dict.get(key) == val:
2936 new_info.pop(key)
a13e6848 2937 if max_downloads_reached:
2938 break
ebed8b37 2939
5ec1b6b7 2940 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
a13e6848 2941 assert write_archive.issubset({True, False, 'ignore'})
2942 if True in write_archive and False not in write_archive:
2943 self.record_download_archive(info_dict)
be72c624 2944
5ec1b6b7 2945 info_dict['requested_downloads'] = downloaded_formats
ed5835b4 2946 info_dict = self.run_all_pps('after_video', info_dict)
a13e6848 2947 if max_downloads_reached:
2948 raise MaxDownloadsReached()
ebed8b37 2949
49a57e70 2950 # We update the info dict with the selected best quality format (backwards compatibility)
be72c624 2951 info_dict.update(best_format)
dd82ffea
JMF
2952 return info_dict
2953
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    @param video_id            Only used in the messages shown to the user
    @param normal_subtitles    Mapping of language to a list of subtitle format
                               dicts (each with an 'ext' key), or a falsy value
    @param automatic_captions  Same shape as normal_subtitles
    @returns                   Mapping of every selected language to the chosen
                               format dict, or None if subtitles were not
                               requested or none are available
    @raises ValueError         If an entry of params['subtitleslangs'] is not
                               a valid regex
    """
    available_subs, normal_sub_langs = {}, []
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
        normal_sub_langs = tuple(normal_subtitles.keys())
    if automatic_captions and self.params.get('writeautomaticsub'):
        # Normal subtitles win over automatic captions of the same language
        for lang, cap_info in automatic_captions.items():
            if lang not in available_subs:
                available_subs[lang] = cap_info

    if not available_subs or (
            not self.params.get('writesubtitles')
            and not self.params.get('writeautomaticsub')):
        return None

    all_sub_langs = tuple(available_subs.keys())
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        try:
            requested_langs = orderedSet_from_options(
                self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}') from e
    else:
        # No explicit request: take the first candidate in this order of preference:
        # 'en' / 'en*' among normal subtitles, 'en' / 'en*' among everything,
        # then any normal subtitle, then anything at all
        requested_langs = LazyList(itertools.chain(
            ['en'] if 'en' in normal_sub_langs else [],
            filter(lambda f: f.startswith('en'), normal_sub_langs),
            ['en'] if 'en' in all_sub_langs else [],
            filter(lambda f: f.startswith('en'), all_sub_langs),
            normal_sub_langs, all_sub_langs,
        ))[:1]
    if requested_langs:
        self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty format list is as unusable as a missing language;
        # indexing it with [-1] below would raise IndexError
        if not formats:
            self.report_warning(f'{lang} subtitles not available for {video_id}')
            continue
        for ext in formats_preference:
            if ext == 'best':
                f = formats[-1]
                break
            matches = [candidate for candidate in formats if candidate['ext'] == ext]
            if matches:
                f = matches[-1]
                break
        else:
            # None of the preferred extensions matched; fall back to the last format
            f = formats[-1]
            self.report_warning(
                f'No subtitle format found matching "{formats_query}" for language {lang}, '
                f'using {f["ext"]}')
        subs[lang] = f
    return subs
3013
def _forceprint(self, key, info_dict):
    """Print/write the templates registered under `key`

    Templates from params['forceprint'][key] are evaluated and sent to stdout;
    (template, file template) pairs from params['print_to_file'][key] are
    evaluated and appended to the file named by the file template.

    Returns the copy of info_dict used for the evaluation (it additionally
    has 'filename', 'urls' and the '*_table' fields), or None if info_dict is None.
    """
    if info_dict is None:
        return

    def expand_template(spec):
        # Anything that is not a plain (list of) field name(s) is already an output template
        match = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', spec)
        if match is None:
            return spec

        field_fmt = '%({})s'
        if spec.startswith('{'):
            spec, field_fmt = f'.{spec}', '%({})j'
        if spec.endswith('='):
            spec, field_fmt = spec[:-1], '{0} = %({0})#j'
        # A "{...}" group is kept as one field; otherwise every comma-separated name gets its own line
        fields = [spec] if match.group('dict') else spec.split(',')
        return '\n'.join(field_fmt.format(field) for field in fields)

    printable = info_dict.copy()
    printable.setdefault('filename', self.prepare_filename(info_dict))
    requested_formats = info_dict.get('requested_formats')
    if requested_formats is not None:
        # For RTMP URLs, also include the playpath
        printable['urls'] = '\n'.join(
            fmt['url'] + fmt.get('play_path', '') for fmt in requested_formats)
    elif info_dict.get('url'):
        printable['urls'] = info_dict['url'] + info_dict.get('play_path', '')
    printable.update({
        'formats_table': self.render_formats_table(info_dict),
        'thumbnails_table': self.render_thumbnails_table(info_dict),
        'subtitles_table': self.render_subtitles_table(
            info_dict.get('id'), info_dict.get('subtitles')),
        'automatic_captions_table': self.render_subtitles_table(
            info_dict.get('id'), info_dict.get('automatic_captions')),
    })

    for spec in self.params['forceprint'].get(key, []):
        self.to_stdout(self.evaluate_outtmpl(expand_template(spec), printable))

    for spec, file_tmpl in self.params['print_to_file'].get(key, []):
        target = self.prepare_filename(info_dict, outtmpl=file_tmpl)
        expanded = expand_template(spec)
        self.to_screen(f'[info] Writing {expanded!r} to: {target}')
        if not self._ensure_dir_exists(target):
            continue
        with open(target, 'a', encoding='utf-8', newline='') as out:
            out.write(self.evaluate_outtmpl(expanded, printable) + os.linesep)

    return printable
3053
def __forced_printings(self, info_dict, filename=None, incomplete=True):
    """Emit everything requested through the 'force*' params for one video

    Runs the 'video' templates via _forceprint, then prints the individual
    fields enabled by params 'forcetitle', 'forceid', 'forceurl', etc., and
    finally the sanitized info_dict as JSON if 'forcejson' is set.
    A truthy `filename` is stored in info_dict['filename'] first.
    With `incomplete` set, missing fields are skipped instead of being required.
    """
    needs_full_info = (
        self.params.get('forcejson')
        or self.params['forceprint'].get('video')
        or self.params['print_to_file'].get('video'))
    if needs_full_info:
        self.post_extract(info_dict)
    if filename:
        info_dict['filename'] = filename
    info_copy = self._forceprint('video', info_dict)

    def emit(field, source_key=None, optional=False):
        if not self.params.get(f'force{field}'):
            return
        # A missing field is tolerated only if it is optional or the info is incomplete
        if info_copy.get(field) is None and (optional or incomplete):
            return
        self.to_stdout(info_copy[field if source_key is None else source_key])

    for spec in (
        ('title', ),
        ('id', ),
        ('url', 'urls'),
        ('thumbnail', None, True),
        ('description', None, True),
        ('filename', ),
    ):
        emit(*spec)
    if self.params.get('forceduration'):
        duration = info_copy.get('duration')
        if duration is not None:
            self.to_stdout(formatSeconds(duration))
    emit('format')

    if self.params.get('forcejson'):
        self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 3082
def dl(self, name, info, subtitle=False, test=False):
    """Download `info` to `name` with a suitable downloader

    With `test` set, a fixed set of testing params is used instead of
    self.params and no progress hooks are attached.
    Returns the result of the downloader's download() call.
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    if not test:
        params = self.params
    else:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_cls(self, params)
    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)

        def loggable(url):
            # Do not dump the payload of data: URLs into the debug log
            if url.startswith('data:'):
                return url.split(',')[0] + ',<data>'
            return url

        urls = '", "'.join(
            loggable(f['url']) for f in info.get('requested_formats', []) or [info])
        self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')

    # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
    # But it may contain objects that are not deep-copyable
    info_copy = self._copy_infodict(info)
    if info_copy.get('http_headers') is None:
        info_copy['http_headers'] = self._calc_headers(info_copy)
    return fd.download(name, info_copy, subtitle)
3117
def existing_file(self, filepaths, *, default_overwrite=True):
    """Find an already existing file among `filepaths`

    If any of the files exist and overwriting is disabled (param 'overwrites',
    defaulting to `default_overwrite`), the first existing path is returned.
    Otherwise every existing file is deleted and None is returned.
    """
    found = [path for path in orderedSet(filepaths) if os.path.exists(path)]
    if found and not self.params.get('overwrites', default_overwrite):
        return found[0]

    # Overwriting is allowed (or nothing exists): clear the way for a fresh download
    for path in found:
        self.report_file_delete(path)
        os.remove(path)
    return None
3127
def process_info(self, info_dict):
    """Process a single resolved IE result. (Modifies it in-place)

    In order: applies the match filters, runs the 'video' pre-processors,
    prepares the filenames, does the forced printings and (unless simulating)
    writes the description, subtitles, thumbnails, info json, annotations and
    internet shortcut files. Then, unless 'skip_download' is set, downloads
    the format(s), queues the needed fixup/merge post-processors and runs
    post-processing and the post hooks.

    info_dict['__write_download_archive'] is set to 'ignore', True or the
    value of param 'force_write_download_archive' depending on the outcome.
    Raises MaxDownloadsReached once 'max_downloads' entries were processed.
    """

    assert info_dict.get('_type', 'video') == 'video'
    original_infodict = info_dict

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    if self._match_entry(info_dict) is not None:
        info_dict['__write_download_archive'] = 'ignore'
        return

    # Does nothing under normal operation - for backward compatibility of process_info
    self.post_extract(info_dict)

    def replace_info_dict(new_info):
        # Callers hold a reference to info_dict, so it must be updated in-place
        nonlocal info_dict
        if new_info == info_dict:
            return
        info_dict.clear()
        info_dict.update(new_info)

    new_info, _ = self.pre_process(info_dict, 'video')
    replace_info_dict(new_info)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    def check_max_downloads():
        # A missing/zero 'max_downloads' means no limit
        if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
            raise MaxDownloadsReached()

    if self.params.get('simulate'):
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
        check_max_downloads()
        return

    if full_filename is None:
        return
    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    # For each of the writers below, a result of None aborts the processing of this video
    if self._write_description('video', info_dict,
                               self.prepare_filename(info_dict, 'description')) is None:
        return

    sub_files = self._write_subtitles(info_dict, temp_filename)
    if sub_files is None:
        return
    files_to_move.update(dict(sub_files))

    thumb_files = self._write_thumbnails(
        'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
    if thumb_files is None:
        return
    files_to_move.update(dict(thumb_files))

    infofn = self.prepare_filename(info_dict, 'infojson')
    _infojson_written = self._write_info_json('video', info_dict, infofn)
    if _infojson_written:
        info_dict['infojson_filename'] = infofn
        # For backward compatibility, even though it was a private field
        info_dict['__infojson_filename'] = infofn
    elif _infojson_written is None:
        return

    # Note: Annotations are deprecated
    annofn = None
    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
    if annofn:
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except OSError:
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    # Write internet shortcut files
    def _write_link_file(link_type):
        # Returns False only if the shortcut should have been written but could not be
        url = try_get(info_dict['webpage_url'], iri_to_uri)
        if not url:
            self.report_warning(
                f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
            return True
        linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
        if not self._ensure_dir_exists(encodeFilename(linkfn)):
            return False
        # Keep an existing shortcut only when overwriting is disabled
        # (same rule as for the annotations file above)
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
            return True
        try:
            self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
            with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                      newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                template_vars = {'url': url}
                if link_type == 'desktop':
                    # The link filename without its ".desktop" suffix
                    template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
        except OSError:
            self.report_error(f'Cannot write internet shortcut {linkfn}')
            return False
        return True

    write_links = {
        'url': self.params.get('writeurllink'),
        'webloc': self.params.get('writewebloclink'),
        'desktop': self.params.get('writedesktoplink'),
    }
    if self.params.get('writelink'):
        # Pick the shortcut type based on the current platform
        link_type = ('webloc' if sys.platform == 'darwin'
                     else 'desktop' if sys.platform.startswith('linux')
                     else 'url')
        write_links[link_type] = True

    if any(should_write and not _write_link_file(link_type)
           for link_type, should_write in write_links.items()):
        return

    new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    replace_info_dict(new_info)

    if self.params.get('skip_download'):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_video_file(*filepaths):
                ext = info_dict.get('ext')
                converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
                # For every path, the variant with the final extension is checked first
                file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
                                          default_overwrite=False)
                if file:
                    info_dict['ext'] = os.path.splitext(file)[1][1:]
                return file

            fd, success = None, True
            if info_dict.get('protocol') or info_dict.get('url'):
                fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
                        info_dict.get('section_start') or info_dict.get('section_end')):
                    msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
                           else 'You have requested downloading the video partially, but ffmpeg is not installed')
                    self.report_error(f'{msg}. Aborting')
                    return

            if info_dict.get('requested_formats') is not None:
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if (info_dict['ext'] == 'webm'
                            and info_dict.get('thumbnails')
                            # check with type instead of pp_key, __name__, or isinstance
                            # since we dont want any custom PPs to trigger this
                            and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return f'{filename_wo_ext}.{ext}'

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_video_file(full_filename, temp_filename)

                info_dict['__real_download'] = False
                # NOTE: Copy so that original format dicts are not modified
                info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))

                merger = FFmpegMergerPP(self)
                downloaded = []
                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif fd:
                    # All formats are handled by a single downloader invocation
                    for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
                        f['filepath'] = fname = prepend_extension(
                            correct_ext(temp_filename, info_dict['ext']),
                            'f%s' % f['format_id'], info_dict['ext'])
                        downloaded.append(fname)
                    info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    # Download every format separately
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
                        if not self.params.get('ignoreerrors'):
                            self.report_error(f'{msg}. Aborting due to --abort-on-error')
                            return
                        self.report_warning(f'{msg}. The formats won\'t be merged')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                        fname = temp_filename
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            f['filepath'] = fname
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success

                if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
                    info_dict['__postprocessors'].append(merger)
                    info_dict['__files_to_merge'] = downloaded
                    # Even if there were no downloads, it is being merged only now
                    info_dict['__real_download'] = True
                else:
                    # Not merging: the separate files are moved as they are
                    for file in downloaded:
                        files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_video_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except OSError as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
            return

        self._raise_pending_errors(info_dict)
        if success and full_filename != '-':

            def fixup():
                # do_fixup is True (fix), 'warn' (only warn) or False (do nothing)
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = 'warn'
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    if not (do_fixup and cndn):
                        return
                    elif do_fixup == 'warn':
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(stretched_ratio not in (1, None),
                             f'Non-uniform pixel ratio {stretched_ratio}',
                             FFmpegFixupStretchedPP)

                downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                downloader = downloader.FD_NAME if downloader else None

                ext = info_dict.get('ext')
                postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
                    isinstance(pp, FFmpegVideoConvertorPP)
                    and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
                ) for pp in self._pps['post_process'])

                # These fixups are skipped if the file gets merged or recoded anyway
                if not postprocessed_by_ffmpeg:
                    ffmpeg_fixup(fd != FFmpegFD and ext == 'm4a'
                                 and info_dict.get('container') == 'm4a_dash',
                                 'writing DASH m4a. Only some players support this container',
                                 FFmpegFixupM4aPP)
                    ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
                                 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
                                 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                 FFmpegFixupM3u8PP)
                    ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
                                 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

                ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            info_dict['__write_download_archive'] = True

    assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
    if self.params.get('force_write_download_archive'):
        info_dict['__write_download_archive'] = True
    check_max_downloads()
8222d8de 3488
def __download_wrapper(self, func):
    """Wrap `func` with the error handling shared by the download entry points

    The returned wrapper reports UnavailableVideoError, and swallows
    DownloadCancelled only if param 'break_per_url' is set (resetting the
    download counter). On success, the result is dumped as JSON to stdout
    if param 'dump_single_json' is set. The wrapper itself returns None.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except UnavailableVideoError as err:
            self.report_error(err)
            return
        except DownloadCancelled as err:
            self.to_screen(f'[info] {err}')
            if not self.params.get('break_per_url'):
                raise
            # The cancellation only applies to the current URL
            self._num_downloads = 0
            return

        if not self.params.get('dump_single_json', False):
            return
        self.post_extract(result)
        self.to_stdout(json.dumps(self.sanitize_info(result)))
    return wrapper
3506
8222d8de
JMF
def download(self, url_list):
    """Download a given list of URLs."""
    # Passing a single URL is a common mistake
    url_list = variadic(url_list)
    outtmpl = self.params['outtmpl']['default']

    # Several URLs written to one fixed (template-free) filename would collide,
    # unless at most one download is allowed anyway
    if len(url_list) > 1 and not (outtmpl == '-' or '%' in outtmpl):
        if self.params.get('max_downloads') != 1:
            raise SameFileError(outtmpl)

    extract = self.__download_wrapper(self.extract_info)
    for url in url_list:
        extract(url, force_generic_extractor=self.params.get('force_generic_extractor', False))

    return self._download_retcode
3522
def download_with_info_file(self, info_filename):
    """Download from the info dict(s) stored in a JSON file and return the download retcode.

    If processing an info dict fails and it has a 'webpage_url', the
    download is retried from that URL; otherwise the error is re-raised.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r', openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        infos = []
        for raw_info in variadic(json.loads('\n'.join(lines))):
            infos.append(self.sanitize_info(raw_info, self.params.get('clean_infojson', True)))

    process = self.__download_wrapper(self.process_ie_result)
    for info in infos:
        try:
            process(info, download=True)
        except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as exc:
            if not isinstance(exc, EntryNotInPlaylist):
                self.to_stderr('\r')
            fallback_url = info.get('webpage_url')
            if fallback_url is None:
                raise
            self.report_warning(f'The info failed to download: {exc}; trying with URL {fallback_url}')
            self.download([fallback_url])
    return self._download_retcode
1dcc4c0c 3542
@staticmethod
def sanitize_info(info_dict, remove_private_keys=False):
    ''' Sanitize the infodict for converting to json

    Fills in the 'epoch', '_type' and '_version' defaults on the passed dict
    (in place) and returns a filtered copy in which every value that is not a
    dict, list-like, str, int, float, bool or None is replaced by its repr().
    With remove_private_keys, None values, keys starting with '__' and a
    fixed set of internal keys are dropped as well.
    '''
    if info_dict is None:
        return info_dict
    info_dict.setdefault('epoch', int(time.time()))
    info_dict.setdefault('_type', 'video')
    info_dict.setdefault('_version', {
        'version': __version__,
        'current_git_head': current_git_head(),
        'release_git_head': RELEASE_GIT_HEAD,
        'repository': ORIGIN,
    })

    internal_keys = {
        'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
        'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
        'playlist_autonumber',
    }

    def is_rejected(key, value):
        if not remove_private_keys:
            return False
        return value is None or key.startswith('__') or key in internal_keys

    def sanitize(obj):
        if isinstance(obj, dict):
            return {key: sanitize(value) for key, value in obj.items() if not is_rejected(key, value)}
        if isinstance(obj, (list, tuple, set, LazyList)):
            return [sanitize(item) for item in obj]
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        return repr(obj)

    return sanitize(info_dict)
cb202fd2 3577
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    ''' Backward-compatible alias of sanitize_info; actually_filter maps to remove_private_keys '''
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)
3582
43d7f5a5 3583 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3584 for filename in set(filter(None, files_to_delete)):
3585 if msg:
3586 self.to_screen(msg % filename)
3587 try:
3588 os.remove(filename)
3589 except OSError:
3590 self.report_warning(f'Unable to delete file {filename}')
3591 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3592 del info['__files_to_move'][filename]
3593
ed5835b4 3594 @staticmethod
3595 def post_extract(info_dict):
3596 def actual_post_extract(info_dict):
3597 if info_dict.get('_type') in ('playlist', 'multi_video'):
3598 for video_dict in info_dict.get('entries', {}):
3599 actual_post_extract(video_dict or {})
3600 return
3601
09b49e1f 3602 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3603 info_dict.update(post_extractor())
ed5835b4 3604
3605 actual_post_extract(info_dict or {})
3606
def run_pp(self, pp, infodict):
    """Run one postprocessor and deal with the files it reports as obsolete.

    Returns the info dict produced by the postprocessor. With 'keepvideo'
    the obsolete files are queued in '__files_to_move'; otherwise they are
    deleted. A PostProcessingError is only reported (not raised) when
    'ignoreerrors' is exactly True.
    """
    infodict.setdefault('__files_to_move', {})
    try:
        obsolete_files, infodict = pp.run(infodict)
    except PostProcessingError as err:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is not True:
            raise
        self.report_error(err)
        return infodict

    if obsolete_files:
        if self.params.get('keepvideo', False):
            pending_moves = infodict['__files_to_move']
            for path in obsolete_files:
                pending_moves.setdefault(path, '')
        else:
            self._delete_downloaded_files(
                *obsolete_files, info=infodict, msg='Deleting original file %s (pass -k to keep)')
    return infodict
5bfa4862 3629
def run_all_pps(self, key, info, *, additional_pps=None):
    """Run every postprocessor registered under `key`, after any additional_pps, and return the resulting info.

    For every key except 'video', the force-print hook is called first.
    """
    if key != 'video':
        self._forceprint(key, info)
    pipeline = (additional_pps or []) + self._pps[key]
    for postprocessor in pipeline:
        info = self.run_pp(postprocessor, info)
    return info
277d6ff5 3636
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered under `key` on a copy of ie_info.

    Returns (info, files_to_move). A PostProcessingError is not raised; it is
    reported and stored under '__pending_error' instead.
    """
    info = dict(ie_info)
    info['__files_to_move'] = files_to_move or {}
    try:
        info = self.run_all_pps(key, info)
    except PostProcessingError as err:
        message = f'Preprocessing: {err}'
        info.setdefault('__pending_error', message)
        self.report_error(message, is_error=False)
    remaining_moves = info.pop('__files_to_move', None)
    return info, remaining_moves
5bfa4862 3647
def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file."""
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move or {}

    extra_pps = info.get('__postprocessors')
    info = self.run_all_pps('post_process', info, additional_pps=extra_pps)

    # Move the files, then drop the bookkeeping key before the 'after_move' stage
    mover = MoveFilesAfterDownloadPP(self)
    info = self.run_pp(mover, info)
    del info['__files_to_move']
    return self.run_all_pps('after_move', info)
c1c9a79c 3656
5db07df6 3657 def _make_archive_id(self, info_dict):
e9fef7ee
S
3658 video_id = info_dict.get('id')
3659 if not video_id:
3660 return
5db07df6
PH
3661 # Future-proof against any change in case
3662 # and backwards compatibility with prior versions
e9fef7ee 3663 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3664 if extractor is None:
1211bb6d
S
3665 url = str_or_none(info_dict.get('url'))
3666 if not url:
3667 return
e9fef7ee 3668 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3669 for ie_key, ie in self._ies.items():
1211bb6d 3670 if ie.suitable(url):
8b7491c8 3671 extractor = ie_key
e9fef7ee
S
3672 break
3673 else:
3674 return
0647d925 3675 return make_archive_id(extractor, video_id)
5db07df6
PH
3676
def in_download_archive(self, info_dict):
    """Return whether the video, under its current or any old archive id, is in the archive."""
    if not self.archive:
        return False

    candidate_ids = [
        self._make_archive_id(info_dict),
        *(info_dict.get('_old_archive_ids') or []),
    ]
    return any(archive_id in self.archive for archive_id in candidate_ids)
c1c9a79c
PH
3684
def record_download_archive(self, info_dict):
    """Add the video's archive id to the in-memory archive and, for path-like archives, to the archive file.

    Does nothing when the 'download_archive' param is not set.
    """
    archive_target = self.params.get('download_archive')
    if archive_target is None:
        return
    archive_id = self._make_archive_id(info_dict)
    assert archive_id

    self.write_debug(f'Adding to archive: {archive_id}')
    if is_path_like(archive_target):
        with locked_file(archive_target, 'a', encoding='utf-8') as archive_file:
            archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
dd82ffea 3697
8c51aa65 3698 @staticmethod
8abeeb94 3699 def format_resolution(format, default='unknown'):
9359f3d4 3700 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
fb04e403 3701 return 'audio only'
f49d89ee
PH
3702 if format.get('resolution') is not None:
3703 return format['resolution']
35615307 3704 if format.get('width') and format.get('height'):
ff51ed58 3705 return '%dx%d' % (format['width'], format['height'])
35615307 3706 elif format.get('height'):
ff51ed58 3707 return '%sp' % format['height']
35615307 3708 elif format.get('width'):
ff51ed58 3709 return '%dx?' % format['width']
3710 return default
8c51aa65 3711
8130779d 3712 def _list_format_headers(self, *headers):
3713 if self.params.get('listformats_table', True) is not False:
591bb9d3 3714 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
8130779d 3715 return headers
3716
c57f7757
PH
def _format_note(self, fdict):
    """Build the free-form note string for a format dict.

    Appends, in a fixed order, whichever of these fields are present:
    unsupported marker (ext f4f/f4m), language, format_note, tbr, container,
    video codec and bitrate, fps, audio codec and bitrate, sample rate and
    (approximate) file size. Returns '' when none of them are set.
    """
    res = ''
    if fdict.get('ext') in ['f4f', 'f4m']:
        res += '(unsupported)'
    if fdict.get('language'):
        if res:
            res += ' '
        res += '[%s]' % fdict['language']
    if fdict.get('format_note') is not None:
        if res:
            res += ' '
        res += fdict['format_note']
    # From here on, fields are separated by ', ' instead of a single space
    if fdict.get('tbr') is not None:
        if res:
            res += ', '
        res += '%4dk' % fdict['tbr']
    if fdict.get('container') is not None:
        if res:
            res += ', '
        res += '%s container' % fdict['container']
    if (fdict.get('vcodec') is not None
            and fdict.get('vcodec') != 'none'):
        if res:
            res += ', '
        res += fdict['vcodec']
        if fdict.get('vbr') is not None:
            # The bitrate appended below attaches to the codec as 'codec@NNNk'
            res += '@'
    elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
        # No usable codec name, but both bitrates are known: label the video one explicitly
        res += 'video@'
    if fdict.get('vbr') is not None:
        res += '%4dk' % fdict['vbr']
    if fdict.get('fps') is not None:
        if res:
            res += ', '
        res += '%sfps' % fdict['fps']
    if fdict.get('acodec') is not None:
        if res:
            res += ', '
        if fdict['acodec'] == 'none':
            res += 'video only'
        else:
            res += '%-5s' % fdict['acodec']
    elif fdict.get('abr') is not None:
        if res:
            res += ', '
        res += 'audio'
    if fdict.get('abr') is not None:
        res += '@%3dk' % fdict['abr']
    if fdict.get('asr') is not None:
        res += ' (%5dHz)' % fdict['asr']
    # An exact size takes precedence; the approximate one is prefixed with '~'
    if fdict.get('filesize') is not None:
        if res:
            res += ', '
        res += format_bytes(fdict['filesize'])
    elif fdict.get('filesize_approx') is not None:
        if res:
            res += ', '
        res += '~' + format_bytes(fdict['filesize_approx'])
    return res
91c7271a 3776
aebb4f4b 3777 def _get_formats(self, info_dict):
3778 if info_dict.get('formats') is None:
3779 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3780 return [info_dict]
3781 return []
3782 return info_dict['formats']
b69fd25c 3783
def render_formats_table(self, info_dict):
    """Render the available formats of info_dict as a text table.

    Returns None when there are no formats. When the 'listformats_table'
    param is exactly False, a simple four-column table (format code,
    extension, resolution, note) is produced; otherwise the detailed, styled
    table is rendered. Formats whose 'preference' is below -1000 are hidden
    in both layouts.
    """
    formats = self._get_formats(info_dict)
    if not formats:
        return
    # Equivalent to the former `not ... is not False`, without the double negation
    if self.params.get('listformats_table', True) is False:
        table = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)
            ] for f in formats if (f.get('preference') or 0) >= -1000]
        return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)

    def simplified_codec(f, field):
        """Return the codec name cut to at most four dot-separated parts, or a placeholder."""
        assert field in ('acodec', 'vcodec')
        codec = f.get(field)
        if not codec:
            return 'unknown'
        elif codec != 'none':
            return '.'.join(codec.split('.')[:4])

        # The codec is 'none': describe what kind of stream this is instead
        if field == 'vcodec' and f.get('acodec') == 'none':
            return 'images'
        elif field == 'acodec' and f.get('vcodec') == 'none':
            return ''
        return self._format_out('audio only' if field == 'vcodec' else 'video only',
                                self.Styles.SUPPRESS)

    delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
    table = [
        [
            self._format_out(format_field(f, 'format_id'), self.Styles.ID),
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d', func=round),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            format_field(f, 'audio_channels', '\t%s'),
            delim, (
                # Exact size, else approximate size, else an estimate from duration * tbr
                format_field(f, 'filesize', ' \t%s', func=format_bytes)
                or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
                or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
                                None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
            format_field(f, 'tbr', '\t%dk', func=round),
            shorten_protocol_name(f.get('protocol', '')),
            delim,
            simplified_codec(f, 'vcodec'),
            format_field(f, 'vbr', '\t%dk', func=round),
            simplified_codec(f, 'acodec'),
            format_field(f, 'abr', '\t%dk', func=round),
            format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
            join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
                self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
                (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
                 else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
                format_field(f, 'format_note'),
                format_field(f, 'container', ignore=(None, f.get('ext'))),
                delim=', '), delim=' '),
        ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
        delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    return render_table(
        header_line, table, hide_empty=True,
        delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
8130779d 3850
def render_thumbnails_table(self, info_dict):
    """Render the thumbnails of info_dict as a text table, or return None if there are none."""
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        return None

    headers = self._list_format_headers('ID', 'Width', 'Height', 'URL')
    rows = []
    for thumb in thumbnails:
        rows.append([
            thumb.get('id'),
            thumb.get('width') or 'unknown',
            thumb.get('height') or 'unknown',
            thumb['url'],
        ])
    return render_table(headers, rows)
2412044c 3858
def render_subtitles_table(self, video_id, subtitles):
    """Render a {language: [formats]} mapping as a text table, or return None if it is empty."""
    if not subtitles:
        return None

    def build_row(lang, formats):
        pairs = ((fmt['ext'], fmt.get('name') or 'unknown') for fmt in reversed(formats))
        exts, names = zip(*pairs)
        if len(set(names)) == 1:
            # All formats share one name: show it once, or not at all if it is unknown
            names = names[:1] if names[0] != 'unknown' else []
        return [lang, ', '.join(names), ', '.join(exts)]

    headers = self._list_format_headers('Language', 'Name', 'Formats')
    rows = [build_row(lang, formats) for lang, formats in subtitles.items()]
    return render_table(headers, rows, hide_empty=True)
3872
3873 def __list_table(self, video_id, name, func, *args):
3874 table = func(*args)
3875 if not table:
3876 self.to_screen(f'{video_id} has no {name}')
3877 return
3878 self.to_screen(f'[info] Available {name} for {video_id}:')
3879 self.to_stdout(table)
3880
def list_formats(self, info_dict):
    """Print the formats table of info_dict."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'formats', self.render_formats_table, info_dict)
3883
def list_thumbnails(self, info_dict):
    """Print the thumbnails table of info_dict."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'thumbnails', self.render_thumbnails_table, info_dict)
3886
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the table of `subtitles` for video_id, labelled with `name`."""
    renderer_args = (video_id, subtitles)
    self.__list_table(video_id, name, self.render_subtitles_table, *renderer_args)
a504ced0 3889
dca08720
PH
def print_debug_header(self):
    """Write the verbose debug header (encodings, version, executables, libraries, proxies, plugins).

    Does nothing unless the 'verbose' param is set. Output goes to the
    'logger' param's debug() when a logger is configured, otherwise through
    write_string / self._write_string.
    """
    if not self.params.get('verbose'):
        return

    from . import _IN_CLI  # Must be delayed import

    # These imports can be slow. So import them only as needed
    from .extractor.extractors import _LAZY_LOADER
    from .extractor.extractors import (
        _PLUGIN_CLASSES as plugin_ies,
        _PLUGIN_OVERRIDES as plugin_ie_overrides
    )

    def get_encoding(stream):
        # Describe a stream's encoding, annotated with terminal limitations (dumb TERM, no VT/ANSI)
        ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
        additional_info = []
        if os.environ.get('TERM', '').lower() == 'dumb':
            additional_info.append('dumb')
        if not supports_terminal_sequences(stream):
            from .utils import WINDOWS_VT_MODE  # Must be imported locally
            additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
        if additional_info:
            ret = f'{ret} ({",".join(additional_info)})'
        return ret

    encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        self.get_encoding(),
        ', '.join(
            f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
            if stream is not None and key != 'console')
    )

    # Pick the sink for all following messages; the encoding line is emitted first either way
    logger = self.params.get('logger')
    if logger:
        write_debug = lambda msg: logger.debug(f'[debug] {msg}')
        write_debug(encoding_str)
    else:
        write_string(f'[debug] {encoding_str}\n', encoding=None)
        write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    if VARIANT not in (None, 'pip'):
        source += '*'
    klass = type(self)
    write_debug(join_nonempty(
        f'{REPOSITORY.rpartition("/")[2]} version',
        _make_label(ORIGIN, CHANNEL.partition('@')[2] or __version__, __version__),
        f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        # Outside the CLI, say whether the plain class or a subclass is in use
        '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
        delim=' '))

    if not _IN_CLI:
        write_debug(f'params: {self.params}')

    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if self.params['compat_opts']:
        write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

    if current_git_head():
        write_debug(f'Git HEAD: {current_git_head()}')
    write_debug(system_identifier())

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    # Keep only the names of the features whose value is truthy
    ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
    if ffmpeg_features:
        exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    # Executables with a falsy version are left out of the listing
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .compat.compat_utils import get_package_info
    from .dependencies import available_dependencies

    write_debug('Optional libraries: %s' % (', '.join(sorted({
        join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
    })) or 'none'))

    write_debug(f'Proxy map: {self.proxies}')
    write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
    for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
        # The comprehension's `klass` is local to it and does not rebind the outer `klass`
        display_list = ['%s%s' % (
            klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in plugins.items()]
        if plugin_type == 'Extractor':
            # Inside this generator, `plugins` is the per-parent override value, not the loop variable
            display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
                                for parent, plugins in plugin_ie_overrides.items())
        if not display_list:
            continue
        write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')

    plugin_dirs = plugin_directories()
    if plugin_dirs:
        write_debug(f'Plugin directories: {plugin_dirs}')

    # Not implemented
    # (the `False and` short-circuit makes this branch dead code)
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
        write_debug('Public IP address: %s' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode()
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
4006
@functools.cached_property
def proxies(self):
    """Global proxy configuration"""
    configured = self.params.get('proxy')
    if configured is None:
        # No explicit option: fall back to what urllib detects
        detected = urllib.request.getproxies()
        # compat. Set HTTPS_PROXY to __noproxy__ to revert
        if 'http' in detected and 'https' not in detected:
            detected['https'] = detected['http']
        return detected

    # An empty string means "use no proxy at all"
    return {'all': '__noproxy__' if configured == '' else configured}
62fec3b2 4022
@functools.cached_property
def cookiejar(self):
    """Global cookiejar instance"""
    params = self.params
    return load_cookies(params.get('cookiefile'), params.get('cookiesfrombrowser'), self)
4028
227bf1a3 4029 @property
4030 def _opener(self):
4031 """
4032 Get a urllib OpenerDirector from the Urllib handler (deprecated).
4033 """
62b5c94c 4034 self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
227bf1a3 4035 handler = self._request_director.handlers['Urllib']
4036 return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
4037
def urlopen(self, req):
    """ Start an HTTP download

    @param req  A URL string, a Request, or (deprecated) a
                urllib.request.Request, which is converted to a Request

    Returns whatever the request director's send() returns. Some low-level
    failures are re-raised as RequestError with a more actionable message,
    and HTTPError is re-raised as _CompatHTTPError.
    """
    if isinstance(req, str):
        req = Request(req)
    elif isinstance(req, urllib.request.Request):
        self.deprecation_warning(
            'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
            'Use yt_dlp.networking.common.Request instead.')
        req = urllib_req_to_req(req)
    assert isinstance(req, Request)

    # compat: Assume user:pass url params are basic auth
    url, basic_auth_header = extract_basic_auth(req.url)
    if basic_auth_header:
        req.headers['Authorization'] = basic_auth_header
    req.url = sanitize_url(url)

    clean_proxies(proxies=req.proxies, headers=req.headers)
    clean_headers(req.headers)

    try:
        return self._request_director.send(req)
    except NoSupportingHandlers as e:
        # Turn well-known "unsupported" reasons into friendlier errors;
        # anything unrecognised re-raises the original exception below
        for ue in e.unsupported_errors:
            # FIXME: This depends on the order of errors.
            if not (ue.handler and ue.msg):
                continue
            if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
                raise RequestError(
                    'file:// URLs are disabled by default in yt-dlp for security reasons. '
                    'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
            # NOTE(review): unlike its siblings, this error is raised without cause=ue / `from ue` - confirm this is intended
            if 'unsupported proxy type: "https"' in ue.msg.lower():
                raise RequestError(
                    'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests')

            elif (
                re.match(r'unsupported url scheme: "wss?"', ue.msg.lower())
                and 'websockets' not in self._request_director.handlers
            ):
                raise RequestError(
                    'This request requires WebSocket support. '
                    'Ensure one of the following dependencies are installed: websockets',
                    cause=ue) from ue
        raise
    except SSLError as e:
        # Point the user at --legacy-server-connect for the two recognised SSL failures
        if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
            raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
        elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
            raise RequestError(
                'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
                'Try using --legacy-server-connect', cause=e) from e
        raise
    except HTTPError as e:  # TODO: Remove in a future release
        raise _CompatHTTPError(e) from e
4092
def build_request_director(self, handlers, preferences=None):
    """Create a RequestDirector holding one configured instance of each handler class in `handlers`.

    @param handlers     Iterable of request handler classes to instantiate
    @param preferences  Optional preferences to add to the director
    """
    logger = _YDLLogger(self)
    headers = self.params['http_headers'].copy()
    proxies = self.proxies.copy()
    clean_headers(headers)
    clean_proxies(proxies, headers)

    director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
    for handler_cls in handlers:
        # Built per handler so that nothing is evaluated when `handlers` is empty
        handler_kwargs = {
            'logger': logger,
            'headers': headers,
            'cookiejar': self.cookiejar,
            'proxies': proxies,
            'prefer_system_certs': 'no-certifi' in self.params['compat_opts'],
            'verify': not self.params.get('nocheckcertificate'),
            **traverse_obj(self.params, {
                'verbose': 'debug_printtraffic',
                'source_address': 'source_address',
                'timeout': 'socket_timeout',
                'legacy_ssl_support': 'legacyserverconnect',
                'enable_file_urls': 'enable_file_urls',
                'client_cert': {
                    'client_certificate': 'client_certificate',
                    'client_certificate_key': 'client_certificate_key',
                    'client_certificate_password': 'client_certificate_password',
                },
            }),
        }
        director.add_handler(handler_cls(**handler_kwargs))

    director.preferences.update(preferences or [])
    if 'prefer-legacy-http-handler' in self.params['compat_opts']:
        director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
    return director
c365dba8 4126
62fec3b2
PH
def encode(self, s):
    """Encode s with the configured encoding; bytes are returned unchanged.

    On failure, the UnicodeEncodeError is re-raised with a hint appended to
    its reason.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s  # Already encoded
4136
def get_encoding(self):
    """Return the 'encoding' param, falling back to preferredencoding() when it is None."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a 4142
e08a85d8 4143 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
cb96c5be 4144 ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
e08a85d8 4145 if overwrite is None:
4146 overwrite = self.params.get('overwrites', True)
80c03fa9 4147 if not self.params.get('writeinfojson'):
4148 return False
4149 elif not infofn:
4150 self.write_debug(f'Skipping writing {label} infojson')
4151 return False
4152 elif not self._ensure_dir_exists(infofn):
4153 return None
e08a85d8 4154 elif not overwrite and os.path.exists(infofn):
80c03fa9 4155 self.to_screen(f'[info] {label.title()} metadata is already present')
cb96c5be 4156 return 'exists'
4157
4158 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
4159 try:
4160 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
4161 return True
86e5f3ed 4162 except OSError:
cb96c5be 4163 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
4164 return None
80c03fa9 4165
4166 def _write_description(self, label, ie_result, descfn):
4167 ''' Write description and returns True = written, False = skip, None = error '''
4168 if not self.params.get('writedescription'):
4169 return False
4170 elif not descfn:
4171 self.write_debug(f'Skipping writing {label} description')
4172 return False
4173 elif not self._ensure_dir_exists(descfn):
4174 return None
4175 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
4176 self.to_screen(f'[info] {label.title()} description is already present')
4177 elif ie_result.get('description') is None:
88fb9425 4178 self.to_screen(f'[info] There\'s no {label} description to write')
80c03fa9 4179 return False
4180 else:
4181 try:
4182 self.to_screen(f'[info] Writing {label} description to: {descfn}')
86e5f3ed 4183 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
80c03fa9 4184 descfile.write(ie_result['description'])
86e5f3ed 4185 except OSError:
80c03fa9 4186 self.report_error(f'Cannot write {label} description file {descfn}')
4187 return None
4188 return True
4189
def _write_subtitles(self, info_dict, filename):
    ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error

    Works on info_dict['requested_subtitles'] and records the path of each
    written or already existing subtitle in its 'filepath' key. An empty
    list is returned when subtitle writing is disabled, nothing was
    requested, or no subtitle filename could be prepared.
    '''
    ret = []
    subtitles = info_dict.get('requested_subtitles')
    if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        return ret
    elif not subtitles:
        self.to_screen('[info] There are no subtitles for the requested languages')
        return ret
    sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
    if not sub_filename_base:
        self.to_screen('[info] Skipping writing video subtitles')
        return ret

    for sub_lang, sub_info in subtitles.items():
        sub_format = sub_info['ext']
        # sub_filename is derived from `filename`, the final name from the 'subtitle' template
        sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
        sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
        existing_sub = self.existing_file((sub_filename_final, sub_filename))
        if existing_sub:
            self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
            sub_info['filepath'] = existing_sub
            ret.append((existing_sub, sub_filename_final))
            continue

        self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
        # Subtitle content that is already available is written directly, without downloading
        if sub_info.get('data') is not None:
            try:
                # Use newline='' to prevent conversion of newline characters
                # See https://github.com/ytdl-org/youtube-dl/issues/10268
                with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                    subfile.write(sub_info['data'])
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
                continue
            except OSError:
                self.report_error(f'Cannot write video subtitles file {sub_filename}')
                return None

        try:
            # Download using a copy, so the added 'http_headers' default does not leak into sub_info
            sub_copy = sub_info.copy()
            sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
            self.dl(sub_filename, sub_copy, subtitle=True)
            sub_info['filepath'] = sub_filename
            ret.append((sub_filename, sub_filename_final))
        except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
            msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
            if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
                # Report only when errors are not ignored at all, but raise in both cases
                if not self.params.get('ignoreerrors'):
                    self.report_error(msg)
                raise DownloadError(msg)
            self.report_warning(msg)
    return ret
80c03fa9 4245
4246 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
2acd1d55 4247 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error '''
6c4fd172 4248 write_all = self.params.get('write_all_thumbnails', False)
80c03fa9 4249 thumbnails, ret = [], []
6c4fd172 4250 if write_all or self.params.get('writethumbnail', False):
0202b52a 4251 thumbnails = info_dict.get('thumbnails') or []
88fb9425 4252 if not thumbnails:
c8bc203f 4253 self.to_screen(f'[info] There are no {label} thumbnails to download')
88fb9425 4254 return ret
6c4fd172 4255 multiple = write_all and len(thumbnails) > 1
ec82d85a 4256
80c03fa9 4257 if thumb_filename_base is None:
4258 thumb_filename_base = filename
4259 if thumbnails and not thumb_filename_base:
4260 self.write_debug(f'Skipping writing {label} thumbnail')
4261 return ret
4262
a40e0b37 4263 if thumbnails and not self._ensure_dir_exists(filename):
2acd1d55
R
4264 return None
4265
dd0228ce 4266 for idx, t in list(enumerate(thumbnails))[::-1]:
80c03fa9 4267 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
aa9369a2 4268 thumb_display_id = f'{label} thumbnail {t["id"]}'
80c03fa9 4269 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
4270 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
ec82d85a 4271
e04938ab 4272 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
4273 if existing_thumb:
aa9369a2 4274 self.to_screen('[info] %s is already present' % (
4275 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
e04938ab 4276 t['filepath'] = existing_thumb
4277 ret.append((existing_thumb, thumb_filename_final))
ec82d85a 4278 else:
80c03fa9 4279 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
ec82d85a 4280 try:
227bf1a3 4281 uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
80c03fa9 4282 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
d3d89c32 4283 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 4284 shutil.copyfileobj(uf, thumbf)
80c03fa9 4285 ret.append((thumb_filename, thumb_filename_final))
885cc0b7 4286 t['filepath'] = thumb_filename
3158150c 4287 except network_exceptions as err:
3d2623a8 4288 if isinstance(err, HTTPError) and err.status == 404:
ad54c913 4289 self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
4290 else:
4291 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
dd0228ce 4292 thumbnails.pop(idx)
6c4fd172 4293 if ret and not write_all:
4294 break
0202b52a 4295 return ret