]> jfr.im git - yt-dlp.git/blame - yt_dlp/YoutubeDL.py
[ie/khanacademy] Fix extractors (#9136)
[yt-dlp.git] / yt_dlp / YoutubeDL.py
CommitLineData
26e63931 1import collections
31bd3925 2import contextlib
31215122 3import copy
c305a25c 4import datetime as dt
c1c9a79c 5import errno
31bd3925 6import fileinput
31215122 7import http.cookiejar
8222d8de 8import io
b82f815f 9import itertools
8694c600 10import json
62fec3b2 11import locale
083c9df9 12import operator
8222d8de 13import os
f8271158 14import random
8222d8de
JMF
15import re
16import shutil
6f2287cb 17import string
dca08720 18import subprocess
8222d8de 19import sys
21cd8fae 20import tempfile
8222d8de 21import time
67134eab 22import tokenize
8222d8de 23import traceback
524e2e4f 24import unicodedata
961ea474 25
f8271158 26from .cache import Cache
227bf1a3 27from .compat import functools, urllib # isort: split
ff077926 28from .compat import compat_os_name, urllib_req_to_req
31215122 29from .cookies import LenientSimpleCookie, load_cookies
f8271158 30from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
31from .downloader.rtmp import rtmpdump_version
f8271158 32from .extractor import gen_extractor_classes, get_info_extractor
fe7866d0 33from .extractor.common import UnsupportedURLIE
f8271158 34from .extractor.openload import PhantomJSwrapper
35from .minicurses import format_text
3d2623a8 36from .networking import HEADRequest, Request, RequestDirector
db7b054a 37from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES
227bf1a3 38from .networking.exceptions import (
39 HTTPError,
40 NoSupportingHandlers,
41 RequestError,
42 SSLError,
3d2623a8 43 network_exceptions,
227bf1a3 44)
0b81d4d2 45from .networking.impersonate import ImpersonateRequestHandler
8e40b9d1 46from .plugins import directories as plugin_directories
e756f45b 47from .postprocessor import _PLUGIN_CLASSES as plugin_pps
f8271158 48from .postprocessor import (
49 EmbedThumbnailPP,
50 FFmpegFixupDuplicateMoovPP,
51 FFmpegFixupDurationPP,
52 FFmpegFixupM3u8PP,
53 FFmpegFixupM4aPP,
54 FFmpegFixupStretchedPP,
55 FFmpegFixupTimestampPP,
56 FFmpegMergerPP,
57 FFmpegPostProcessor,
ca9def71 58 FFmpegVideoConvertorPP,
f8271158 59 MoveFilesAfterDownloadPP,
60 get_postprocessor,
61)
ca9def71 62from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
f9fb3ce8
SS
63from .update import (
64 REPOSITORY,
65 _get_system_deprecation,
66 _make_label,
67 current_git_head,
68 detect_variant,
69)
8c25f81b 70from .utils import (
f8271158 71 DEFAULT_OUTTMPL,
7b2c3f47 72 IDENTITY,
f8271158 73 LINK_TEMPLATES,
8dc59305 74 MEDIA_EXTENSIONS,
f8271158 75 NO_DEFAULT,
1d485a1a 76 NUMBER_RE,
f8271158 77 OUTTMPL_TYPES,
78 POSTPROCESS_WHEN,
79 STR_FORMAT_RE_TMPL,
80 STR_FORMAT_TYPES,
81 ContentTooShortError,
82 DateRange,
83 DownloadCancelled,
84 DownloadError,
85 EntryNotInPlaylist,
86 ExistingVideoReached,
87 ExtractorError,
784320c9 88 FormatSorter,
f8271158 89 GeoRestrictedError,
f8271158 90 ISO3166Utils,
91 LazyList,
92 MaxDownloadsReached,
19a03940 93 Namespace,
f8271158 94 PagedList,
7e88d7d7 95 PlaylistEntries,
f8271158 96 Popen,
97 PostProcessingError,
98 ReExtractInfo,
99 RejectedVideoReached,
100 SameFileError,
101 UnavailableVideoError,
693f0600 102 UserNotLive,
0b81d4d2 103 YoutubeDLError,
eedb7ba5 104 age_restricted,
cb794ee0 105 bug_reports_message,
ce02ed60 106 date_from_str,
da4db748 107 deprecation_warning,
ce02ed60 108 determine_ext,
b5559424 109 determine_protocol,
c0384f22 110 encode_compat_str,
ce02ed60 111 encodeFilename,
47cdc68e 112 escapeHTML,
590bc6f6 113 expand_path,
227bf1a3 114 extract_basic_auth,
90137ca4 115 filter_dict,
e29663c6 116 float_or_none,
02dbf93f 117 format_bytes,
e0fd9573 118 format_decimal_suffix,
f8271158 119 format_field,
525ef922 120 formatSeconds,
fc61aff4 121 get_compatible_ext,
0bb322b9 122 get_domain,
c9969434 123 int_or_none,
732044af 124 iri_to_uri,
941e881e 125 is_path_like,
34921b43 126 join_nonempty,
ce02ed60 127 locked_file,
0647d925 128 make_archive_id,
0202b52a 129 make_dir,
ec11a9f4 130 number_of_digits,
cd6fc19e 131 orderedSet,
5314b521 132 orderedSet_from_options,
083c9df9 133 parse_filesize,
ce02ed60 134 preferredencoding,
eedb7ba5 135 prepend_extension,
3efb96a6 136 remove_terminal_sequences,
cfb56d1a 137 render_table,
eedb7ba5 138 replace_extension,
ce02ed60 139 sanitize_filename,
1bb5c511 140 sanitize_path,
dcf77cf1 141 sanitize_url,
ff077926 142 shell_quote,
1211bb6d 143 str_or_none,
e29663c6 144 strftime_or_none,
ce02ed60 145 subtitles_filename,
819e0531 146 supports_terminal_sequences,
b1f94422 147 system_identifier,
86e3b822 148 filesize_from_tbr,
f2ebc5c7 149 timetuple_from_msec,
732044af 150 to_high_limit_path,
324ad820 151 traverse_obj,
fc61aff4 152 try_call,
6033d980 153 try_get,
29eb5174 154 url_basename,
7d1eb38a 155 variadic,
58b1f00d 156 version_tuple,
53973b4d 157 windows_enable_vt_mode,
ce02ed60
PH
158 write_json_file,
159 write_string,
4f026faf 160)
227bf1a3 161from .utils._utils import _YDLLogger
162from .utils.networking import (
163 HTTPHeaderDict,
164 clean_headers,
165 clean_proxies,
3d2623a8 166 std_headers,
227bf1a3 167)
20314dd4 168from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__
8222d8de 169
e9c0cdd3
YCH
170if compat_os_name == 'nt':
171 import ctypes
172
2459b6e1 173
86e5f3ed 174class YoutubeDL:
8222d8de
JMF
175 """YoutubeDL class.
176
177 YoutubeDL objects are the ones responsible for downloading the
178 actual video file and writing it to disk if the user has requested
179 it, among some other tasks. In most cases there should be one per
180 program. As, given a video URL, the downloader doesn't know how to
181 extract all the needed information, task that InfoExtractors do, it
182 has to pass the URL to one of them.
183
184 For this, YoutubeDL objects have a method that allows
185 InfoExtractors to be registered in a given order. When it is passed
186 a URL, the YoutubeDL object hands it to the first InfoExtractor it
187 finds that reports being able to handle it. The InfoExtractor extracts
188 all the information about the video or videos the URL refers to, and
189 YoutubeDL processes the extracted information, possibly using a File
190 Downloader to download the video.
191
192 YoutubeDL objects accept a lot of parameters. In order not to saturate
193 the object constructor with arguments, it receives a dictionary of
194 options instead. These options are available through the params
195 attribute for the InfoExtractors to use. The YoutubeDL also
196 registers itself as the downloader in charge for the InfoExtractors
197 that are added to it, so this is a "mutual registration".
198
199 Available options:
200
201 username: Username for authentication purposes.
202 password: Password for authentication purposes.
180940e0 203 videopassword: Password for accessing a video.
1da50aa3
S
204 ap_mso: Adobe Pass multiple-system operator identifier.
205 ap_username: Multiple-system operator account username.
206 ap_password: Multiple-system operator account password.
8222d8de 207 usenetrc: Use netrc for authentication instead.
c8bc203f 208 netrc_location: Location of the netrc file. Defaults to ~/.netrc.
db3ad8a6 209 netrc_cmd: Use a shell command to get credentials
8222d8de
JMF
210 verbose: Print additional info to stdout.
211 quiet: Do not print messages to stdout.
ad8915b7 212 no_warnings: Do not print out anything for warnings.
bb66c247 213 forceprint: A dict with keys WHEN mapped to a list of templates to
214 print to stdout. The allowed keys are video or any of the
215 items in utils.POSTPROCESS_WHEN.
ca30f449 216 For compatibility, a single list is also accepted
bb66c247 217 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
218 a list of tuples with (template, filename)
8694c600 219 forcejson: Force printing info_dict as JSON.
63e0be34
PH
220 dump_single_json: Force printing the info_dict of the whole playlist
221 (or video) as a single JSON line.
c25228e5 222 force_write_download_archive: Force writing download archive regardless
223 of 'skip_download' or 'simulate'.
b7b04c78 224 simulate: Do not download the video files. If unset (or None),
225 simulate only if listsubtitles, listformats or list_thumbnails is used
eb8a4433 226 format: Video format code. see "FORMAT SELECTION" for more details.
093a1710 227 You can also pass a function. The function takes 'ctx' as
228 argument and returns the formats to download.
229 See "build_format_selector" for an implementation
63ad4d43 230 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
b7da73eb 231 ignore_no_formats_error: Ignore "No video formats" error. Useful for
232 extracting metadata even if the video is not actually
233 available for download (experimental)
0930b11f 234 format_sort: A list of fields by which to sort the video formats.
235 See "Sorting Formats" for more details.
c25228e5 236 format_sort_force: Force the given format_sort. see "Sorting Formats"
237 for more details.
08d30158 238 prefer_free_formats: Whether to prefer video formats with free containers
239 over non-free ones of same quality.
c25228e5 240 allow_multiple_video_streams: Allow multiple video streams to be merged
241 into a single file
242 allow_multiple_audio_streams: Allow multiple audio streams to be merged
243 into a single file
0ba692ac 244 check_formats: Whether to test if the formats are downloadable.
9f1a1c36 245 Can be True (check all), False (check none),
246 'selected' (check selected formats),
0ba692ac 247 or None (check only if requested by extractor)
4524baf0 248 paths: Dictionary of output paths. The allowed keys are 'home'
5ca095cb 249 'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
de6000d9 250 outtmpl: Dictionary of templates for output names. Allowed keys
5ca095cb 251 are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).
34488702 252 For compatibility with youtube-dl, a single string can also be used
a820dc72
RA
253 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
254 restrictfilenames: Do not allow "&" and spaces in file names
255 trim_file_name: Limit length of filename (extension excluded)
4524baf0 256 windowsfilenames: Force the filenames to be windows compatible
b1940459 257 ignoreerrors: Do not stop on download/postprocessing errors.
258 Can be 'only_download' to ignore only download errors.
259 Default is 'only_download' for CLI, but False for API
26e2805c 260 skip_playlist_after_errors: Number of allowed failures until the rest of
261 the playlist is skipped
fe7866d0 262 allowed_extractors: List of regexes to match against extractor names that are allowed
0c3d0f51 263 overwrites: Overwrite all video and metadata files if True,
264 overwrite only non-video files if None
265 and don't overwrite any file if False
c14e88f0 266 playlist_items: Specific indices of playlist to download.
75822ca7 267 playlistrandom: Download playlist items in random order.
7e9a6125 268 lazy_playlist: Process playlist entries as they are received.
8222d8de
JMF
269 matchtitle: Download only matching titles.
270 rejecttitle: Reject downloads for matching titles.
8bf9319e 271 logger: Log messages to a logging.Logger instance.
17ffed18 272 logtostderr: Print everything to stderr instead of stdout.
273 consoletitle: Display progress in console window's titlebar.
8222d8de
JMF
274 writedescription: Write the video description to a .description file
275 writeinfojson: Write the video metadata to a .info.json file
ad54c913 276 clean_infojson: Remove internal metadata from the infojson
34488702 277 getcomments: Extract video comments. This will not be written to disk
06167fbb 278 unless writeinfojson is also given
1fb07d10 279 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 280 writethumbnail: Write the thumbnail image to a file
c25228e5 281 allow_playlist_files: Whether to write playlists' description, infojson etc
282 also to disk when using the 'write*' options
ec82d85a 283 write_all_thumbnails: Write all thumbnail formats to files
732044af 284 writelink: Write an internet shortcut file, depending on the
285 current platform (.url/.webloc/.desktop)
286 writeurllink: Write a Windows internet shortcut file (.url)
287 writewebloclink: Write a macOS internet shortcut file (.webloc)
288 writedesktoplink: Write a Linux internet shortcut file (.desktop)
8222d8de 289 writesubtitles: Write the video subtitles to a file
741dd8ea 290 writeautomaticsub: Write the automatically generated subtitles to a file
8222d8de 291 listsubtitles: Lists all available subtitles for the video
a504ced0 292 subtitlesformat: The format code for subtitles
c32b0aab 293 subtitleslangs: List of languages of the subtitles to download (can be regex).
294 The list may contain "all" to refer to all the available
295 subtitles. The language can be prefixed with a "-" to
62b58c09 296 exclude it from the requested languages, e.g. ['all', '-live_chat']
8222d8de 297 keepvideo: Keep the video file after post-processing
46f1370e 298 daterange: A utils.DateRange object, download only if the upload_date is in the range.
8222d8de 299 skip_download: Skip the actual download of the video file
c35f9e72 300 cachedir: Location of the cache files in the filesystem.
a0e07d31 301 False to disable filesystem cache.
47192f92 302 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
303 age_limit: An integer representing the user's age in years.
304 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
305 min_views: An integer representing the minimum view count the video
306 must have in order to not be skipped.
307 Videos without view count information are always
308 downloaded. None for no limit.
309 max_views: An integer representing the maximum view count.
310 Videos that are more popular than that are not
311 downloaded.
312 Videos without view count information are always
313 downloaded. None for no limit.
ae103564 314 download_archive: A set, or the name of a file where all downloads are recorded.
315 Videos already present in the file are not downloaded again.
8a51f564 316 break_on_existing: Stop the download process after attempting to download a
317 file that is in the archive.
b222c271 318 break_per_url: Whether break_on_reject and break_on_existing
319 should act on each input URL as opposed to for the entire queue
d76fa1f3 320 cookiefile: File name or text stream from where cookies should be read and dumped to
f59f5ef8 321 cookiesfrombrowser: A tuple containing the name of the browser, the profile
9bd13fe5 322 name/path from where cookies are loaded, the name of the keyring,
323 and the container name, e.g. ('chrome', ) or
324 ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
f81c62a6 325 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
326 support RFC 5746 secure renegotiation
f59f5ef8 327 nocheckcertificate: Do not verify SSL certificates
bb58c9ed 328 client_certificate: Path to client certificate file in PEM format. May include the private key
329 client_certificate_key: Path to private key file for client certificate
330 client_certificate_password: Password for client certificate private key, if encrypted.
331 If not provided and the key is encrypted, yt-dlp will ask interactively
7e8c0af0 332 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
c6e07cf1 333 (Only supported by some extractors)
8300774c 334 enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons.
8b7539d2 335 http_headers: A dictionary of custom headers to be used for all requests
a1ee09e8 336 proxy: URL of the proxy server to use
38cce791 337 geo_verification_proxy: URL of the proxy to use for IP address verification
504f20dd 338 on geo-restricted sites.
e344693b 339 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
340 bidi_workaround: Work around buggy terminals without bidirectional text
341 support, using fribidi
a0ddb8a2 342 debug_printtraffic: Print out sent and received HTTP traffic
04b4d394
PH
343 default_search: Prepend this string if an input url is not valid.
344 'auto' for elaborate guessing
62fec3b2 345 encoding: Use this encoding instead of the system-specified.
134c913c 346 extract_flat: Whether to resolve and process url_results further
46f1370e 347 * False: Always process. Default for API
134c913c 348 * True: Never process
349 * 'in_playlist': Do not process inside playlist/multi_video
350 * 'discard': Always process, but don't return the result
351 from inside playlist/multi_video
352 * 'discard_in_playlist': Same as "discard", but only for
46f1370e 353 playlists (not multi_video). Default for CLI
f2ebc5c7 354 wait_for_video: If given, wait for scheduled streams to become available.
355 The value should be a tuple containing the range
356 (min_secs, max_secs) to wait between retries
4f026faf 357 postprocessors: A list of dictionaries, each with an entry
71b640cc 358 * key: The name of the postprocessor. See
7a5c1cfe 359 yt_dlp/postprocessor/__init__.py for a list.
bb66c247 360 * when: When to run the postprocessor. Allowed values are
361 the entries of utils.POSTPROCESS_WHEN
56d868db 362 Assumed to be 'post_process' if not given
71b640cc
PH
363 progress_hooks: A list of functions that get called on download
364 progress, with a dictionary with the entries
5cda4eda 365 * status: One of "downloading", "error", or "finished".
ee69b99a 366 Check this first and ignore unknown values.
3ba7740d 367 * info_dict: The extracted info_dict
71b640cc 368
5cda4eda 369 If status is one of "downloading", or "finished", the
ee69b99a
PH
370 following properties may also be present:
371 * filename: The final filename (always present)
5cda4eda 372 * tmpfilename: The filename we're currently writing to
71b640cc
PH
373 * downloaded_bytes: Bytes on disk
374 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
375 * total_bytes_estimate: Guess of the eventual file size,
376 None if unavailable.
377 * elapsed: The number of seconds since download started.
71b640cc
PH
378 * eta: The estimated time in seconds, None if unknown
379 * speed: The download speed in bytes/second, None if
380 unknown
5cda4eda
PH
381 * fragment_index: The counter of the currently
382 downloaded video fragment.
383 * fragment_count: The number of fragments (= individual
384 files that will be merged)
71b640cc
PH
385
386 Progress hooks are guaranteed to be called at least once
387 (with status "finished") if the download is successful.
819e0531 388 postprocessor_hooks: A list of functions that get called on postprocessing
389 progress, with a dictionary with the entries
390 * status: One of "started", "processing", or "finished".
391 Check this first and ignore unknown values.
392 * postprocessor: Name of the postprocessor
393 * info_dict: The extracted info_dict
394
395 Postprocessor hooks are guaranteed to be called at least twice
396 (with status "started" and "finished") if the processing is successful.
fc61aff4 397 merge_output_format: "/" separated list of extensions to use when merging formats.
6b591b29 398 final_ext: Expected final extension; used to detect when the file was
59a7a13e 399 already downloaded and converted
6271f1ca
PH
400 fixup: Automatically correct known faults of the file.
401 One of:
402 - "never": do nothing
403 - "warn": only emit a warning
404 - "detect_or_warn": check whether we can do anything
62cd676c 405 about it, warn otherwise (default)
504f20dd 406 source_address: Client-side IP address to bind to.
0b81d4d2 407 impersonate: Client to impersonate for requests.
408 An ImpersonateTarget (from yt_dlp.networking.impersonate)
1cf376f5 409 sleep_interval_requests: Number of seconds to sleep between requests
410 during extraction
7aa589a5
S
411 sleep_interval: Number of seconds to sleep before each download when
412 used alone or a lower bound of a range for randomized
413 sleep before each download (minimum possible number
414 of seconds to sleep) when used along with
415 max_sleep_interval.
416 max_sleep_interval: Upper bound of a range for randomized sleep before each
417 download (maximum possible number of seconds to sleep).
418 Must only be used along with sleep_interval.
419 Actual sleep time will be a random float from range
420 [sleep_interval; max_sleep_interval].
1cf376f5 421 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
cfb56d1a
PH
422 listformats: Print an overview of available video formats and exit.
423 list_thumbnails: Print a table of all thumbnails and exit.
0a41f331 424 match_filter: A function that gets called for every video with the signature
425 (info_dict, *, incomplete: bool) -> Optional[str]
426 For backward compatibility with youtube-dl, the signature
427 (info_dict) -> Optional[str] is also allowed.
428 - If it returns a message, the video is ignored.
429 - If it returns None, the video is downloaded.
430 - If it returns utils.NO_DEFAULT, the user is interactively
431 asked whether to download the video.
fe2ce85a 432 - Raise utils.DownloadCancelled(msg) to abort remaining
433 downloads when a video is rejected.
5ca095cb 434 match_filter_func in utils/_utils.py is one example for this.
8417f26b
SS
435 color: A Dictionary with output stream names as keys
436 and their respective color policy as values.
437 Can also just be a single color policy,
438 in which case it applies to all outputs.
439 Valid stream names are 'stdout' and 'stderr'.
440 Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
0a840f58 441 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
504f20dd 442 HTTP header
0a840f58 443 geo_bypass_country:
773f291d
S
444 Two-letter ISO 3166-1 alpha-2 country code that will be used for
445 explicit geographic restriction bypassing via faking
504f20dd 446 X-Forwarded-For HTTP header
5f95927a
S
447 geo_bypass_ip_block:
448 IP range in CIDR notation that will be used similarly to
504f20dd 449 geo_bypass_country
52a8a1e1 450 external_downloader: A dictionary of protocol keys and the executable of the
451 external downloader to use for it. The allowed protocols
452 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
453 Set the value to 'native' to use the native downloader
53ed7066 454 compat_opts: Compatibility options. See "Differences in default behavior".
3acf6d38 455 The following options do not work when used through the API:
b5ae35ee 456 filename, abort-on-error, multistreams, no-live-chat, format-sort
dac5df5a 457 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
e4f02757 458 Refer __init__.py for their implementation
819e0531 459 progress_template: Dictionary of templates for progress outputs.
460 Allowed keys are 'download', 'postprocess',
461 'download-title' (console title) and 'postprocess-title'.
462 The template is mapped on a dictionary with keys 'progress' and 'info'
23326151 463 retry_sleep_functions: Dictionary of functions that takes the number of attempts
464 as argument and returns the time to sleep in seconds.
465 Allowed keys are 'http', 'fragment', 'file_access'
0f446365
SW
466 download_ranges: A callback function that gets called for every video with
467 the signature (info_dict, ydl) -> Iterable[Section].
468 Only the returned sections will be downloaded.
469 Each Section is a dict with the following keys:
5ec1b6b7 470 * start_time: Start time of the section in seconds
471 * end_time: End time of the section in seconds
472 * title: Section title (Optional)
473 * index: Section number (Optional)
0f446365 474 force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
a7dc6a89 475 noprogress: Do not print the progress bar
a831c2ea 476 live_from_start: Whether to download livestream videos from the start
fe7e0c98 477
8222d8de 478 The following parameters are not used by YoutubeDL itself, they are used by
7a5c1cfe 479 the downloader (see yt_dlp/downloader/common.py):
51d9739f 480 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
205a0654 481 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
a7dc6a89 482 continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
9590cc6b 483 external_downloader_args, concurrent_fragment_downloads, progress_delta.
76b1bd67
JMF
484
485 The following options are used by the post processors:
c0b7d117
S
486 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
487 to the binary or its containing directory.
43820c03 488 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
34488702 489 and a list of additional command-line arguments for the
490 postprocessor/executable. The dict can also have "PP+EXE" keys
491 which are used when the given exe is used by the given PP.
492 Use 'default' as the name for arguments to be passed to all PP
493 For compatibility with youtube-dl, a single list of args
494 can also be used
e409895f 495
496 The following options are used by the extractors:
46f1370e 497 extractor_retries: Number of times to retry for known errors (default: 3)
62bff2c1 498 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
e409895f 499 hls_split_discontinuity: Split HLS playlists to different formats at
62bff2c1 500 discontinuities such as ad breaks (default: False)
5d3a0e79 501 extractor_args: A dictionary of arguments to be passed to the extractors.
502 See "EXTRACTOR ARGUMENTS" for details.
62b58c09 503 E.g. {'youtube': {'skip': ['dash', 'hls']}}
88f23a18 504 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
1890fc63 505
506 The following options are deprecated and may be removed in the future:
507
fe2ce85a 508 break_on_reject: Stop the download process when encountering a video that
509 has been filtered out.
510 - `raise DownloadCancelled(msg)` in match_filter instead
fe7866d0 511 force_generic_extractor: Force downloader to use the generic extractor
512 - Use allowed_extractors = ['generic', 'default']
7e9a6125 513 playliststart: - Use playlist_items
514 Playlist item to start at.
515 playlistend: - Use playlist_items
516 Playlist item to end at.
517 playlistreverse: - Use playlist_items
518 Download playlist items in reverse order.
1890fc63 519 forceurl: - Use forceprint
520 Force printing final URL.
521 forcetitle: - Use forceprint
522 Force printing title.
523 forceid: - Use forceprint
524 Force printing ID.
525 forcethumbnail: - Use forceprint
526 Force printing thumbnail URL.
527 forcedescription: - Use forceprint
528 Force printing description.
529 forcefilename: - Use forceprint
530 Force printing final filename.
531 forceduration: - Use forceprint
532 Force printing duration.
533 allsubtitles: - Use subtitleslangs = ['all']
534 Downloads all the subtitles of the video
535 (requires writesubtitles or writeautomaticsub)
536 include_ads: - Doesn't work
537 Download ads as well
538 call_home: - Not implemented
539 Boolean, true iff we are allowed to contact the
540 yt-dlp servers for debugging.
541 post_hooks: - Register a custom postprocessor
542 A list of functions that get called as the final step
543 for each video file, after all postprocessors have been
544 called. The filename will be passed as the only argument.
545 hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
546 Use the native HLS downloader instead of ffmpeg/avconv
547 if True, otherwise use ffmpeg/avconv if False, otherwise
548 use downloader suggested by extractor if None.
549 prefer_ffmpeg: - avconv support is deprecated
550 If False, use avconv instead of ffmpeg if both are available,
551 otherwise prefer ffmpeg.
552 youtube_include_dash_manifest: - Use extractor_args
5d3a0e79 553 If True (default), DASH manifests and related
62bff2c1 554 data will be downloaded and processed by extractor.
555 You can reduce network I/O by disabling it if you don't
556 care about DASH. (only for youtube)
1890fc63 557 youtube_include_hls_manifest: - Use extractor_args
5d3a0e79 558 If True (default), HLS manifests and related
62bff2c1 559 data will be downloaded and processed by extractor.
560 You can reduce network I/O by disabling it if you don't
561 care about HLS. (only for youtube)
8417f26b 562 no_color: Same as `color='no_color'`
6148833f 563 no_overwrites: Same as `overwrites=False`
8222d8de
JMF
564 """
565
86e5f3ed 566 _NUMERIC_FIELDS = {
b8ed0f15 567 'width', 'height', 'asr', 'audio_channels', 'fps',
568 'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
e6f21b3d 569 'timestamp', 'release_timestamp',
c9969434
S
570 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
571 'average_rating', 'comment_count', 'age_limit',
572 'start_time', 'end_time',
573 'chapter_number', 'season_number', 'episode_number',
574 'track_number', 'disc_number', 'release_year',
86e5f3ed 575 }
c9969434 576
6db9c4d5 577 _format_fields = {
578 # NB: Keep in sync with the docstring of extractor/common.py
a44ca5a4 579 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
105bfd90 580 'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
d5d1df8a 581 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
615a8444 582 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data',
62b5c94c 583 'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
7e68567e 584 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
add96eb9 585 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
6db9c4d5 586 }
104a7b5a
L
587 _deprecated_multivalue_fields = {
588 'album_artist': 'album_artists',
589 'artist': 'artists',
590 'composer': 'composers',
591 'creator': 'creators',
592 'genre': 'genres',
593 }
48ee10ee 594 _format_selection_exts = {
8dc59305 595 'audio': set(MEDIA_EXTENSIONS.common_audio),
add96eb9 596 'video': {*MEDIA_EXTENSIONS.common_video, '3gp'},
8dc59305 597 'storyboards': set(MEDIA_EXTENSIONS.storyboards),
48ee10ee 598 }
599
def __init__(self, params=None, auto_init=True):
    """Create a FileDownloader object with the given options.

    @param params       Dictionary of options. It is stored as self.params
                        (not copied) and several keys are normalised in place
                        below. Defaults to an empty dict.
    @param auto_init    Whether to load the default extractors and print header (if verbose).
                        Set to 'no_verbose_header' to not print the header
    @raises YoutubeDLError  if params['impersonate'] names a target that
                            _impersonate_target_available() rejects
    """
    if params is None:
        params = {}
    self.params = params
    # Extractors keyed by ie_key: every registered one, and the instantiated ones
    self._ies = {}
    self._ies_instances = {}
    # One (initially empty) postprocessor chain per POSTPROCESS_WHEN stage
    self._pps = {k: [] for k in POSTPROCESS_WHEN}
    # Messages already emitted with only_once=True (see _write_string)
    self._printed_messages = set()
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._postprocessor_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._num_videos = 0
    self._playlist_level = 0
    self._playlist_urls = set()
    self.cache = Cache(self)
    self.__header_cookies = []

    # Output streams: 'out' moves to stderr with logtostderr, 'screen' moves to
    # stderr with quiet, and 'console' is the first of (stderr, stdout) for which
    # supports_terminal_sequences() is true (always None when compat_os_name is 'nt')
    stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
    self._out_files = Namespace(
        out=stdout,
        error=sys.stderr,
        screen=sys.stderr if self.params.get('quiet') else stdout,
        console=None if compat_os_name == 'nt' else next(
            filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None),
    )

    # Best effort: any failure is only reported as a debug message
    try:
        windows_enable_vt_mode()
    except Exception as e:
        self.write_debug(f'Failed to enable VT mode: {e}')

    # The 'no_color' param overrides 'color'; the override is queued as a warning
    # and reported with the other '_warnings' further down
    if self.params.get('no_color'):
        if self.params.get('color') is not None:
            self.params.setdefault('_warnings', []).append(
                'Overwriting params from "color" with "no_color"')
        self.params['color'] = 'no_color'

    term_allow_color = os.getenv('TERM', '').lower() != 'dumb'
    no_color = bool(os.getenv('NO_COLOR'))

    def process_color_policy(stream):
        # Returns True, False or 'no_color' for the given stream.
        # The policy is looked up under params['color'], per stream name or directly.
        stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
        policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
        if policy in ('auto', None):
            if term_allow_color and supports_terminal_sequences(stream):
                return 'no_color' if no_color else True
            return False
        assert policy in ('always', 'never', 'no_color'), policy
        # 'no_color' is not in the mapping and is passed through unchanged
        return {'always': True, 'never': False}.get(policy, policy)

    self._allow_colors = Namespace(**{
        name: process_color_policy(stream)
        for name, stream in self._out_files.items_ if name != 'console'
    })

    system_deprecation = _get_system_deprecation()
    if system_deprecation:
        self.deprecated_feature(system_deprecation.replace('\n', '\n '))

    if self.params.get('allow_unplayable_formats'):
        self.report_warning(
            f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
            'This is a developer option intended for debugging. \n'
            ' If you experience any issues while using this option, '
            f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

    # Start a 'bidiv' (or, failing that, 'fribidi') subprocess whose stdout is the
    # slave end of a pty; _bidi_workaround() writes to its stdin and reads the
    # result back from the master end (self._output_channel)
    if self.params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = shutil.get_terminal_size().columns
            width_args = [] if width is None else ['-w', str(width)]
            sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
            try:
                self._output_process = Popen(['bidiv', *width_args], **sp_kwargs)
            except OSError:
                self._output_process = Popen(['fribidi', '-c', 'UTF-8', *width_args], **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # Only a missing executable is tolerated; anything else propagates
            if ose.errno == errno.ENOENT:
                self.report_warning(
                    'Could not find fribidi executable, ignoring --bidi-workaround. '
                    'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
    self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
    # A 'Cookie' header is handed to _load_cookies() and then removed from the headers
    self._load_cookies(self.params['http_headers'].get('Cookie'))  # compat
    self.params['http_headers'].pop('Cookie', None)

    if auto_init and auto_init != 'no_verbose_header':
        self.print_debug_header()

    def check_deprecated(param, option, suggestion):
        # Warn if `param` is set; returns whether it was set
        if self.params.get(param) is not None:
            self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
            return True
        return False

    # The deprecated proxy option only fills in for a missing geo_verification_proxy
    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    # Report warnings that were queued in params (including the one queued above)
    for msg in self.params.get('_warnings', []):
        self.report_warning(msg)
    for msg in self.params.get('_deprecation_warnings', []):
        self.deprecated_feature(msg)

    if impersonate_target := self.params.get('impersonate'):
        if not self._impersonate_target_available(impersonate_target):
            raise YoutubeDLError(
                f'Impersonate target "{impersonate_target}" is not available. '
                f'Use --list-impersonate-targets to see available targets. '
                f'You may be missing dependencies required to support this target.')

    if 'list-formats' in self.params['compat_opts']:
        self.params['listformats_table'] = False

    if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
        # nooverwrites was unnecessarily changed to overwrites
        # in 0c3d0f51778b153f65c21906031c2e091fcfb641
        # This ensures compatibility with both keys
        self.params['overwrites'] = not self.params['nooverwrites']
    elif self.params.get('overwrites') is None:
        self.params.pop('overwrites', None)
    else:
        self.params['nooverwrites'] = not self.params['overwrites']

    # Any of the list_* options implies simulation unless 'simulate' was set explicitly
    if self.params.get('simulate') is None and any((
        self.params.get('list_thumbnails'),
        self.params.get('listformats'),
        self.params.get('listsubtitles'),
    )):
        self.params['simulate'] = 'list_only'

    self.params.setdefault('forceprint', {})
    self.params.setdefault('print_to_file', {})

    # Compatibility with older syntax
    if not isinstance(params['forceprint'], dict):
        self.params['forceprint'] = {'video': params['forceprint']}

    if auto_init:
        self.add_default_info_extractors()

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    # Runs after 'restrictfilenames' may have been forced on above, since
    # _parse_outtmpl() reads that param
    self._parse_outtmpl()

    # Creating format selector here allows us to catch syntax errors before the extraction
    # (None, '-' and callables are stored as given; anything else is compiled)
    self.format_selector = (
        self.params.get('format') if self.params.get('format') in (None, '-')
        else self.params['format'] if callable(self.params['format'])
        else self.build_format_selector(self.params['format']))

    # Register the hooks supplied through params
    hooks = {
        'post_hooks': self.add_post_hook,
        'progress_hooks': self.add_progress_hook,
        'postprocessor_hooks': self.add_postprocessor_hook,
    }
    for opt, fn in hooks.items():
        for ph in self.params.get(opt, []):
            fn(ph)

    # Each definition is copied before 'when' and 'key' are popped, so the
    # caller's dicts are left intact; the remaining items become keyword arguments
    for pp_def_raw in self.params.get('postprocessors', []):
        pp_def = dict(pp_def_raw)
        when = pp_def.pop('when', 'post_process')
        self.add_post_processor(
            get_postprocessor(pp_def.pop('key'))(self, **pp_def),
            when=when)

    def preload_download_archive(fn):
        """Preload the archive, if any is specified"""
        archive = set()
        if fn is None:
            return archive
        elif not is_path_like(fn):
            # Not a path: the given object is used as the archive itself
            return fn

        self.write_debug(f'Loading archive file {fn!r}')
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    archive.add(line.strip())
        except OSError as ioe:
            # A missing archive file just means an empty archive
            if ioe.errno != errno.ENOENT:
                raise
        return archive

    self.archive = preload_download_archive(self.params.get('download_archive'))
ed39cac5 810
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn about arguments that look like a short YouTube ID starting with a dash.

    An argument matches when it is a dash followed by exactly ten characters
    from [0-9A-Za-z_-]. The warning shows the command line rewritten with the
    matching arguments moved after a "--" separator.
    """
    looks_like_id = re.compile(r'^-[0-9A-Za-z_-]{10}$').match
    suspects = [pos for pos, arg in enumerate(argv) if looks_like_id(arg)]
    if not suspects:
        return

    kept = [arg for pos, arg in enumerate(argv) if pos not in suspects]
    moved = [argv[pos] for pos in suspects]
    correct_argv = ['yt-dlp', *kept, '--', *moved]
    self.report_warning(
        'Long argument string detected. '
        f'Use -- to separate parameters and URLs, like this:\n{shell_quote(correct_argv)}')
7d4111ed 825
8222d8de
JMF
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list.

    The extractor is always recorded under its ie_key(). Only when it is an
    instance (not a class) is it also stored as an instance and bound to this
    downloader.
    """
    key = ie.ie_key()
    self._ies[key] = ie
    if isinstance(ie, type):
        return
    self._ies_instances[key] = ie
    ie.set_downloader(self)
8222d8de 833
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key. If no instance exists yet,
    one is created from the class returned by the module-level
    get_info_extractor(), added via add_info_extractor(), and returned.
    """
    cached = self._ies_instances.get(ie_key)
    if cached is not None:
        return cached
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
845
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Add the extractors selected by the 'allowed_extractors' param (by default
    those with _ENABLED set) to the end of the list.

    Raises ValueError when an entry of 'allowed_extractors' is an invalid regex.
    """
    all_ies = {}
    for ie_class in gen_extractor_classes():
        all_ies[ie_class.IE_NAME.lower()] = ie_class
    all_ies['end'] = UnsupportedURLIE()

    try:
        requested = self.params.get('allowed_extractors', ['default'])
        aliases = {
            'all': list(all_ies),
            'default': [name for name, ie in all_ies.items() if ie._ENABLED],
        }
        ie_names = orderedSet_from_options(requested, aliases, use_regex=True)
    except re.error as e:
        raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')

    for name in ie_names:
        self.add_info_extractor(all_ies[name])
    self.write_debug(f'Loaded {len(ie_names)} extractors')
023fa8c4 863
def add_post_processor(self, pp, when='post_process'):
    """Append a PostProcessor to the chain for stage `when` and bind it to this downloader.

    `when` must be one of POSTPROCESS_WHEN.
    """
    assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
    chain = self._pps[when]
    chain.append(pp)
    pp.set_downloader(self)
869
ab8e5e51
AM
def add_post_hook(self, ph):
    """Register `ph` by appending it to the list of post hooks."""
    registered = self._post_hooks
    registered.append(ph)
873
def add_progress_hook(self, ph):
    """Register `ph` by appending it to the list of download progress hooks."""
    registered = self._progress_hooks
    registered.append(ph)
8ab470f1 877
def add_postprocessor_hook(self, ph):
    """Register a postprocessing progress hook.

    The hook is recorded and also attached to every postprocessor that is
    already present in any chain.
    """
    self._postprocessor_hooks.append(ph)
    for pp in itertools.chain.from_iterable(self._pps.values()):
        pp.add_progress_hook(ph)
819e0531 884
1c088fa8 885 def _bidi_workaround(self, message):
5d681e96 886 if not hasattr(self, '_output_channel'):
1c088fa8
PH
887 return message
888
5d681e96 889 assert hasattr(self, '_output_process')
14f25df2 890 assert isinstance(message, str)
6febd1c1 891 line_count = message.count('\n') + 1
0f06bcd7 892 self._output_process.stdin.write((message + '\n').encode())
5d681e96 893 self._output_process.stdin.flush()
0f06bcd7 894 res = ''.join(self._output_channel.readline().decode()
9e1a5b84 895 for _ in range(line_count))
6febd1c1 896 return res[:-len('\n')]
1c088fa8 897
b35496d8 898 def _write_string(self, message, out=None, only_once=False):
899 if only_once:
900 if message in self._printed_messages:
901 return
902 self._printed_messages.add(message)
903 write_string(message, out=out, encoding=self.params.get('encoding'))
734f90bb 904
def to_stdout(self, message, skip_eol=False, quiet=None):
    """Print message, followed by a newline, to the 'out' stream.

    `quiet` and `skip_eol` are no longer honoured; passing either one only
    triggers a deprecation warning.
    """
    quiet_given = quiet is not None
    skip_eol_given = skip_eol is not False
    if quiet_given:
        self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                 'Use "YoutubeDL.to_screen" instead')
    if skip_eol_given:
        self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                 'Use "YoutubeDL.to_screen" instead')
    line = self._bidi_workaround(message)
    self._write_string(f'{line}\n', self._out_files.out)
cf4f42cb 914
def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
    """Print message to screen if not in quiet mode.

    With a 'logger' param the message goes to logger.debug() instead. `quiet`
    overrides the 'quiet' param when given; 'verbose' disables the suppression.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    suppressed = self.params.get('quiet') if quiet is None else quiet
    if suppressed and not self.params.get('verbose'):
        return
    line = self._bidi_workaround(message)
    eol = '' if skip_eol else '\n'
    self._write_string(f'{line}{eol}', self._out_files.screen, only_once=only_once)
8222d8de 925
def to_stderr(self, message, only_once=False):
    """Print message, followed by a newline, to the 'error' stream.

    With a 'logger' param the message goes to logger.error() instead.
    """
    assert isinstance(message, str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    line = self._bidi_workaround(message)
    self._write_string(f'{line}\n', self._out_files.error, only_once=only_once)
cf4f42cb 933
def _send_console_code(self, code):
    """Write `code` to the console stream; a no-op on 'nt' or without a console stream."""
    if compat_os_name != 'nt' and self._out_files.console:
        self._write_string(code, self._out_files.console)
8222d8de 938
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console title to `message` when the 'consoletitle' param is set.

    Terminal sequences are stripped from the message first. On 'nt' the title
    is set through kernel32 (only if a console window exists); elsewhere an
    escape sequence is sent to the console stream.
    """
    if not self.params.get('consoletitle', False):
        return
    message = remove_terminal_sequences(message)
    if compat_os_name != 'nt':
        self._send_console_code(f'\033]0;{message}\007')
        return
    kernel32 = ctypes.windll.kernel32
    if kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
1e5b9a95 950
def save_console_title(self):
    """Send the "save title" console code, unless 'consoletitle' is off or 'simulate' is set."""
    if self.params.get('consoletitle') and not self.params.get('simulate'):
        self._send_console_code('\033[22;0t')  # Save the title on stack
bdde425c
PH
955
def restore_console_title(self):
    """Send the "restore title" console code, unless 'consoletitle' is off or 'simulate' is set."""
    if self.params.get('consoletitle') and not self.params.get('simulate'):
        self._send_console_code('\033[23;0t')  # Restore the title from stack
bdde425c
PH
960
961 def __enter__(self):
962 self.save_console_title()
963 return self
964
def save_cookies(self):
    """Save the cookie jar; does nothing when the 'cookiefile' param is not set."""
    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save()
bdde425c 968
c365dba8 969 def __exit__(self, *args):
970 self.restore_console_title()
227bf1a3 971 self.close()
972
def close(self):
    """Save cookies and, if a request director exists on this instance, close and drop it."""
    self.save_cookies()
    # Checked through __dict__ so that the attribute is only used if it was
    # already stored on the instance
    if '_request_director' not in self.__dict__:
        return
    self._request_director.close()
    del self._request_director
c365dba8 978
def trouble(self, message=None, tb=None, is_error=True):
    """Determine action to take when a download problem appears.

    The message (if any) is printed to stderr first; in verbose mode a
    traceback is printed as well. Then, depending on the 'ignoreerrors'
    param, either DownloadError is raised or the return code is set to 1.

    @param tb          If given, is additional traceback information
    @param is_error    Whether to raise error according to ignorerrors
    """
    def wrapped_exc_info():
        # exc_info carried by the exception currently being handled, if any
        exc = sys.exc_info()[1]
        if hasattr(exc, 'exc_info') and exc.exc_info[0]:
            return exc.exc_info
        return None

    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                parts = []
                inner = wrapped_exc_info()
                if inner is not None:
                    parts.append(''.join(traceback.format_exception(*inner)))
                parts.append(encode_compat_str(traceback.format_exc()))
                tb = ''.join(parts)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if not is_error:
        return
    if self.params.get('ignoreerrors'):
        self._download_retcode = 1
        return

    exc_info = wrapped_exc_info()
    if exc_info is None:
        exc_info = sys.exc_info()
    raise DownloadError(message, exc_info)
1012
# Named text styles; the values are passed as the style argument of
# _format_text() / _format_err() elsewhere in this class
Styles = Namespace(**{
    'HEADERS': 'yellow',
    'EMPHASIS': 'light blue',
    'FILENAME': 'green',
    'ID': 'green',
    'DELIM': 'blue',
    'ERROR': 'red',
    'BAD_FORMAT': 'light red',
    'WARNING': 'yellow',
    'SUPPRESS': 'light black',
})
ec11a9f4 1024
7578d77d 1025 def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
e5a998f3 1026 text = str(text)
ec11a9f4 1027 if test_encoding:
1028 original_text = text
5c104538 1029 # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
1030 encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
ec11a9f4 1031 text = text.encode(encoding, 'ignore').decode(encoding)
1032 if fallback is not None and text != original_text:
1033 text = fallback
8417f26b 1034 return format_text(text, f) if allow_colors is True else text if fallback is None else fallback
ec11a9f4 1035
591bb9d3 1036 def _format_out(self, *args, **kwargs):
1037 return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
1038
ec11a9f4 1039 def _format_screen(self, *args, **kwargs):
591bb9d3 1040 return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
ec11a9f4 1041
1042 def _format_err(self, *args, **kwargs):
591bb9d3 1043 return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
819e0531 1044
def report_warning(self, message, only_once=False):
    """
    Print the message to stderr, prefixed with 'WARNING:' (styled with
    Styles.WARNING). With a 'logger' param the bare message goes to
    logger.warning() instead; the 'no_warnings' param silences the stderr path.
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    prefix = self._format_err('WARNING:', self.Styles.WARNING)
    self.to_stderr(f'{prefix} {message}', only_once)
8222d8de 1056
def deprecation_warning(self, message, *, stacklevel=0):
    """Forward to the module-level deprecation_warning().

    report_error is used as the printer, is_error is False, and the stack
    level is raised by one to account for this wrapper.
    """
    options = {
        'stacklevel': stacklevel + 1,
        'printer': self.report_error,
        'is_error': False,
    }
    deprecation_warning(message, **options)
1060
def deprecated_feature(self, message):
    """Report the use of a deprecated feature, at most once per message on stderr.

    With a 'logger' param the notice goes to logger.warning() only. Otherwise
    it is printed to stderr with a 'Deprecated Feature:' prefix (styled with
    Styles.ERROR), using only_once so repeated notices are not duplicated.
    """
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(f'Deprecated Feature: {message}')
        # Stop here: to_stderr() would hand the same notice to logger.error(),
        # logging it a second time and at error level
        return
    self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)
ee8dd27a 1065
def report_error(self, message, *args, **kwargs):
    """
    Do the same as trouble, but prefix the message with 'ERROR:' (styled with
    Styles.ERROR). Extra arguments are passed through to trouble.
    """
    prefix = self._format_err('ERROR:', self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', *args, **kwargs)
8222d8de 1072
def write_debug(self, message, only_once=False):
    """Emit a '[debug] '-prefixed message when the 'verbose' param is set.

    The message goes to logger.debug() if a 'logger' param is present,
    otherwise to stderr.
    """
    if not self.params.get('verbose', False):
        return
    message = f'[debug] {message}'
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    self.to_stderr(message, only_once)
0760b0a7 1082
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Report that the file has already been fully downloaded.

    If printing the file name raises UnicodeEncodeError, a message without
    the name is printed instead.
    """
    generic = '[download] The file has already been downloaded'
    try:
        self.to_screen(f'[download] {file_name} has already been downloaded')
    except UnicodeEncodeError:
        self.to_screen(generic)
8222d8de 1089
def report_file_delete(self, file_name):
    """Report that an existing file will be deleted.

    If printing the file name raises UnicodeEncodeError, a message without
    the name is printed instead.
    """
    generic = 'Deleting existing file'
    try:
        self.to_screen(f'Deleting existing file {file_name}')
    except UnicodeEncodeError:
        self.to_screen(generic)
0c3d0f51 1096
def raise_no_formats(self, info, forced=False, *, msg=None):
    """Raise ExtractorError, or only warn, about a video without usable formats.

    @param info      Info dict; '_has_drm', 'id' and 'extractor' are read from it
    @param forced    Raise even if the 'ignore_no_formats_error' param is set
    @param msg       Message to use; defaults to a DRM notice when the info dict
                     has '_has_drm' set, else to a generic "no formats" message
    """
    has_drm = info.get('_has_drm')
    ignored = self.params.get('ignore_no_formats_error')
    expected = bool(msg)
    if not msg:
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'

    if ignored and not forced:
        self.report_warning(msg)
        return
    raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                         expected=has_drm or ignored or expected)
1106
def parse_outtmpl(self):
    """Deprecated: emit a deprecation warning, normalise the 'outtmpl' param and return it."""
    self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
    self._parse_outtmpl()
    templates = self.params['outtmpl']
    return templates
1111
def _parse_outtmpl(self):
    """Normalise the 'outtmpl' param into a dict and fill in missing defaults.

    A non-dict value becomes {'default': value}. Every DEFAULT_OUTTMPL key
    that is missing or None gets the default template; with the
    'restrictfilenames' param the spaces are removed from those defaults.
    """
    if self.params.get('restrictfilenames'):  # Remove spaces in the default template
        def sanitize(tmpl):
            return tmpl.replace(' - ', ' ').replace(' ', '-')
    else:
        sanitize = IDENTITY

    outtmpl = self.params.setdefault('outtmpl', {})
    if not isinstance(outtmpl, dict):
        outtmpl = {'default': outtmpl}
        self.params['outtmpl'] = outtmpl
    missing = {
        name: sanitize(tmpl)
        for name, tmpl in DEFAULT_OUTTMPL.items()
        if outtmpl.get(name) is None
    }
    outtmpl.update(missing)
de6000d9 1121
def get_output_path(self, dir_type='', filename=None):
    """Join the 'home' path, the path configured for `dir_type` and `filename`.

    The paths come from the 'paths' param (which must be a dict) and are
    stripped and expanded; the joined result is passed through
    sanitize_path(), forced when the 'windowsfilenames' param is set.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    joined = os.path.join(home_dir, type_dir, filename or '')
    return sanitize_path(joined, force=self.params.get('windowsfilenames'))
1130
@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Run expand_path() on the template while keeping '%%' and '$$' intact."""
    # expand_path would turn '%%' into '%' and '$$' into '$', but '%%' has to
    # survive until the template dict substitution step. So a random marker is
    # wedged between the doubled characters and removed again afterwards.
    marker = ''.join(random.choices(string.ascii_letters, k=32))
    protected = outtmpl.replace('%%', f'%{marker}%').replace('$$', f'${marker}$')

    # The expansion happens before the template dict substitution because meta
    # fields may contain env variables that must not be expanded. E.g. for
    # outtmpl "%(title)s.%(ext)s" and title "Hello $PATH", `$PATH` stays as is.
    expanded = expand_path(protected)
    return expanded.replace(marker, '')
1145
@staticmethod
def escape_outtmpl(outtmpl):
    """ Escape any remaining strings like %s, %abc% etc. """
    pattern = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')

    def add_percent(mobj):
        # Matches that carry a key are kept as is; the others get an extra '%'
        prefix = '' if mobj.group('has_key') else '%'
        return prefix + mobj.group(0)

    return re.sub(pattern, add_percent, outtmpl)
1153
@classmethod
def validate_outtmpl(cls, outtmpl):
    """ @return None or Exception object """
    # Replace the custom conversion types with plain 's' so that the
    # %-formatting trial below only has to deal with standard types
    custom_types = STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]')

    def as_string_type(mobj):
        return f'{mobj.group(0)[:-1]}s'

    outtmpl = re.sub(custom_types, as_string_type, cls._outtmpl_expandpath(outtmpl))
    try:
        cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None
1166
03b4de72 1167 @staticmethod
1168 def _copy_infodict(info_dict):
1169 info_dict = dict(info_dict)
09b49e1f 1170 info_dict.pop('__postprocessors', None)
415f8d51 1171 info_dict.pop('__pending_error', None)
03b4de72 1172 return info_dict
1173
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

    Every "%(key)type" field of the template is evaluated against info_dict
    here; the returned template refers to the computed values through
    generated keys of the returned dict.

    @param outtmpl      The output template string
    @param info_dict    The info dict. 'epoch' is set on it if missing; all
                        other changes are made on a copy
    @param sanitize     Whether to sanitize the output as a filename.
                        For backward compatibility, a function can also be passed
    @return             Tuple of (rewritten template, dict of values for it)
    """

    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    # From here on only a copy is modified
    info_dict = self._copy_infodict(info_dict)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
    info_dict['video_autonumber'] = self._num_videos
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
        'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    # Values computed for the template fields, keyed by the generated keys
    TMPL_DICT = {}
    # Matches a whole "%(key)type" field, including the custom types ljhqBUDS
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
    # Operators accepted in the maths part of a key; operands are floats
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
        '*': float.__mul__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int, slice or "{field, ...}"
    FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}  # noqa: UP031
    FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {  # noqa: UP031
        'inner': FIELD_INNER_RE,
        'field': rf'\w*(?:\.{FIELD_INNER_RE})*',
    }
    MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
    MATH_OPERATORS_RE = r'(?:{})'.format('|'.join(map(re.escape, MATH_FUNCTIONS.keys())))
    # Grammar of the text inside "%(...)": optional negation, the field path,
    # maths, ">strftime format", then ",alternate", "&replacement" and "|default"
    INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
        (?P<negate>-)?
        (?P<fields>{FIELD_RE})
        (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
        (?:>(?P<strf_format>.+?))?
        (?P<remaining>
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
        )$''')

    def _from_user_input(field):
        # Convert one path component: ':' -> Ellipsis, 'a:b[:c]' -> slice,
        # integer text -> int, anything else stays a string
        if field == ':':
            return ...
        elif ':' in field:
            return slice(*map(int_or_none, field.split(':')))
        elif int_or_none(field) is not None:
            return int(field)
        return field

    def _traverse_infodict(fields):
        # Split on '.', but keep every "{...}" group together as one component
        fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                  for f in ([x] if x.startswith('{') else x.split('.'))]
        # Drop an empty first and/or last component
        for i in (0, -1):
            if fields and not fields[i]:
                fields.pop(i)

        for i, f in enumerate(fields):
            if not f.startswith('{'):
                fields[i] = _from_user_input(f)
                continue
            assert f.endswith('}'), f'No closing brace for {f} in {fields}'
            # "{a,b.c}" becomes a dict mapping each listed field to its parsed path
            fields[i] = {k: list(map(_from_user_input, k.split('.'))) for k in f[1:-1].split(',')}

        return traverse_obj(info_dict, fields, traverse_string=True)

    def get_value(mdict):
        # mdict is the groupdict() of an INTERNAL_FORMAT_RE match
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            # Alternates between reading an operator and reading its operand
            operator = None
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if operator is None:
                    operator = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                # The operand is either a number or another field
                offset = float_or_none(item)
                if offset is None:
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = operator(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    # e.g. value or offset is None: the whole field has no value
                    return None
                operator = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
        if sanitize and value == '':
            value = None
        return value

    # Placeholder for fields that have no value and no explicit default
    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
        # is_id is derived from the key only under the 'filename-sanitization' compat option
        return sanitize_filename(str(value), restricted=restricted, is_id=(
            bool(re.search(r'(^|[_.])id(\.|$)', key))
            if 'filename-sanitization' in self.params['compat_opts']
            else NO_DEFAULT))

    # `sanitize` may be a function (backward compatibility): it then becomes the
    # sanitizer, and `sanitize` itself is reduced to a flag for the code below
    sanitizer = sanitize if callable(sanitize) else filename_sanitizer
    sanitize = bool(sanitize)

    def _dumpjson_default(obj):
        # json.dumps() fallback: sets and LazyLists become lists, the rest repr()
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        return repr(obj)

    class _ReplacementFormatter(string.Formatter):
        # Formatter for "&replacement" text: numeric field names all resolve
        # to the first argument, any other field name is rejected
        def get_field(self, field_name, args, kwargs):
            if field_name.isdigit():
                return args[0], -1
            raise ValueError('Unsupported field')

    replacement_formatter = _ReplacementFormatter()

    def create_key(outer_mobj):
        # re.sub() callback for EXTERNAL_FORMAT_RE: computes the value of one
        # template field, stores it in TMPL_DICT and returns the rewritten field
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        value, replacement, default, last_field = None, None, na, ''
        # Try the field and then its ",alternate" fields until one has a value
        while mobj:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else default
            value = get_value(mobj)
            last_field, replacement = mobj['fields'], mobj['replacement']
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
            else:
                break

        if None not in (value, replacement):
            try:
                value = replacement_formatter.format(replacement, value)
            except ValueError:
                value, default = None, na

        fmt = outer_mobj.group('format')
        if fmt == 's' and last_field in field_size_compat_map and isinstance(value, int):
            fmt = f'0{field_size_compat_map[last_field]:d}d'

        flags = outer_mobj.group('conversion') or ''
        # Same format spec, but with the conversion type replaced by 's'
        str_fmt = f'{fmt[:-1]}s'
        if value is None:
            value, fmt = default, 's'
        elif fmt[-1] == 'l':  # list
            delim = '\n' if '#' in flags else ', '
            value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
        elif fmt[-1] == 'j':  # json
            value, fmt = json.dumps(
                value, default=_dumpjson_default,
                indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
        elif fmt[-1] == 'h':  # html
            value, fmt = escapeHTML(str(value)), str_fmt
        elif fmt[-1] == 'q':  # quoted
            value = map(str, variadic(value) if '#' in flags else [value])
            value, fmt = shell_quote(value, shell=True), str_fmt
        elif fmt[-1] == 'B':  # bytes
            value = f'%{str_fmt}'.encode() % str(value).encode()
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif fmt[-1] == 'U':  # unicode normalized
            value, fmt = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF{}{}'.format('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                value), str_fmt
        elif fmt[-1] == 'D':  # decimal suffix
            num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
            value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                          factor=1024 if '#' in flags else 1000)
        elif fmt[-1] == 'S':  # filename sanitization
            value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
        elif fmt[-1] == 'c':
            # First character of the value; an empty/falsy value is formatted as a string
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif fmt[-1] not in 'rsa':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            # If value is an object, sanitize might convert it to a string
            # So we convert it to repr first
            if fmt[-1] == 'r':
                value, fmt = repr(value), str_fmt
            elif fmt[-1] == 'a':
                value, fmt = ascii(value), str_fmt
            if fmt[-1] in 'csra':
                value = sanitizer(last_field, value)

        # The generated key has '\0' inserted after each '%' and ends with '\0'
        # plus the original conversion type
        key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1396
def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Render an output template against an info dict and return the resulting string

    Any extra positional/keyword arguments are forwarded to prepare_outtmpl.
    """
    prepared_tmpl, values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    escaped_tmpl = self.escape_outtmpl(prepared_tmpl)
    return escaped_tmpl % values
1400
5127e92a 1401 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1402 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1403 if outtmpl is None:
bf1824b3 1404 outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
8222d8de 1405 try:
5127e92a 1406 outtmpl = self._outtmpl_expandpath(outtmpl)
e0fd9573 1407 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
6a0546e3 1408 if not filename:
1409 return None
15da37c7 1410
5127e92a 1411 if tmpl_type in ('', 'temp'):
6a0546e3 1412 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1413 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1414 filename = replace_extension(filename, ext, final_ext)
5127e92a 1415 elif tmpl_type:
6a0546e3 1416 force_ext = OUTTMPL_TYPES[tmpl_type]
1417 if force_ext:
1418 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
de6000d9 1419
bdc3fd2f
U
1420 # https://github.com/blackjack4494/youtube-dlc/issues/85
1421 trim_file_name = self.params.get('trim_file_name', False)
1422 if trim_file_name:
5c22c63d 1423 no_ext, *ext = filename.rsplit('.', 2)
1424 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
bdc3fd2f 1425
0202b52a 1426 return filename
8222d8de 1427 except ValueError as err:
6febd1c1 1428 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
1429 return None
1430
def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
    """Generate the output filename

    @param info_dict    Info dict used to evaluate the template
    @param dir_type     Template/path type; mutually exclusive with outtmpl
    @param outtmpl      Explicit output template to use instead of dir_type
    @param warn         Whether to warn when the 'paths' param will be ignored
    @returns            The result of get_output_path for the filename. '-' and
                        empty filenames are returned as-is, except that an empty
                        filename yields '' when dir_type is neither '' nor 'temp'
    """
    if outtmpl:
        assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
        dir_type = None
    filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
    if not filename and dir_type not in ('', 'temp'):
        return ''

    if warn and self.params.get('paths'):
        if filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        elif os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)

    if not filename or filename == '-':
        return filename
    return self.get_output_path(dir_type, filename)
0202b52a 1451
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """Returns None if the file should be downloaded

    Otherwise returns a string with the reason the entry is skipped.

    @param info_dict    Info dict of the entry being considered
    @param incomplete   Passed on to the 'match_filter' callable; must be True
                        unless the entry is a video result
    @param silent       If True, do not print the skip reason to screen

    If the param named by the matching break option ('break_on_existing',
    'break_on_reject' or 'match_filter') is truthy, the corresponding
    exception is raised instead of returning the reason.
    """
    # With the 'playlist-match-filter' compat option every entry is treated as a video
    _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
    assert incomplete or _type == 'video', 'Only video result can be considered complete'

    video_title = info_dict.get('title', info_dict.get('id', 'entry'))

    def check_filter():
        """Return the rejection reason for the entry, or None if it passes all filters"""
        if _type in ('playlist', 'multi_video'):
            return
        elif _type in ('url', 'url_transparent') and not try_call(
                lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
            # URL results are only filtered when the extractor says they are a single video
            return

        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

        date = info_dict.get('upload_date')
        if date is not None:
            date_range = self.params.get('daterange', DateRange())
            if date not in date_range:
                return f'{date_from_str(date).isoformat()} upload date is not in range {date_range}'
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return f'Skipping "{video_title}" because it is age restricted'

        match_filter = self.params.get('match_filter')
        if match_filter is None:
            return None

        # Set when the filter raised DownloadCancelled with msg NO_DEFAULT
        cancelled = None
        try:
            try:
                ret = match_filter(info_dict, incomplete=incomplete)
            except TypeError:
                # For backward compatibility
                ret = None if incomplete else match_filter(info_dict)
        except DownloadCancelled as err:
            if err.msg is not NO_DEFAULT:
                raise
            ret, cancelled = err.msg, err

        if ret is NO_DEFAULT:
            # NO_DEFAULT result: ask the user; loops until an accepted answer is given
            while True:
                filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
                reply = input(self._format_screen(
                    f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
                if reply in {'y', ''}:
                    return None
                elif reply == 'n':
                    if cancelled:
                        # Re-raise the same exception type with the skip message
                        raise type(cancelled)(f'Skipping {video_title}')
                    return f'Skipping {video_title}'
        return ret

    if self.in_download_archive(info_dict):
        reason = ''.join((
            format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
            format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
            'has already been recorded in the archive'))
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        try:
            reason = check_filter()
        except DownloadCancelled as e:
            # The filter itself requested cancellation; re-raised below only if 'match_filter' is truthy
            reason, break_opt, break_err = e.msg, 'match_filter', type(e)
        else:
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
    if reason is not None:
        if not silent:
            self.to_screen('[download] ' + reason)
        if self.params.get(break_opt, False):
            raise break_err()
    return reason
fe7e0c98 1542
b6c45014
JMF
1543 @staticmethod
1544 def add_extra_info(info_dict, extra_info):
add96eb9 1545 """Set the keys from extra_info in info dict if they are missing"""
b6c45014
JMF
1546 for key, value in extra_info.items():
1547 info_dict.setdefault(key, value)
1548
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract and return the information dictionary of the URL

    Arguments:
    @param url          URL to extract

    Keyword arguments:
    @param download     Whether to download videos
    @param process      Whether to resolve all unresolved references (URLs, playlist items).
                        Must be True for download to work
    @param ie_key       Use only the extractor with this key

    @param extra_info   Dictionary containing the extra values to add to the info (For internal use only)
    @param force_generic_extractor  Force using the generic extractor (Deprecated; use ie_key='Generic')
    """
    extra_info = {} if extra_info is None else extra_info

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    # Restrict the candidates to the requested extractor (if any)
    if not ie_key:
        candidates = self._ies
    elif ie_key in self._ies:
        candidates = {ie_key: self._ies[ie_key]}
    else:
        candidates = {}

    for key, ie in candidates.items():
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = ie.get_temp_id(url)
        already_archived = temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key})
        if not already_archived:
            return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)

        self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
                       'has already been recorded in the archive')
        if self.params.get('break_on_existing', False):
            raise ExistingVideoReached
        break
    else:
        # No candidate extractor accepted the URL
        extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
        self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
                          tb=False if extractors_restricted else None)
a0566bbf 1598
7e88d7d7 1599 def _handle_extraction_exceptions(func):
b5ae35ee 1600 @functools.wraps(func)
a0566bbf 1601 def wrapper(self, *args, **kwargs):
6da22e7d 1602 while True:
1603 try:
1604 return func(self, *args, **kwargs)
1605 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
8222d8de 1606 raise
6da22e7d 1607 except ReExtractInfo as e:
1608 if e.expected:
1609 self.to_screen(f'{e}; Re-extracting data')
1610 else:
1611 self.to_stderr('\r')
1612 self.report_warning(f'{e}; Re-extracting data')
1613 continue
1614 except GeoRestrictedError as e:
1615 msg = e.msg
1616 if e.countries:
add96eb9 1617 msg += '\nThis video is available in {}.'.format(', '.join(
1618 map(ISO3166Utils.short2full, e.countries)))
6da22e7d 1619 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1620 self.report_error(msg)
1621 except ExtractorError as e: # An error we somewhat expected
1622 self.report_error(str(e), e.format_traceback())
1623 except Exception as e:
1624 if self.params.get('ignoreerrors'):
1625 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1626 else:
1627 raise
1628 break
a0566bbf 1629 return wrapper
1630
693f0600 1631 def _wait_for_video(self, ie_result={}):
f2ebc5c7 1632 if (not self.params.get('wait_for_video')
1633 or ie_result.get('_type', 'video') != 'video'
1634 or ie_result.get('formats') or ie_result.get('url')):
1635 return
1636
1637 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1638 last_msg = ''
1639
1640 def progress(msg):
1641 nonlocal last_msg
a7dc6a89 1642 full_msg = f'{msg}\n'
1643 if not self.params.get('noprogress'):
1644 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1645 elif last_msg:
1646 return
1647 self.to_screen(full_msg, skip_eol=True)
f2ebc5c7 1648 last_msg = msg
1649
1650 min_wait, max_wait = self.params.get('wait_for_video')
1651 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1652 if diff is None and ie_result.get('live_status') == 'is_upcoming':
16c620bc 1653 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
f2ebc5c7 1654 self.report_warning('Release time of video is not known')
693f0600 1655 elif ie_result and (diff or 0) <= 0:
f2ebc5c7 1656 self.report_warning('Video should already be available according to extracted info')
38d79fd1 1657 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
f2ebc5c7 1658 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1659
1660 wait_till = time.time() + diff
1661 try:
1662 while True:
1663 diff = wait_till - time.time()
1664 if diff <= 0:
1665 progress('')
1666 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1667 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1668 time.sleep(1)
1669 except KeyboardInterrupt:
1670 progress('')
1671 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1672 except BaseException as e:
1673 if not isinstance(e, ReExtractInfo):
1674 self.to_screen('')
1675 raise
1676
def _load_cookies(self, data, *, autoscope=True):
    """Loads cookies from a `Cookie` header

    This tries to work around the security vulnerability of passing cookies to every domain.
    See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj

    @param data         The Cookie header as string to load the cookies from
    @param autoscope    If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains
                        If `True`, save cookies for later to be stored in the jar with a limited scope
                        If a URL, save cookies in the jar with the domain of the URL
    """
    for cookie in LenientSimpleCookie(data).values():
        # A plain Cookie header must not carry any cookie attributes
        if autoscope and any(cookie.values()):
            raise ValueError('Invalid syntax in Cookie Header')

        domain = cookie.get('domain') or ''
        cookie_path = cookie.get('path')
        expiry = cookie.get('expires')
        expiry = None if expiry == '' else expiry  # 0 is valid
        prepared_cookie = http.cookiejar.Cookie(
            version=cookie.get('version') or 0, name=cookie.key, value=cookie.value,
            port=None, port_specified=False,
            domain=domain, domain_specified=True, domain_initial_dot=True,
            path=cookie_path or '', path_specified=bool(cookie_path),
            secure=cookie.get('secure') or False, expires=expiry,
            discard=False, comment=None, comment_url=None, rest={})

        if domain:
            self.cookiejar.set_cookie(prepared_cookie)
        elif autoscope is True:
            self.deprecated_feature(
                'Passing cookies as a header is a potential security risk; '
                'they will be scoped to the domain of the downloaded urls. '
                'Please consider loading cookies from a file or browser instead.')
            self.__header_cookies.append(prepared_cookie)
        elif autoscope:
            # autoscope is a URL here: scope the cookie to that URL's domain
            self.report_warning(
                'The extractor result contains an unscoped cookie as an HTTP header. '
                f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
                only_once=True)
            self._apply_header_cookies(autoscope, [prepared_cookie])
        else:
            self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
                              tb=False, is_error=False)
1718
6c5211ce 1719 def _apply_header_cookies(self, url, cookies=None):
31215122
SS
1720 """Applies stray header cookies to the provided url
1721
1722 This loads header cookies and scopes them to the domain provided in `url`.
1723 While this is not ideal, it helps reduce the risk of them being sent
1724 to an unintended destination while mostly maintaining compatibility.
1725 """
1726 parsed = urllib.parse.urlparse(url)
1727 if not parsed.hostname:
1728 return
1729
6c5211ce 1730 for cookie in map(copy.copy, cookies or self.__header_cookies):
31215122
SS
1731 cookie.domain = f'.{parsed.hostname}'
1732 self.cookiejar.set_cookie(cookie)
1733
@_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run the extractor `ie` on `url` and optionally process its result

    @returns    None if the extractor returned nothing, the raw extractor result
                (with default extra info added) if `process` is false, else the
                return value of process_ie_result
    """
    self._apply_header_cookies(url)

    try:
        ie_result = ie.extract(url)
    except UserNotLive as err:
        if process and self.params.get('wait_for_video'):
            self.report_warning(err)
            self._wait_for_video()
        raise

    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
        return None

    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}

    original_url = extra_info.get('original_url')
    if original_url:
        ie_result.setdefault('original_url', original_url)
    self.add_default_extra_info(ie_result, ie, url)

    if not process:
        return ie_result
    self._wait_for_video(ie_result)
    return self.process_ie_result(ie_result, download, extra_info)
fe7e0c98 1763
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in URL- and extractor-derived fields of ie_result that are not already set

    @param ie_result    Info dict to update in place (via add_extra_info)
    @param ie           Extractor providing IE_NAME and ie_key(), or None
    @param url          URL used for 'webpage_url' and 'original_url', or None
    """
    if url is not None:
        self.add_extra_info(ie_result, dict.fromkeys(('webpage_url', 'original_url'), url))

    page_url = ie_result.get('webpage_url')
    if page_url:
        self.add_extra_info(ie_result, {
            'webpage_url_basename': url_basename(page_url),
            'webpage_url_domain': get_domain(page_url),
        })

    if ie is not None:
        self.add_extra_info(ie_result, {'extractor': ie.IE_NAME, 'extractor_key': ie.ie_key()})
ea38e55f 1781
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie (may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    @param ie_result    Info dict returned by an extractor; dispatched on its '_type'
                        ('video' when missing)
    @param download     Whether to download the resolved videos
    @param extra_info   Extra values to add to the resulting info dict(s)

    Raises Exception for an unknown '_type'.
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(
            ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
        if ie_result.get('original_url') and not extra_info.get('original_url'):
            extra_info = {'original_url': ie_result['original_url'], **extra_info}

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: do not resolve the URL. Printing and archive
            # recording operate on a copy; the unresolved result is returned.
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            if ie and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            info_copy, _ = self.pre_process(info_copy)
            self._fill_common_fields(info_copy, False)
            self.__forced_printings(info_copy)
            self._raise_pending_errors(info_copy)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        self._raise_pending_errors(ie_result)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] {}: {} additional URL(s) requested'.format(ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "{}"'.format('", "'.join(additional_urls)))
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        exempted_fields = {'_type', 'url', 'ie_key'}
        if not ie_result.get('section_end') and ie_result.get('section_start') is None:
            # For video clips, the id etc of the clip extractor should be used
            exempted_fields |= {'id', 'extractor', 'extractor_key'}

        new_result = info.copy()
        new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result.get('webpage_url')  # Playlists may not have webpage_url
        if webpage_url and webpage_url in self._playlist_urls:
            # The title/id fallback must happen BEFORE formatting; a formatted
            # string is always truthy, so `'...'.format(title) or id` never used the id
            self.to_screen(
                '[download] Skipping already downloaded playlist: {}'.format(
                    ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._fill_common_fields(ie_result, False)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            # Forget the seen playlist URLs once the outermost playlist is done
            self._playlist_level -= 1
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor {} returned a compat_list result. '
            'It needs to be updated.'.format(ie_result.get('extractor')))

        def _fixup(r):
            # Propagate the extractor/URL fields of the list result to each entry
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception(f'Invalid result type: {result_type}')
8222d8de 1915
def _ensure_dir_exists(self, path):
    """Call make_dir for `path` with report_error as its error callback and return its result"""
    on_error = self.report_error
    return make_dir(path, on_error)
1918
3b603dbd 1919 @staticmethod
3bec830a 1920 def _playlist_infodict(ie_result, strict=False, **kwargs):
1921 info = {
1922 'playlist_count': ie_result.get('playlist_count'),
3b603dbd 1923 'playlist': ie_result.get('title') or ie_result.get('id'),
1924 'playlist_id': ie_result.get('id'),
1925 'playlist_title': ie_result.get('title'),
1926 'playlist_uploader': ie_result.get('uploader'),
1927 'playlist_uploader_id': ie_result.get('uploader_id'),
3b603dbd 1928 **kwargs,
1929 }
3bec830a 1930 if strict:
1931 return info
0bd5a039 1932 if ie_result.get('webpage_url'):
1933 info.update({
1934 'webpage_url': ie_result['webpage_url'],
1935 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1936 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1937 })
3bec830a 1938 return {
1939 **info,
1940 'playlist_index': 0,
59d7de0d 1941 '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
3bec830a 1942 'extractor': ie_result['extractor'],
3bec830a 1943 'extractor_key': ie_result['extractor_key'],
1944 }
3b603dbd 1945
def __process_playlist(self, ie_result, download):
    """Process each entry in the playlist

    @param ie_result    Playlist (or multi_video) info dict; updated in place with the
                        processed 'entries', 'requested_entries' and 'playlist_count'
    @param download     Passed through to the processing of each entry
    @returns            The result of running the 'playlist' postprocessors on ie_result,
                        or None if the playlist was filtered out by _match_entry or one
                        of the playlist file writers returned None
    """
    assert ie_result['_type'] in ('playlist', 'multi_video')

    common_info = self._playlist_infodict(ie_result, strict=True)
    title = common_info.get('playlist') or '<Untitled>'
    # The playlist itself can be rejected by the filters
    if self._match_entry(common_info, incomplete=True) is not None:
        return
    self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

    all_entries = PlaylistEntries(self, ie_result)
    entries = orderedSet(all_entries.get_requested_items(), lazy=True)

    lazy = self.params.get('lazy_playlist')
    if lazy:
        # Entries are consumed one by one in the loop below; the count is unknown upfront
        resolved_entries, n_entries = [], 'N/A'
        ie_result['requested_entries'], ie_result['entries'] = None, None
    else:
        entries = resolved_entries = list(entries)
        n_entries = len(resolved_entries)
        # Split the (playlist_index, entry) pairs into two parallel sequences
        ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
    if not ie_result.get('playlist_count'):
        # Better to do this after potentially exhausting entries
        ie_result['playlist_count'] = all_entries.get_full_count()

    extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
    # Lookups fall back to the playlist-derived fields when ie_result lacks a key
    ie_copy = collections.ChainMap(ie_result, extra)

    _infojson_written = False
    write_playlist_files = self.params.get('allow_playlist_files', True)
    if write_playlist_files and self.params.get('list_thumbnails'):
        self.list_thumbnails(ie_result)
    if write_playlist_files and not self.params.get('simulate'):
        _infojson_written = self._write_info_json(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
        if _infojson_written is None:
            return
        if self._write_description('playlist', ie_result,
                                   self.prepare_filename(ie_copy, 'pl_description')) is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if lazy:
        if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
            self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
    elif self.params.get('playlistreverse'):
        entries.reverse()
    elif self.params.get('playlistrandom'):
        random.shuffle(entries)

    self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
                   f'{format_field(ie_result, "playlist_count", " of %s")}')

    keep_resolved_entries = self.params.get('extract_flat') != 'discard'
    if self.params.get('extract_flat') == 'discard_in_playlist':
        keep_resolved_entries = ie_result['_type'] != 'playlist'
    if keep_resolved_entries:
        self.write_debug('The information of all playlist entries will be held in memory')

    failures = 0
    # A falsy 'skip_playlist_after_errors' means the playlist is never abandoned
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, (playlist_index, entry) in enumerate(entries):
        if lazy:
            # Keeps resolved_entries[i] valid for the assignments below
            resolved_entries.append((playlist_index, entry))
        if not entry:
            continue

        entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
        if not lazy and 'playlist-index' in self.params['compat_opts']:
            playlist_index = ie_result['requested_entries'][i]

        entry_copy = collections.ChainMap(entry, {
            **common_info,
            'n_entries': int_or_none(n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': i + 1,
        })

        if self._match_entry(entry_copy, incomplete=True) is not None:
            # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
            # NO_DEFAULT marks the entry as rejected; such entries are dropped after the loop
            resolved_entries[i] = (playlist_index, NO_DEFAULT)
            continue

        self.to_screen(
            f'[download] Downloading item {self._format_screen(i + 1, self.Styles.ID)} '
            f'of {self._format_screen(n_entries, self.Styles.EMPHASIS)}')

        entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
            'playlist_index': playlist_index,
            'playlist_autonumber': i + 1,
        }, extra))
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
            break
        if keep_resolved_entries:
            resolved_entries[i] = (playlist_index, entry_result)

    # Update with processed data
    ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
    ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
    if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
        # Do not set for full playlist
        ie_result.pop('requested_entries')

    # Write the updated info to json
    if _infojson_written is True and self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
        return

    ie_result = self.run_all_pps('playlist', ie_result)
    self.to_screen(f'[download] Finished downloading playlist: {title}')
    return ie_result
2063
@_handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process a single playlist entry with process_ie_result

    Wrapped in _handle_extraction_exceptions, so the exceptions raised while
    processing the entry go through that decorator.
    """
    processed = self.process_ie_result(entry, download=download, extra_info=extra_info)
    return processed
2068
67134eab
JMF
2069 def _build_format_filter(self, filter_spec):
2070 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
2071
2072 OPERATORS = {
2073 '<': operator.lt,
2074 '<=': operator.le,
2075 '>': operator.gt,
2076 '>=': operator.ge,
2077 '=': operator.eq,
2078 '!=': operator.ne,
2079 }
67134eab 2080 operator_rex = re.compile(r'''(?x)\s*
c3f624ef 2081 (?P<key>[\w.-]+)\s*
add96eb9 2082 (?P<op>{})(?P<none_inclusive>\s*\?)?\s*
187986a8 2083 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
add96eb9 2084 '''.format('|'.join(map(re.escape, OPERATORS.keys()))))
187986a8 2085 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
2086 if m:
2087 try:
2088 comparison_value = int(m.group('value'))
2089 except ValueError:
2090 comparison_value = parse_filesize(m.group('value'))
2091 if comparison_value is None:
2092 comparison_value = parse_filesize(m.group('value') + 'B')
2093 if comparison_value is None:
2094 raise ValueError(
add96eb9 2095 'Invalid value {!r} in format specification {!r}'.format(
67134eab 2096 m.group('value'), filter_spec))
9ddb6925
S
2097 op = OPERATORS[m.group('op')]
2098
083c9df9 2099 if not m:
9ddb6925
S
2100 STR_OPERATORS = {
2101 '=': operator.eq,
10d33b34
YCH
2102 '^=': lambda attr, value: attr.startswith(value),
2103 '$=': lambda attr, value: attr.endswith(value),
2104 '*=': lambda attr, value: value in attr,
add96eb9 2105 '~=': lambda attr, value: value.search(attr) is not None,
9ddb6925 2106 }
187986a8 2107 str_operator_rex = re.compile(r'''(?x)\s*
2108 (?P<key>[a-zA-Z0-9._-]+)\s*
add96eb9 2109 (?P<negation>!\s*)?(?P<op>{})\s*(?P<none_inclusive>\?\s*)?
1ce9a3cb
LF
2110 (?P<quote>["'])?
2111 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
2112 (?(quote)(?P=quote))\s*
add96eb9 2113 '''.format('|'.join(map(re.escape, STR_OPERATORS.keys()))))
187986a8 2114 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925 2115 if m:
1ce9a3cb
LF
2116 if m.group('op') == '~=':
2117 comparison_value = re.compile(m.group('value'))
2118 else:
2119 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2cc779f4
S
2120 str_op = STR_OPERATORS[m.group('op')]
2121 if m.group('negation'):
e118a879 2122 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
2123 else:
2124 op = str_op
083c9df9 2125
9ddb6925 2126 if not m:
add96eb9 2127 raise SyntaxError(f'Invalid filter specification {filter_spec!r}')
083c9df9
PH
2128
2129 def _filter(f):
2130 actual_value = f.get(m.group('key'))
2131 if actual_value is None:
2132 return m.group('none_inclusive')
2133 return op(actual_value, comparison_value)
67134eab
JMF
2134 return _filter
2135
9f1a1c36 2136 def _check_formats(self, formats):
2137 for f in formats:
96da9525 2138 working = f.get('__working')
2139 if working is not None:
2140 if working:
2141 yield f
2142 continue
add96eb9 2143 self.to_screen('[info] Testing format {}'.format(f['format_id']))
75689fe5 2144 path = self.get_output_path('temp')
2145 if not self._ensure_dir_exists(f'{path}/'):
2146 continue
2147 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
9f1a1c36 2148 temp_file.close()
2149 try:
2150 success, _ = self.dl(temp_file.name, f, test=True)
add96eb9 2151 except (DownloadError, OSError, ValueError, *network_exceptions):
9f1a1c36 2152 success = False
2153 finally:
2154 if os.path.exists(temp_file.name):
2155 try:
2156 os.remove(temp_file.name)
2157 except OSError:
add96eb9 2158 self.report_warning(f'Unable to delete temporary file "{temp_file.name}"')
96da9525 2159 f['__working'] = success
9f1a1c36 2160 if success:
2161 yield f
2162 else:
add96eb9 2163 self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id']))
9f1a1c36 2164
96da9525 2165 def _select_formats(self, formats, selector):
2166 return list(selector({
2167 'formats': formats,
2168 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2169 'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
2170 or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
2171 }))
2172
0017d9ad 2173 def _default_format_spec(self, info_dict, download=True):
96da9525 2174 download = download and not self.params.get('simulate')
2175 prefer_best = download and (
2176 self.params['outtmpl']['default'] == '-'
2177 or info_dict.get('is_live') and not self.params.get('live_from_start'))
0017d9ad 2178
af0f7428
S
2179 def can_merge():
2180 merger = FFmpegMergerPP(self)
2181 return merger.available and merger.can_merge()
2182
96da9525 2183 if not prefer_best and download and not can_merge():
2184 prefer_best = True
2185 formats = self._get_formats(info_dict)
2186 evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec))
2187 if evaluate_formats('b/bv+ba') != evaluate_formats('bv*+ba/b'):
2188 self.report_warning('ffmpeg not found. The downloaded format may not be the best available. '
2189 'Installing ffmpeg is strongly recommended: https://github.com/yt-dlp/yt-dlp#dependencies')
2190
2191 compat = (self.params.get('allow_multiple_audio_streams')
2192 or 'format-spec' in self.params['compat_opts'])
2193
2194 return ('best/bestvideo+bestaudio' if prefer_best
2195 else 'bestvideo+bestaudio/best' if compat
2196 else 'bestvideo*+bestaudio/best')
0017d9ad 2197
67134eab
JMF
def build_format_selector(self, format_spec):
    """Compile a format specification string into a selector function.

    The specification is tokenized with the stdlib `tokenize` module and
    parsed into a tree of FormatSelector tuples:
      a,b        list: yield the results of every selector in turn
      a/b        PICKFIRST: use the first alternative that yields anything
      a+b        MERGE: combine every pair of results via _merge
      (...)      GROUP
      name[...]  SINGLE atom with optional filters; a filter without an
                 atom applies to 'best'

    @param format_spec  The format specification string
    @returns            A function that takes a context dict (it reads the
                        'formats', 'incomplete_formats' and
                        'has_merged_format' keys) and returns an iterable of
                        the selected format dicts
    @raises SyntaxError If the specification cannot be tokenized or parsed
    """
    def syntax_error(note, start):
        # `start` is a (row, column) token position; the caret is put under the column
        message = (
            'Invalid format specification: '
            '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    def _parse_filter(tokens):
        # Collect the raw text of everything up to the closing bracket
        filter_parts = []
        for type_, string_, _start, _, _ in tokens:
            if type_ == tokenize.OP and string_ == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string_)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings.
        # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type_, string_, start, end, line in tokens:
            if type_ == tokenize.OP and string_ == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type_, string_, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type_, string_, start, end, line in tokens:
                    yield type_, string_, start, end, line
                    if type_ == tokenize.OP and string_ == ']':
                        break
            elif type_ == tokenize.OP and string_ in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type_, string_, start, end, line
            elif type_ in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string_
                    last_start = start
                    last_end = end
                else:
                    last_string += string_
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive-descent parser. The inside_* flags tell a nested call which
        # operators belong to its caller, so it can push them back and stop.
        selectors = []
        current_selector = None
        for type_, string_, start, _, _ in tokens:
            # ENCODING is only defined in Python 3.x
            if type_ == getattr(tokenize, 'ENCODING', None):
                continue
            elif type_ in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string_, [])
            elif type_ == tokenize.OP:
                if string_ == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string_ in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string_ == ',':
                    tokens.restore_last_token()
                    break
                elif string_ == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string_ == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string_ == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string_ == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string_ == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error(f'Operator not recognized: "{string_}"', start)
            elif type_ == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        # Combine two (possibly already merged) formats into one merged format dict
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            get_no_more = {'video': False, 'audio': False}
            # Build a new list instead of popping from the list being iterated:
            # popping shifts the remaining items, so the format following each
            # removed one would never be examined and could survive wrongly.
            kept_formats = []
            for fmt_info in formats_info:
                if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                    continue
                for aud_vid in ['audio', 'video']:
                    if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                        if get_no_more[aud_vid]:
                            break
                        get_no_more[aud_vid] = True
                else:
                    kept_formats.append(fmt_info)
            # If every candidate lacks both audio and video, keep a single one
            # rather than producing a merge of nothing
            formats_info = kept_formats or formats_info[-1:]

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = get_compatible_ext(
            vcodecs=[f.get('vcodec') for f in video_fmts],
            acodecs=[f.get('acodec') for f in audio_fmts],
            vexts=[f['ext'] for f in video_fmts],
            aexts=[f['ext'] for f in audio_fmts],
            preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
                         or self.params.get('prefer_free_formats') and ('webm', 'mkv')))

        filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(filtered('format')),
            'format_id': '+'.join(filtered('format_id')),
            'ext': output_ext,
            'protocol': '+'.join(map(determine_protocol, formats_info)),
            'language': '+'.join(orderedSet(filtered('language'))) or None,
            'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
            'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
            'tbr': sum(filtered('tbr', 'vbr', 'abr')),
        }

        # Per-stream properties are only meaningful when exactly one stream of that kind remains
        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'dynamic_range': the_only_video.get('dynamic_range'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
                'aspect_ratio': the_only_video.get('aspect_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
                'asr': the_only_audio.get('asr'),
                'audio_channels': the_only_audio.get('audio_channels'),
            })

        return new_dict

    def _check_formats(formats):
        # 'selected': test every candidate; any other explicit setting (or
        # allow_unplayable_formats): test nothing; unset: test only formats
        # flagged with 'has_drm' or '__needs_testing'
        if self.params.get('check_formats') == 'selected':
            yield from self._check_formats(formats)
            return
        elif (self.params.get('check_formats') is not None
                or self.params.get('allow_unplayable_formats')):
            yield from formats
            return

        for f in formats:
            if f.get('has_drm') or f.get('__needs_testing'):
                yield from self._check_formats([f])
            else:
                yield f

    def _build_selector_function(selector):
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    yield from f(ctx)
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                    yield _merge(pair)

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    yield from _check_formats(ctx['formats'][::-1])
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(_check_formats(
                        f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    _filter_f = (
                        (lambda f: f.get(f'{format_type}codec') != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get(f'{not_format_type}codec') == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else lambda f: True)  # b*, w*
                    filter_f = lambda f: _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                else:
                    if format_spec in self._format_selection_exts['audio']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                    elif format_spec in self._format_selection_exts['video']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                        separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
                    elif format_spec in self._format_selection_exts['storyboards']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                    else:
                        filter_f = lambda f: f.get('format_id') == format_spec  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if not matches:
                        if format_fallback and ctx['incomplete_formats']:
                            # for extractors with incomplete formats (audio only (soundcloud)
                            # or video only (imgur)) best/worst will fallback to
                            # best/worst {video,audio}-only format
                            matches = list(filter(lambda f: f.get('vcodec') != 'none' or f.get('acodec') != 'none', formats))
                        elif separate_fallback and not ctx['has_merged_format']:
                            # for compatibility with youtube-dl when there is no pre-merged format
                            matches = list(filter(separate_fallback, formats))
                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                    try:
                        yield matches[format_idx - 1]
                    except LazyList.IndexError:
                        return

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply the atom's filters to a copy so the caller's context is left untouched
            ctx_copy = dict(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    # HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid
    #       Prefix numbers with random letters to avoid it being classified as a number
    #       See: https://github.com/yt-dlp/yt-dlp/pulls/8797
    # TODO: Implement parser not reliant on tokenize.tokenize
    prefix = ''.join(random.choices(string.ascii_letters, k=32))
    stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode())
    try:
        tokens = list(_remove_unused_ops(
            token._replace(string=token.string.replace(prefix, ''))
            for token in tokenize.tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator:
        """Iterator over a token list that can step back by one token."""

        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
a9c58ad9 2543
def _calc_headers(self, info_dict, load_cookies=False):
    """Build the HTTP headers for info_dict and record its cookies.

    The headers are the 'http_headers' param overlaid with info_dict's own
    'http_headers'. Any 'Cookie' header is removed from the result; instead,
    the cookies the cookiejar holds for info_dict['url'] are serialized into
    info_dict['cookies'].

    @param load_cookies  Load the 'Cookie' header and info_dict['cookies']
                         into the cookiejar first
    @returns             The computed headers
    """
    headers = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
    clean_headers(headers)

    if load_cookies:  # For --load-info-json
        self._load_cookies(headers.get('Cookie'), autoscope=info_dict['url'])  # compat
        self._load_cookies(info_dict.get('cookies'), autoscope=False)

    # The `Cookie` header is removed to prevent leaks and unscoped cookies.
    # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
    headers.pop('Cookie', None)

    jar_cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
    if jar_cookies:
        encoder = LenientSimpleCookie()
        parts = []
        for cookie in jar_cookies:
            _, encoded_value = encoder.value_encode(cookie.value)
            parts.append(f'{cookie.name}={encoded_value}')
            # Attributes are emitted only when they are set on the cookie
            for is_set, attribute in (
                (cookie.domain, f'Domain={cookie.domain}'),
                (cookie.path, f'Path={cookie.path}'),
                (cookie.secure, 'Secure'),
                (cookie.expires, f'Expires={cookie.expires}'),
                (cookie.version, f'Version={cookie.version}'),
            ):
                if is_set:
                    parts.append(attribute)
        info_dict['cookies'] = '; '.join(parts)

    if 'X-Forwarded-For' not in headers:
        forwarded_ip = info_dict.get('__x_forwarded_for_ip')
        if forwarded_ip:
            headers['X-Forwarded-For'] = forwarded_ip

    return headers
2579
c487cf00 2580 def _calc_cookies(self, url):
b87e01c1 2581 self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
2582 return self.cookiejar.get_cookie_header(url)
e5660ee6 2583
9f1a1c36 2584 def _sort_thumbnails(self, thumbnails):
2585 thumbnails.sort(key=lambda t: (
2586 t.get('preference') if t.get('preference') is not None else -1,
2587 t.get('width') if t.get('width') is not None else -1,
2588 t.get('height') if t.get('height') is not None else -1,
2589 t.get('id') if t.get('id') is not None else '',
2590 t.get('url')))
2591
b0249bca 2592 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2593 thumbnails = info_dict.get('thumbnails')
2594 if thumbnails is None:
2595 thumbnail = info_dict.get('thumbnail')
2596 if thumbnail:
2597 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
9f1a1c36 2598 if not thumbnails:
2599 return
2600
2601 def check_thumbnails(thumbnails):
2602 for t in thumbnails:
2603 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2604 try:
2605 self.urlopen(HEADRequest(t['url']))
2606 except network_exceptions as err:
2607 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2608 continue
2609 yield t
2610
2611 self._sort_thumbnails(thumbnails)
2612 for i, t in enumerate(thumbnails):
2613 if t.get('id') is None:
add96eb9 2614 t['id'] = str(i)
9f1a1c36 2615 if t.get('width') and t.get('height'):
2616 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2617 t['url'] = sanitize_url(t['url'])
2618
2619 if self.params.get('check_formats') is True:
282f5709 2620 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
9f1a1c36 2621 else:
2622 info_dict['thumbnails'] = thumbnails
bc516a3f 2623
94dc8604 2624 def _fill_common_fields(self, info_dict, final=True):
03f83004 2625 # TODO: move sanitization here
94dc8604 2626 if final:
7aefd19a 2627 title = info_dict['fulltitle'] = info_dict.get('title')
d4736fdb 2628 if not title:
2629 if title == '':
2630 self.write_debug('Extractor gave empty title. Creating a generic title')
2631 else:
2632 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
1d485a1a 2633 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
03f83004
LNO
2634
2635 if info_dict.get('duration') is not None:
2636 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2637
2638 for ts_key, date_key in (
2639 ('timestamp', 'upload_date'),
2640 ('release_timestamp', 'release_date'),
2641 ('modified_timestamp', 'modified_date'),
2642 ):
2643 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2644 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2645 # see http://bugs.python.org/issue1646728)
19a03940 2646 with contextlib.suppress(ValueError, OverflowError, OSError):
c305a25c 2647 upload_date = dt.datetime.fromtimestamp(info_dict[ts_key], dt.timezone.utc)
03f83004 2648 info_dict[date_key] = upload_date.strftime('%Y%m%d')
03f83004 2649
1732eccc 2650 if not info_dict.get('release_year'):
2651 info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])}))
2652
03f83004
LNO
2653 live_keys = ('is_live', 'was_live')
2654 live_status = info_dict.get('live_status')
2655 if live_status is None:
2656 for key in live_keys:
2657 if info_dict.get(key) is False:
2658 continue
2659 if info_dict.get(key):
2660 live_status = key
2661 break
2662 if all(info_dict.get(key) is False for key in live_keys):
2663 live_status = 'not_live'
2664 if live_status:
2665 info_dict['live_status'] = live_status
2666 for key in live_keys:
2667 if info_dict.get(key) is None:
2668 info_dict[key] = (live_status == key)
a057779d 2669 if live_status == 'post_live':
2670 info_dict['was_live'] = True
03f83004
LNO
2671
2672 # Auto generate title fields corresponding to the *_number fields when missing
2673 # in order to always have clean titles. This is very common for TV series.
2674 for field in ('chapter', 'season', 'episode'):
add96eb9 2675 if final and info_dict.get(f'{field}_number') is not None and not info_dict.get(field):
2676 info_dict[field] = '%s %d' % (field.capitalize(), info_dict[f'{field}_number'])
03f83004 2677
104a7b5a
L
2678 for old_key, new_key in self._deprecated_multivalue_fields.items():
2679 if new_key in info_dict and old_key in info_dict:
b136e2af 2680 if '_version' not in info_dict: # HACK: Do not warn when using --load-info-json
2681 self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present')
104a7b5a
L
2682 elif old_value := info_dict.get(old_key):
2683 info_dict[new_key] = old_value.split(', ')
2684 elif new_value := info_dict.get(new_key):
2685 info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value)
2686
415f8d51 2687 def _raise_pending_errors(self, info):
2688 err = info.pop('__pending_error', None)
2689 if err:
2690 self.report_error(err, tb=False)
2691
def sort_formats(self, info_dict):
    """Sort the formats of info_dict in place.

    The sort key comes from a FormatSorter built with the fields listed in
    info_dict['_format_sort_fields'], if any.
    """
    formats = self._get_formats(info_dict)
    sort_fields = info_dict.get('_format_sort_fields') or []
    sorter = FormatSorter(self, sort_fields)
    formats.sort(key=sorter.calculate_preference)
784320c9 2696
dd82ffea
JMF
2697 def process_video_result(self, info_dict, download=True):
2698 assert info_dict.get('_type', 'video') == 'video'
9c906919 2699 self._num_videos += 1
dd82ffea 2700
bec1fad2 2701 if 'id' not in info_dict:
fc08bdd6 2702 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2703 elif not info_dict.get('id'):
2704 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
455a15e2 2705
c9969434
S
2706 def report_force_conversion(field, field_not, conversion):
2707 self.report_warning(
add96eb9 2708 f'"{field}" field is not {field_not} - forcing {conversion} conversion, '
2709 'there is an error in extractor')
c9969434
S
2710
2711 def sanitize_string_field(info, string_field):
2712 field = info.get(string_field)
14f25df2 2713 if field is None or isinstance(field, str):
c9969434
S
2714 return
2715 report_force_conversion(string_field, 'a string', 'string')
14f25df2 2716 info[string_field] = str(field)
c9969434
S
2717
2718 def sanitize_numeric_fields(info):
2719 for numeric_field in self._NUMERIC_FIELDS:
2720 field = info.get(numeric_field)
f9934b96 2721 if field is None or isinstance(field, (int, float)):
c9969434
S
2722 continue
2723 report_force_conversion(numeric_field, 'numeric', 'int')
2724 info[numeric_field] = int_or_none(field)
2725
2726 sanitize_string_field(info_dict, 'id')
2727 sanitize_numeric_fields(info_dict)
3975b4d2 2728 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2729 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
4c3f8c3f 2730 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
50e93e03 2731 self.report_warning('"duration" field is negative, there is an error in extractor')
be6217b2 2732
9eef7c4e 2733 chapters = info_dict.get('chapters') or []
a3976e07 2734 if chapters and chapters[0].get('start_time'):
2735 chapters.insert(0, {'start_time': 0})
2736
9eef7c4e 2737 dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
a3976e07 2738 for idx, (prev, current, next_) in enumerate(zip(
2739 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
9eef7c4e 2740 if current.get('start_time') is None:
2741 current['start_time'] = prev.get('end_time')
2742 if not current.get('end_time'):
2743 current['end_time'] = next_.get('start_time')
a3976e07 2744 if not current.get('title'):
2745 current['title'] = f'<Untitled Chapter {idx}>'
9eef7c4e 2746
dd82ffea
JMF
2747 if 'playlist' not in info_dict:
2748 # It isn't part of a playlist
2749 info_dict['playlist'] = None
2750 info_dict['playlist_index'] = None
2751
bc516a3f 2752 self._sanitize_thumbnails(info_dict)
d5519808 2753
536a55da 2754 thumbnail = info_dict.get('thumbnail')
bc516a3f 2755 thumbnails = info_dict.get('thumbnails')
536a55da
S
2756 if thumbnail:
2757 info_dict['thumbnail'] = sanitize_url(thumbnail)
2758 elif thumbnails:
d5519808
PH
2759 info_dict['thumbnail'] = thumbnails[-1]['url']
2760
ae30b840 2761 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2762 info_dict['display_id'] = info_dict['id']
2763
03f83004 2764 self._fill_common_fields(info_dict)
33d2fc2f 2765
05108a49
S
2766 for cc_kind in ('subtitles', 'automatic_captions'):
2767 cc = info_dict.get(cc_kind)
2768 if cc:
2769 for _, subtitle in cc.items():
2770 for subtitle_format in subtitle:
2771 if subtitle_format.get('url'):
2772 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2773 if subtitle_format.get('ext') is None:
2774 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2775
2776 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2777 subtitles = info_dict.get('subtitles')
4bba3716 2778
360e1ca5 2779 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2780 info_dict['id'], subtitles, automatic_captions)
a504ced0 2781
aebb4f4b 2782 formats = self._get_formats(info_dict)
dd82ffea 2783
c154302c 2784 # Backward compatibility with InfoExtractor._sort_formats
9ebac355 2785 field_preference = (formats or [{}])[0].pop('__sort_fields', None)
c154302c 2786 if field_preference:
2787 info_dict['_format_sort_fields'] = field_preference
2788
bc344cd4 2789 info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it
2790 f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
88acdbc2 2791 if not self.params.get('allow_unplayable_formats'):
bc344cd4 2792 formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']
17ffed18 2793
2794 if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2795 self.report_warning(
2796 f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
2797 'only images are available for download. Use --list-formats to see them'.capitalize())
88acdbc2 2798
319b6059 2799 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2800 if not get_from_start:
c305a25c 2801 info_dict['title'] += ' ' + dt.datetime.now().strftime('%Y-%m-%d %H:%M')
319b6059 2802 if info_dict.get('is_live') and formats:
adbc4ec4 2803 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
319b6059 2804 if get_from_start and not formats:
a44ca5a4 2805 self.raise_no_formats(info_dict, msg=(
2806 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2807 'If you want to download from the current time, use --no-live-from-start'))
adbc4ec4 2808
73af5cc8
S
2809 def is_wellformed(f):
2810 url = f.get('url')
a5ac0c47 2811 if not url:
73af5cc8
S
2812 self.report_warning(
2813 '"url" field is missing or empty - skipping format, '
2814 'there is an error in extractor')
a5ac0c47
S
2815 return False
2816 if isinstance(url, bytes):
2817 sanitize_string_field(f, 'url')
2818 return True
73af5cc8
S
2819
2820 # Filter out malformed formats for better extraction robustness
1ac7f461 2821 formats = list(filter(is_wellformed, formats or []))
2822
2823 if not formats:
2824 self.raise_no_formats(info_dict)
73af5cc8 2825
add96eb9 2826 for fmt in formats:
2827 sanitize_string_field(fmt, 'format_id')
2828 sanitize_numeric_fields(fmt)
2829 fmt['url'] = sanitize_url(fmt['url'])
2830 if fmt.get('ext') is None:
2831 fmt['ext'] = determine_ext(fmt['url']).lower()
2832 if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'):
2833 if fmt.get('acodec') is None:
2834 fmt['acodec'] = fmt['ext']
2835 if fmt.get('protocol') is None:
2836 fmt['protocol'] = determine_protocol(fmt)
2837 if fmt.get('resolution') is None:
2838 fmt['resolution'] = self.format_resolution(fmt, default=None)
2839 if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none':
2840 fmt['dynamic_range'] = 'SDR'
2841 if fmt.get('aspect_ratio') is None:
2842 fmt['aspect_ratio'] = try_call(lambda: round(fmt['width'] / fmt['height'], 2))
10025b71 2843 # For fragmented formats, "tbr" is often max bitrate and not average
add96eb9 2844 if (('manifest-filesize-approx' in self.params['compat_opts'] or not fmt.get('manifest_url'))
2845 and not fmt.get('filesize') and not fmt.get('filesize_approx')):
2846 fmt['filesize_approx'] = filesize_from_tbr(fmt.get('tbr'), info_dict.get('duration'))
2847 fmt['http_headers'] = self._calc_headers(collections.ChainMap(fmt, info_dict), load_cookies=True)
6c5211ce 2848
2849 # Safeguard against old/insecure infojson when using --load-info-json
2850 if info_dict.get('http_headers'):
2851 info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers'])
2852 info_dict['http_headers'].pop('Cookie', None)
39f32f17 2853
2854 # This is copied to http_headers by the above _calc_headers and can now be removed
2855 if '__x_forwarded_for_ip' in info_dict:
2856 del info_dict['__x_forwarded_for_ip']
2857
c154302c 2858 self.sort_formats({
2859 'formats': formats,
add96eb9 2860 '_format_sort_fields': info_dict.get('_format_sort_fields'),
c154302c 2861 })
39f32f17 2862
2863 # Sanitize and group by format_id
2864 formats_dict = {}
add96eb9 2865 for i, fmt in enumerate(formats):
2866 if not fmt.get('format_id'):
2867 fmt['format_id'] = str(i)
e2effb08
S
2868 else:
2869 # Sanitize format_id from characters used in format selector expression
add96eb9 2870 fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id'])
2871 formats_dict.setdefault(fmt['format_id'], []).append(fmt)
181c7053
S
2872
2873 # Make sure all formats have unique format_id
03b4de72 2874 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
181c7053 2875 for format_id, ambiguous_formats in formats_dict.items():
48ee10ee 2876 ambigious_id = len(ambiguous_formats) > 1
add96eb9 2877 for i, fmt in enumerate(ambiguous_formats):
48ee10ee 2878 if ambigious_id:
add96eb9 2879 fmt['format_id'] = f'{format_id}-{i}'
48ee10ee 2880 # Ensure there is no conflict between id and ext in format selection
2881 # See https://github.com/yt-dlp/yt-dlp/issues/1282
add96eb9 2882 if fmt['format_id'] != fmt['ext'] and fmt['format_id'] in common_exts:
2883 fmt['format_id'] = 'f{}'.format(fmt['format_id'])
2884
2885 if fmt.get('format') is None:
2886 fmt['format'] = '{id} - {res}{note}'.format(
2887 id=fmt['format_id'],
2888 res=self.format_resolution(fmt),
2889 note=format_field(fmt, 'format_note', ' (%s)'),
39f32f17 2890 )
dd82ffea 2891
9f1a1c36 2892 if self.params.get('check_formats') is True:
282f5709 2893 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
9f1a1c36 2894
88acdbc2 2895 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
2896 # only set the 'formats' fields if the original info_dict list them
2897 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 2898 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2899 # which can't be exported to json
b3d9ef88 2900 info_dict['formats'] = formats
4ec82a72 2901
2902 info_dict, _ = self.pre_process(info_dict)
2903
6db9c4d5 2904 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
09b49e1f 2905 return info_dict
2906
2907 self.post_extract(info_dict)
2908 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2909
093a1710 2910 # The pre-processors may have modified the formats
aebb4f4b 2911 formats = self._get_formats(info_dict)
093a1710 2912
e4221b70 2913 list_only = self.params.get('simulate') == 'list_only'
fa9f30b8 2914 interactive_format_selection = not list_only and self.format_selector == '-'
b7b04c78 2915 if self.params.get('list_thumbnails'):
2916 self.list_thumbnails(info_dict)
b7b04c78 2917 if self.params.get('listsubtitles'):
2918 if 'automatic_captions' in info_dict:
2919 self.list_subtitles(
2920 info_dict['id'], automatic_captions, 'automatic captions')
2921 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
fa9f30b8 2922 if self.params.get('listformats') or interactive_format_selection:
b69fd25c 2923 self.list_formats(info_dict)
169dbde9 2924 if list_only:
b7b04c78 2925 # Without this printing, -F --print-json will not work
17060584 2926 self.__forced_printings(info_dict)
c487cf00 2927 return info_dict
bfaae0a7 2928
187986a8 2929 format_selector = self.format_selector
fa9f30b8 2930 while True:
2931 if interactive_format_selection:
372a0f3b
IS
2932 req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
2933 + '(Press ENTER for default, or Ctrl+C to quit)'
2934 + self._format_screen(': ', self.Styles.EMPHASIS))
fa9f30b8 2935 try:
372a0f3b 2936 format_selector = self.build_format_selector(req_format) if req_format else None
fa9f30b8 2937 except SyntaxError as err:
2938 self.report_error(err, tb=False, is_error=False)
2939 continue
2940
372a0f3b
IS
2941 if format_selector is None:
2942 req_format = self._default_format_spec(info_dict, download=download)
2943 self.write_debug(f'Default format spec: {req_format}')
2944 format_selector = self.build_format_selector(req_format)
2945
96da9525 2946 formats_to_download = self._select_formats(formats, format_selector)
fa9f30b8 2947 if interactive_format_selection and not formats_to_download:
2948 self.report_error('Requested format is not available', tb=False, is_error=False)
2949 continue
2950 break
317f7ab6 2951
dd82ffea 2952 if not formats_to_download:
b7da73eb 2953 if not self.params.get('ignore_no_formats_error'):
c0b6e5c7 2954 raise ExtractorError(
2955 'Requested format is not available. Use --list-formats for a list of available formats',
2956 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
b62fa6d7 2957 self.report_warning('Requested format is not available')
2958 # Process what we can, even without any available formats.
2959 formats_to_download = [{}]
a13e6848 2960
0500ee3d 2961 requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
5ec1b6b7 2962 best_format, downloaded_formats = formats_to_download[-1], []
b62fa6d7 2963 if download:
0500ee3d 2964 if best_format and requested_ranges:
5ec1b6b7 2965 def to_screen(*msg):
2966 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2967
2968 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2969 (f['format_id'] for f in formats_to_download))
0500ee3d 2970 if requested_ranges != ({}, ):
5ec1b6b7 2971 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
fc2ba496 2972 (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
a13e6848 2973 max_downloads_reached = False
5ec1b6b7 2974
0500ee3d 2975 for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
5ec1b6b7 2976 new_info = self._copy_infodict(info_dict)
b7da73eb 2977 new_info.update(fmt)
3975b4d2 2978 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
fc2ba496 2979 end_time = offset + min(chapter.get('end_time', duration), duration)
b4e0d758 2980 # duration may not be accurate. So allow deviations <1sec
2981 if end_time == float('inf') or end_time > offset + duration + 1:
2982 end_time = None
3975b4d2 2983 if chapter or offset:
5ec1b6b7 2984 new_info.update({
3975b4d2 2985 'section_start': offset + chapter.get('start_time', 0),
b4e0d758 2986 'section_end': end_time,
5ec1b6b7 2987 'section_title': chapter.get('title'),
2988 'section_number': chapter.get('index'),
2989 })
2990 downloaded_formats.append(new_info)
a13e6848 2991 try:
2992 self.process_info(new_info)
2993 except MaxDownloadsReached:
2994 max_downloads_reached = True
415f8d51 2995 self._raise_pending_errors(new_info)
f46e2f9d 2996 # Remove copied info
2997 for key, val in tuple(new_info.items()):
2998 if info_dict.get(key) == val:
2999 new_info.pop(key)
a13e6848 3000 if max_downloads_reached:
3001 break
ebed8b37 3002
5ec1b6b7 3003 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
a13e6848 3004 assert write_archive.issubset({True, False, 'ignore'})
3005 if True in write_archive and False not in write_archive:
3006 self.record_download_archive(info_dict)
be72c624 3007
5ec1b6b7 3008 info_dict['requested_downloads'] = downloaded_formats
ed5835b4 3009 info_dict = self.run_all_pps('after_video', info_dict)
a13e6848 3010 if max_downloads_reached:
add96eb9 3011 raise MaxDownloadsReached
ebed8b37 3012
49a57e70 3013 # We update the info dict with the selected best quality format (backwards compatibility)
be72c624 3014 info_dict.update(best_format)
dd82ffea
JMF
3015 return info_dict
3016
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    @param video_id            Only used in the messages shown to the user
    @param normal_subtitles    Mapping of language -> list of subtitle format
                               dicts (each is read for its "ext"), or a falsy value
    @param automatic_captions  Same kind of mapping for automatic captions
    @returns                   None when nothing is available/requested, otherwise
                               a dict of language -> the chosen subtitle format dict
    @raises ValueError         If a regex in the "subtitleslangs" param is invalid
    """
    available_subs, normal_sub_langs = {}, []
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
        normal_sub_langs = tuple(normal_subtitles.keys())
    if automatic_captions and self.params.get('writeautomaticsub'):
        # Normal subtitles win over automatic captions of the same language
        for lang, cap_info in automatic_captions.items():
            available_subs.setdefault(lang, cap_info)

    # available_subs is only ever filled when "writesubtitles" or
    # "writeautomaticsub" is set, so checking it alone is sufficient
    if not available_subs:
        return None

    all_sub_langs = tuple(available_subs.keys())
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        try:
            requested_langs = orderedSet_from_options(
                self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}') from e
    else:
        # No explicit request: take the first candidate, preferring
        # normal subtitles over captions and English over anything else
        requested_langs = LazyList(itertools.chain(
            ['en'] if 'en' in normal_sub_langs else [],
            filter(lambda f: f.startswith('en'), normal_sub_langs),
            ['en'] if 'en' in all_sub_langs else [],
            filter(lambda f: f.startswith('en'), all_sub_langs),
            normal_sub_langs, all_sub_langs,
        ))[:1]
    if requested_langs:
        self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        if not formats:
            # Covers an unknown language as well as a language with an empty
            # format list (indexing the latter would raise IndexError below)
            self.report_warning(f'{lang} subtitles not available for {video_id}')
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            matches = [sub for sub in formats if sub['ext'] == ext]
            if matches:
                chosen = matches[-1]
                break
        else:
            # None of the preferred extensions matched; fall back to the last format
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "{}" for language {}, '
                'using {}. Use --list-subs for a list of available subtitles'.format(formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs
3076
def _forceprint(self, key, info_dict):
    """Evaluate the "forceprint" and "print_to_file" templates registered for `key`

    Works on a shallow copy of info_dict that is extended with "filename",
    "urls" and the rendered "*_table" fields. Returns that copy, or None
    when info_dict is None.
    """
    if info_dict is None:
        return None

    info_copy = info_dict.copy()
    default_filename = self.prepare_filename(info_dict)
    info_copy.setdefault('filename', default_filename)

    requested_formats = info_dict.get('requested_formats')
    if requested_formats is not None:
        # For RTMP URLs, also include the playpath
        info_copy['urls'] = '\n'.join(
            fmt['url'] + fmt.get('play_path', '') for fmt in info_dict['requested_formats'])
    elif info_dict.get('url'):
        info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')

    video_id = info_dict.get('id')
    info_copy['formats_table'] = self.render_formats_table(info_dict)
    info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
    info_copy['subtitles_table'] = self.render_subtitles_table(video_id, info_dict.get('subtitles'))
    info_copy['automatic_captions_table'] = self.render_subtitles_table(
        video_id, info_dict.get('automatic_captions'))

    def expand_shorthand(spec):
        # Shorthand such as "title", "id,title", "title=" or "{id,title}" is
        # turned into an output template; anything else is passed through as-is
        shorthand = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', spec)
        if shorthand is None:
            return spec

        if spec.startswith('{'):
            spec, field_fmt = f'.{spec}', '%({})j'
        else:
            field_fmt = '%({})s'
        if spec.endswith('='):
            spec, field_fmt = spec[:-1], '{0} = %({0})#j'

        # A dict-style spec is kept whole; a plain one is one field per comma
        fields = [spec] if shorthand.group('dict') else spec.split(',')
        return '\n'.join(field_fmt.format(field) for field in fields)

    for spec in self.params['forceprint'].get(key, []):
        template = expand_shorthand(spec)
        self.to_stdout(self.evaluate_outtmpl(template, info_copy))

    for spec, file_tmpl in self.params['print_to_file'].get(key, []):
        filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
        template = expand_shorthand(spec)
        self.to_screen(f'[info] Writing {template!r} to: {filename}')
        if not self._ensure_dir_exists(filename):
            continue
        # newline='' so that the explicitly appended os.linesep is written untranslated
        with open(filename, 'a', encoding='utf-8', newline='') as outfile:
            outfile.write(self.evaluate_outtmpl(template, info_copy) + os.linesep)

    return info_copy
3116
def __forced_printings(self, info_dict, filename=None, incomplete=True):
    """Write the fields requested through the "force*" params to stdout

    If `filename` is given it is stored in info_dict["filename"] first.
    A mandatory field that has no value is only printed when `incomplete`
    is false; optional fields are printed only when they have a value.
    """
    if (self.params.get('forcejson')
            or self.params['forceprint'].get('video')
            or self.params['print_to_file'].get('video')):
        self.post_extract(info_dict)
    if filename:
        info_dict['filename'] = filename
    info_copy = self._forceprint('video', info_dict)

    def print_field(field, actual_field=None, optional=False):
        if not self.params.get(f'force{field}'):
            return
        source = field if actual_field is None else actual_field
        if info_copy.get(field) is not None or not (optional or incomplete):
            self.to_stdout(info_copy[source])

    # (field checked, field printed, optional) - in output order
    for field, source, optional in (
            ('title', None, False),
            ('id', None, False),
            ('url', 'urls', False),
            ('thumbnail', None, True),
            ('description', None, True),
            ('filename', None, False)):
        print_field(field, source, optional)

    # Duration is the only field that is reformatted before being printed
    if self.params.get('forceduration') and info_copy.get('duration') is not None:
        self.to_stdout(formatSeconds(info_copy['duration']))
    print_field('format')

    if self.params.get('forcejson'):
        sanitized = self.sanitize_info(info_dict)
        self.to_stdout(json.dumps(sanitized))
d06daf23 3145
def dl(self, name, info, subtitle=False, test=False):
    """Pick a suitable downloader for `info` and run it with `name` as the target

    With test=True a fixed set of downloader params is used instead of
    self.params, and neither progress hooks nor the debug message are set up.
    Returns the result of the downloader's download() call.
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    params = self.params
    if test:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_cls(self, params)

    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)

        def loggable(url):
            # data: URLs are cut at the first comma to keep the log line short
            if url.startswith('data:'):
                return url.split(',')[0] + ',<data>'
            return url

        sources = info.get('requested_formats', []) or [info]
        urls = '", "'.join(loggable(source['url']) for source in sources)
        self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')

    # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
    # But it may contain objects that are not deep-copyable
    new_info = self._copy_infodict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)
3180
def existing_file(self, filepaths, *, default_overwrite=True):
    """Return the first of `filepaths` that exists, unless overwriting is enabled

    When the "overwrites" param (falling back to `default_overwrite`) is
    truthy, every existing candidate is reported and deleted instead, and
    None is returned. None is also returned when no candidate exists.
    """
    found = [path for path in orderedSet(filepaths) if os.path.exists(path)]
    if not found:
        return None
    if not self.params.get('overwrites', default_overwrite):
        return found[0]

    # Overwriting: clear out every pre-existing candidate
    for stale in found:
        self.report_file_delete(stale)
        os.remove(stale)
    return None
3190
8222d8de 3191 def process_info(self, info_dict):
09b49e1f 3192 """Process a single resolved IE result. (Modifies it in-place)"""
8222d8de
JMF
3193
3194 assert info_dict.get('_type', 'video') == 'video'
f46e2f9d 3195 original_infodict = info_dict
fd288278 3196
4513a41a 3197 if 'format' not in info_dict and 'ext' in info_dict:
8222d8de
JMF
3198 info_dict['format'] = info_dict['ext']
3199
c77495e3 3200 if self._match_entry(info_dict) is not None:
9e907ebd 3201 info_dict['__write_download_archive'] = 'ignore'
8222d8de
JMF
3202 return
3203
09b49e1f 3204 # Does nothing under normal operation - for backward compatibility of process_info
277d6ff5 3205 self.post_extract(info_dict)
119e40ef 3206
3207 def replace_info_dict(new_info):
3208 nonlocal info_dict
3209 if new_info == info_dict:
3210 return
3211 info_dict.clear()
3212 info_dict.update(new_info)
3213
3214 new_info, _ = self.pre_process(info_dict, 'video')
3215 replace_info_dict(new_info)
0c14d66a 3216 self._num_downloads += 1
8222d8de 3217
dcf64d43 3218 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 3219 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
3220 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 3221 files_to_move = {}
8222d8de
JMF
3222
3223 # Forced printings
4513a41a 3224 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 3225
ca6d59d2 3226 def check_max_downloads():
3227 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
add96eb9 3228 raise MaxDownloadsReached
ca6d59d2 3229
b7b04c78 3230 if self.params.get('simulate'):
9e907ebd 3231 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
ca6d59d2 3232 check_max_downloads()
8222d8de
JMF
3233 return
3234
de6000d9 3235 if full_filename is None:
8222d8de 3236 return
e92caff5 3237 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 3238 return
e92caff5 3239 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
3240 return
3241
80c03fa9 3242 if self._write_description('video', info_dict,
3243 self.prepare_filename(info_dict, 'description')) is None:
3244 return
3245
3246 sub_files = self._write_subtitles(info_dict, temp_filename)
3247 if sub_files is None:
3248 return
3249 files_to_move.update(dict(sub_files))
3250
3251 thumb_files = self._write_thumbnails(
3252 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3253 if thumb_files is None:
3254 return
3255 files_to_move.update(dict(thumb_files))
8222d8de 3256
80c03fa9 3257 infofn = self.prepare_filename(info_dict, 'infojson')
3258 _infojson_written = self._write_info_json('video', info_dict, infofn)
3259 if _infojson_written:
dac5df5a 3260 info_dict['infojson_filename'] = infofn
e75bb0d6 3261 # For backward compatibility, even though it was a private field
80c03fa9 3262 info_dict['__infojson_filename'] = infofn
3263 elif _infojson_written is None:
3264 return
3265
3266 # Note: Annotations are deprecated
3267 annofn = None
1fb07d10 3268 if self.params.get('writeannotations', False):
de6000d9 3269 annofn = self.prepare_filename(info_dict, 'annotation')
80c03fa9 3270 if annofn:
e92caff5 3271 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 3272 return
0c3d0f51 3273 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 3274 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
3275 elif not info_dict.get('annotations'):
3276 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
3277 else:
3278 try:
6febd1c1 3279 self.to_screen('[info] Writing video annotations to: ' + annofn)
86e5f3ed 3280 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
7b6fefc9
PH
3281 annofile.write(info_dict['annotations'])
3282 except (KeyError, TypeError):
6febd1c1 3283 self.report_warning('There are no annotations to write.')
86e5f3ed 3284 except OSError:
6febd1c1 3285 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 3286 return
1fb07d10 3287
732044af 3288 # Write internet shortcut files
08438d2c 3289 def _write_link_file(link_type):
60f3e995 3290 url = try_get(info_dict['webpage_url'], iri_to_uri)
3291 if not url:
3292 self.report_warning(
3293 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3294 return True
08438d2c 3295 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
0e6b018a
Z
3296 if not self._ensure_dir_exists(encodeFilename(linkfn)):
3297 return False
10e3742e 3298 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
08438d2c 3299 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3300 return True
3301 try:
3302 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
86e5f3ed 3303 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3304 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
60f3e995 3305 template_vars = {'url': url}
08438d2c 3306 if link_type == 'desktop':
3307 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3308 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
86e5f3ed 3309 except OSError:
08438d2c 3310 self.report_error(f'Cannot write internet shortcut {linkfn}')
3311 return False
732044af 3312 return True
3313
08438d2c 3314 write_links = {
3315 'url': self.params.get('writeurllink'),
3316 'webloc': self.params.get('writewebloclink'),
3317 'desktop': self.params.get('writedesktoplink'),
3318 }
3319 if self.params.get('writelink'):
3320 link_type = ('webloc' if sys.platform == 'darwin'
3321 else 'desktop' if sys.platform.startswith('linux')
3322 else 'url')
3323 write_links[link_type] = True
3324
3325 if any(should_write and not _write_link_file(link_type)
3326 for link_type, should_write in write_links.items()):
3327 return
732044af 3328
415f8d51 3329 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3330 replace_info_dict(new_info)
56d868db 3331
a13e6848 3332 if self.params.get('skip_download'):
56d868db 3333 info_dict['filepath'] = temp_filename
3334 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3335 info_dict['__files_to_move'] = files_to_move
f46e2f9d 3336 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
9e907ebd 3337 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
56d868db 3338 else:
3339 # Download
b868936c 3340 info_dict.setdefault('__postprocessors', [])
4340deca 3341 try:
0202b52a 3342
e04938ab 3343 def existing_video_file(*filepaths):
6b591b29 3344 ext = info_dict.get('ext')
e04938ab 3345 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3346 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3347 default_overwrite=False)
3348 if file:
3349 info_dict['ext'] = os.path.splitext(file)[1][1:]
3350 return file
0202b52a 3351
7b2c3f47 3352 fd, success = None, True
fccf90e7 3353 if info_dict.get('protocol') or info_dict.get('url'):
56ba69e4 3354 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
62b5c94c 3355 if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
56ba69e4 3356 info_dict.get('section_start') or info_dict.get('section_end')):
7b2c3f47 3357 msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
56ba69e4 3358 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3359 self.report_error(f'{msg}. Aborting')
5ec1b6b7 3360 return
5ec1b6b7 3361
4340deca 3362 if info_dict.get('requested_formats') is not None:
0202b52a 3363 old_ext = info_dict['ext']
4e3b637d 3364 if self.params.get('merge_output_format') is None:
4e3b637d 3365 if (info_dict['ext'] == 'webm'
3366 and info_dict.get('thumbnails')
3367 # check with type instead of pp_key, __name__, or isinstance
3368 # since we dont want any custom PPs to trigger this
c487cf00 3369 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
4e3b637d 3370 info_dict['ext'] = 'mkv'
3371 self.report_warning(
3372 'webm doesn\'t support embedding a thumbnail, mkv will be used')
124bc071 3373 new_ext = info_dict['ext']
0202b52a 3374
124bc071 3375 def correct_ext(filename, ext=new_ext):
96fccc10 3376 if filename == '-':
3377 return filename
0202b52a 3378 filename_real_ext = os.path.splitext(filename)[1][1:]
3379 filename_wo_ext = (
3380 os.path.splitext(filename)[0]
124bc071 3381 if filename_real_ext in (old_ext, new_ext)
0202b52a 3382 else filename)
86e5f3ed 3383 return f'{filename_wo_ext}.{ext}'
0202b52a 3384
38c6902b 3385 # Ensure filename always has a correct extension for successful merge
0202b52a 3386 full_filename = correct_ext(full_filename)
3387 temp_filename = correct_ext(temp_filename)
e04938ab 3388 dl_filename = existing_video_file(full_filename, temp_filename)
ad54c913 3389
1ea24129 3390 info_dict['__real_download'] = False
84078a8b 3391 # NOTE: Copy so that original format dicts are not modified
3392 info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))
18e674b4 3393
7b2c3f47 3394 merger = FFmpegMergerPP(self)
adbc4ec4 3395 downloaded = []
dbf5416a 3396 if dl_filename is not None:
6c7274ec 3397 self.report_file_already_downloaded(dl_filename)
adbc4ec4 3398 elif fd:
ad54c913 3399 for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
adbc4ec4
THD
3400 f['filepath'] = fname = prepend_extension(
3401 correct_ext(temp_filename, info_dict['ext']),
add96eb9 3402 'f{}'.format(f['format_id']), info_dict['ext'])
adbc4ec4 3403 downloaded.append(fname)
ad54c913 3404 info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
dbf5416a 3405 success, real_download = self.dl(temp_filename, info_dict)
3406 info_dict['__real_download'] = real_download
18e674b4 3407 else:
18e674b4 3408 if self.params.get('allow_unplayable_formats'):
3409 self.report_warning(
3410 'You have requested merging of multiple formats '
3411 'while also allowing unplayable formats to be downloaded. '
3412 'The formats won\'t be merged to prevent data corruption.')
3413 elif not merger.available:
e8969bda 3414 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3415 if not self.params.get('ignoreerrors'):
3416 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3417 return
3418 self.report_warning(f'{msg}. The formats won\'t be merged')
18e674b4 3419
96fccc10 3420 if temp_filename == '-':
adbc4ec4 3421 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
96fccc10 3422 else 'but the formats are incompatible for simultaneous download' if merger.available
3423 else 'but ffmpeg is not installed')
3424 self.report_warning(
3425 f'You have requested downloading multiple formats to stdout {reason}. '
3426 'The formats will be streamed one after the other')
3427 fname = temp_filename
ad54c913 3428 for f in info_dict['requested_formats']:
dbf5416a 3429 new_info = dict(info_dict)
3430 del new_info['requested_formats']
3431 new_info.update(f)
96fccc10 3432 if temp_filename != '-':
124bc071 3433 fname = prepend_extension(
3434 correct_ext(temp_filename, new_info['ext']),
add96eb9 3435 'f{}'.format(f['format_id']), new_info['ext'])
96fccc10 3436 if not self._ensure_dir_exists(fname):
3437 return
a21e0ab1 3438 f['filepath'] = fname
96fccc10 3439 downloaded.append(fname)
dbf5416a 3440 partial_success, real_download = self.dl(fname, new_info)
3441 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3442 success = success and partial_success
adbc4ec4
THD
3443
3444 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3445 info_dict['__postprocessors'].append(merger)
3446 info_dict['__files_to_merge'] = downloaded
3447 # Even if there were no downloads, it is being merged only now
3448 info_dict['__real_download'] = True
3449 else:
3450 for file in downloaded:
3451 files_to_move[file] = None
4340deca
P
3452 else:
3453 # Just a single file
e04938ab 3454 dl_filename = existing_video_file(full_filename, temp_filename)
6c7274ec 3455 if dl_filename is None or dl_filename == temp_filename:
3456 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3457 # So we should try to resume the download
e8e73840 3458 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 3459 info_dict['__real_download'] = real_download
6c7274ec 3460 else:
3461 self.report_file_already_downloaded(dl_filename)
0202b52a 3462
0202b52a 3463 dl_filename = dl_filename or temp_filename
c571435f 3464 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 3465
3158150c 3466 except network_exceptions as err:
add96eb9 3467 self.report_error(f'unable to download video data: {err}')
4340deca 3468 return
86e5f3ed 3469 except OSError as err:
4340deca 3470 raise UnavailableVideoError(err)
add96eb9 3471 except ContentTooShortError as err:
86e5f3ed 3472 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
4340deca 3473 return
8222d8de 3474
415f8d51 3475 self._raise_pending_errors(info_dict)
de6000d9 3476 if success and full_filename != '-':
f17f8651 3477
fd7cfb64 3478 def fixup():
3479 do_fixup = True
3480 fixup_policy = self.params.get('fixup')
3481 vid = info_dict['id']
3482
3483 if fixup_policy in ('ignore', 'never'):
3484 return
3485 elif fixup_policy == 'warn':
3fe75fdc 3486 do_fixup = 'warn'
f89b3e2d 3487 elif fixup_policy != 'force':
3488 assert fixup_policy in ('detect_or_warn', None)
3489 if not info_dict.get('__real_download'):
3490 do_fixup = False
fd7cfb64 3491
3492 def ffmpeg_fixup(cndn, msg, cls):
3fe75fdc 3493 if not (do_fixup and cndn):
fd7cfb64 3494 return
3fe75fdc 3495 elif do_fixup == 'warn':
fd7cfb64 3496 self.report_warning(f'{vid}: {msg}')
3497 return
3498 pp = cls(self)
3499 if pp.available:
3500 info_dict['__postprocessors'].append(pp)
3501 else:
3502 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3503
3504 stretched_ratio = info_dict.get('stretched_ratio')
ca9def71
LNO
3505 ffmpeg_fixup(stretched_ratio not in (1, None),
3506 f'Non-uniform pixel ratio {stretched_ratio}',
3507 FFmpegFixupStretchedPP)
fd7cfb64 3508
993191c0 3509 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
24146491 3510 downloader = downloader.FD_NAME if downloader else None
adbc4ec4 3511
ca9def71
LNO
3512 ext = info_dict.get('ext')
3513 postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3514 isinstance(pp, FFmpegVideoConvertorPP)
3515 and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
f73c1180 3516 ) for pp in self._pps['post_process'])
ca9def71
LNO
3517
3518 if not postprocessed_by_ffmpeg:
f73c1180 3519 ffmpeg_fixup(fd != FFmpegFD and ext == 'm4a'
3520 and info_dict.get('container') == 'm4a_dash',
f2df4071 3521 'writing DASH m4a. Only some players support this container',
3522 FFmpegFixupM4aPP)
24146491 3523 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
494f5230 3524 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
adbc4ec4
THD
3525 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3526 FFmpegFixupM3u8PP)
4ce57d3b
A
3527 ffmpeg_fixup(downloader == 'dashsegments'
3528 and (info_dict.get('is_live') or info_dict.get('is_dash_periods')),
adbc4ec4
THD
3529 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3530
24146491 3531 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3532 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 3533
3534 fixup()
8222d8de 3535 try:
f46e2f9d 3536 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
af819c21 3537 except PostProcessingError as err:
add96eb9 3538 self.report_error(f'Postprocessing: {err}')
8222d8de 3539 return
ab8e5e51
AM
3540 try:
3541 for ph in self._post_hooks:
23c1a667 3542 ph(info_dict['filepath'])
ab8e5e51 3543 except Exception as err:
add96eb9 3544 self.report_error(f'post hooks: {err}')
ab8e5e51 3545 return
9e907ebd 3546 info_dict['__write_download_archive'] = True
2d30509f 3547
c487cf00 3548 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
a13e6848 3549 if self.params.get('force_write_download_archive'):
9e907ebd 3550 info_dict['__write_download_archive'] = True
ca6d59d2 3551 check_max_downloads()
8222d8de 3552
def __download_wrapper(self, func):
    """Wrap a download entry point with the common error/JSON-dump handling.

    The returned callable forwards all arguments to ``func``. An
    ``UnavailableVideoError`` is reported instead of propagating. A
    ``DownloadCancelled`` is printed and re-raised unless the
    ``break_per_url`` param is set, in which case the download counter is
    reset. On success, the result is dumped as JSON to stdout when the
    ``dump_single_json`` param is set. The wrapper itself returns None.
    """
    @functools.wraps(func)
    def guarded(*call_args, **call_kwargs):
        try:
            result = func(*call_args, **call_kwargs)
        except UnavailableVideoError as unavailable:
            self.report_error(unavailable)
            return
        except DownloadCancelled as cancelled:
            self.to_screen(f'[info] {cancelled}')
            if not self.params.get('break_per_url'):
                raise
            self._num_downloads = 0
            return

        # Success path: optionally emit the whole result as a single JSON
        if not self.params.get('dump_single_json', False):
            return
        self.post_extract(result)
        self.to_stdout(json.dumps(self.sanitize_info(result)))

    return guarded
3570
8222d8de
JMF
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one fixed
    output name (a template without '%' that is not '-'), unless
    ``max_downloads`` is 1. Returns the accumulated download return code.
    """
    urls = variadic(url_list)  # Passing a single URL is a common mistake
    template = self.params['outtmpl']['default']
    several = len(urls) > 1
    if several and template != '-' and '%' not in template and self.params.get('max_downloads') != 1:
        raise SameFileError(template)

    extract = self.__download_wrapper(self.extract_info)
    for url in urls:
        extract(url, force_generic_extractor=self.params.get('force_generic_extractor', False))

    return self._download_retcode
3586
def download_with_info_file(self, info_filename):
    """Download the video(s) described by a previously written info JSON file.

    Each info dict from the file is sanitized and processed. If processing
    fails with a download-related error and the info has a ``webpage_url``,
    the download is retried from that URL; without one the error is
    re-raised. Returns the accumulated download return code.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        payload = json.loads('\n'.join(lines))

    infos = [
        self.sanitize_info(entry, self.params.get('clean_infojson', True))
        for entry in variadic(payload)]

    process = self.__download_wrapper(self.process_ie_result)
    for info in infos:
        try:
            process(info, download=True)
        except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as failure:
            if not isinstance(failure, EntryNotInPlaylist):
                self.to_stderr('\r')
            fallback_url = info.get('webpage_url')
            if fallback_url is None:
                raise
            self.report_warning(f'The info failed to download: {failure}; trying with URL {fallback_url}')
            self.download([fallback_url])
        except ExtractorError as failure:
            self.report_error(failure)
    return self._download_retcode
1dcc4c0c 3608
cb202fd2 3609 @staticmethod
8012d892 3610 def sanitize_info(info_dict, remove_private_keys=False):
add96eb9 3611 """ Sanitize the infodict for converting to json """
3ad56b42 3612 if info_dict is None:
3613 return info_dict
6e84b215 3614 info_dict.setdefault('epoch', int(time.time()))
6a5a30f9 3615 info_dict.setdefault('_type', 'video')
b5e7a2e6 3616 info_dict.setdefault('_version', {
3617 'version': __version__,
3618 'current_git_head': current_git_head(),
3619 'release_git_head': RELEASE_GIT_HEAD,
20314dd4 3620 'repository': ORIGIN,
b5e7a2e6 3621 })
09b49e1f 3622
8012d892 3623 if remove_private_keys:
0a5a191a 3624 reject = lambda k, v: v is None or k.startswith('__') or k in {
f46e2f9d 3625 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
6f2287cb 3626 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
595ea4a9 3627 'playlist_autonumber',
6e84b215 3628 }
ae8f99e6 3629 else:
09b49e1f 3630 reject = lambda k, v: False
adbc4ec4
THD
3631
3632 def filter_fn(obj):
3633 if isinstance(obj, dict):
3634 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3635 elif isinstance(obj, (list, tuple, set, LazyList)):
3636 return list(map(filter_fn, obj))
3637 elif obj is None or isinstance(obj, (str, int, float, bool)):
3638 return obj
3639 else:
3640 return repr(obj)
3641
5226731e 3642 return filter_fn(info_dict)
cb202fd2 3643
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    """ Alias of sanitize_info for backward compatibility

    ``actually_filter`` is forwarded as sanitize_info's ``remove_private_keys``.
    """
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)
3648
43d7f5a5 3649 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3650 for filename in set(filter(None, files_to_delete)):
3651 if msg:
3652 self.to_screen(msg % filename)
3653 try:
3654 os.remove(filename)
3655 except OSError:
3656 self.report_warning(f'Unable to delete file {filename}')
3657 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3658 del info['__files_to_move'][filename]
3659
ed5835b4 3660 @staticmethod
3661 def post_extract(info_dict):
3662 def actual_post_extract(info_dict):
3663 if info_dict.get('_type') in ('playlist', 'multi_video'):
3664 for video_dict in info_dict.get('entries', {}):
3665 actual_post_extract(video_dict or {})
3666 return
3667
add96eb9 3668 post_extractor = info_dict.pop('__post_extractor', None) or dict
09b49e1f 3669 info_dict.update(post_extractor())
ed5835b4 3670
3671 actual_post_extract(info_dict or {})
3672
def run_pp(self, pp, infodict):
    """Run a single postprocessor and handle the files it wants removed.

    Ensures ``infodict`` has a '__files_to_move' mapping, calls ``pp.run``
    and returns the info dict it produced. Files the postprocessor marks for
    deletion are either registered in '__files_to_move' (``keepvideo`` param)
    or deleted. A PostProcessingError is reported and swallowed only when the
    ``ignoreerrors`` param is exactly True; otherwise it propagates.
    """
    infodict.setdefault('__files_to_move', {})
    try:
        obsolete_files, infodict = pp.run(infodict)
    except PostProcessingError as err:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is not True:
            raise
        self.report_error(err)
        return infodict

    if obsolete_files:
        if self.params.get('keepvideo', False):
            for path in obsolete_files:
                infodict['__files_to_move'].setdefault(path, '')
        else:
            self._delete_downloaded_files(
                *obsolete_files, info=infodict, msg='Deleting original file %s (pass -k to keep)')
    return infodict
5bfa4862 3695
def run_all_pps(self, key, info, *, additional_pps=None):
    """Run every postprocessor registered under ``key`` and return the info.

    ``additional_pps`` (if any) run before the registered ones. For every
    key except 'video', ``_forceprint`` is invoked first.
    """
    if key != 'video':
        self._forceprint(key, info)

    pipeline = (additional_pps or []) + self._pps[key]
    result = info
    for processor in pipeline:
        result = self.run_pp(processor, result)
    return result
277d6ff5 3702
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors for ``key`` on a shallow copy of ``ie_info``.

    Returns a ``(info, files_to_move)`` tuple. A PostProcessingError does not
    propagate: it is reported (not as an error) and recorded under
    '__pending_error' unless that key is already set.
    """
    info = dict(ie_info)
    info['__files_to_move'] = files_to_move or {}
    try:
        info = self.run_all_pps(key, info)
    except PostProcessingError as err:
        message = f'Preprocessing: {err}'
        info.setdefault('__pending_error', message)
        self.report_error(message, is_error=False)

    # The move list travels separately from the info dict
    moves = info.pop('__files_to_move', None)
    return info, moves
5bfa4862 3713
def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Stores ``filename`` as 'filepath' on ``info``, runs the 'post_process'
    postprocessors (preceded by any listed under '__postprocessors'), then the
    file-moving postprocessor, and finally the 'after_move' postprocessors,
    whose resulting info dict is returned.
    """
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move or {}

    extra_pps = info.get('__postprocessors')
    processed = self.run_all_pps('post_process', info, additional_pps=extra_pps)
    moved = self.run_pp(MoveFilesAfterDownloadPP(self), processed)
    del moved['__files_to_move']
    return self.run_all_pps('after_move', moved)
c1c9a79c 3722
5db07df6 3723 def _make_archive_id(self, info_dict):
e9fef7ee
S
3724 video_id = info_dict.get('id')
3725 if not video_id:
3726 return
5db07df6
PH
3727 # Future-proof against any change in case
3728 # and backwards compatibility with prior versions
e9fef7ee 3729 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3730 if extractor is None:
1211bb6d
S
3731 url = str_or_none(info_dict.get('url'))
3732 if not url:
3733 return
e9fef7ee 3734 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3735 for ie_key, ie in self._ies.items():
1211bb6d 3736 if ie.suitable(url):
8b7491c8 3737 extractor = ie_key
e9fef7ee
S
3738 break
3739 else:
3740 return
0647d925 3741 return make_archive_id(extractor, video_id)
5db07df6
PH
3742
def in_download_archive(self, info_dict):
    """Return whether the video is already recorded in the download archive.

    Both the current archive id and any ids listed under '_old_archive_ids'
    are checked. Always False when the archive is empty or unset.
    """
    archive = self.archive
    if not archive:
        return False

    candidates = [self._make_archive_id(info_dict), *(info_dict.get('_old_archive_ids') or [])]
    for candidate in candidates:
        if candidate in archive:
            return True
    return False
c1c9a79c
PH
3750
def record_download_archive(self, info_dict):
    """Add the video to the download archive.

    Does nothing when the ``download_archive`` param is None. The id is
    appended to the archive file when the param is path-like, and is always
    added to the in-memory archive.
    """
    archive_target = self.params.get('download_archive')
    if archive_target is None:
        return

    archive_id = self._make_archive_id(info_dict)
    assert archive_id

    self.write_debug(f'Adding to archive: {archive_id}')
    if is_path_like(archive_target):
        with locked_file(archive_target, 'a', encoding='utf-8') as archive_file:
            archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
dd82ffea 3763
8c51aa65 3764 @staticmethod
8abeeb94 3765 def format_resolution(format, default='unknown'):
9359f3d4 3766 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
fb04e403 3767 return 'audio only'
f49d89ee
PH
3768 if format.get('resolution') is not None:
3769 return format['resolution']
35615307 3770 if format.get('width') and format.get('height'):
ff51ed58 3771 return '%dx%d' % (format['width'], format['height'])
35615307 3772 elif format.get('height'):
add96eb9 3773 return '{}p'.format(format['height'])
35615307 3774 elif format.get('width'):
ff51ed58 3775 return '%dx?' % format['width']
3776 return default
8c51aa65 3777
8130779d 3778 def _list_format_headers(self, *headers):
3779 if self.params.get('listformats_table', True) is not False:
591bb9d3 3780 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
8130779d 3781 return headers
3782
c57f7757
PH
3783 def _format_note(self, fdict):
3784 res = ''
3785 if fdict.get('ext') in ['f4f', 'f4m']:
f304da8a 3786 res += '(unsupported)'
32f90364
PH
3787 if fdict.get('language'):
3788 if res:
3789 res += ' '
add96eb9 3790 res += '[{}]'.format(fdict['language'])
c57f7757 3791 if fdict.get('format_note') is not None:
f304da8a 3792 if res:
3793 res += ' '
3794 res += fdict['format_note']
c57f7757 3795 if fdict.get('tbr') is not None:
f304da8a 3796 if res:
3797 res += ', '
3798 res += '%4dk' % fdict['tbr']
c57f7757
PH
3799 if fdict.get('container') is not None:
3800 if res:
3801 res += ', '
add96eb9 3802 res += '{} container'.format(fdict['container'])
3089bc74
S
3803 if (fdict.get('vcodec') is not None
3804 and fdict.get('vcodec') != 'none'):
c57f7757
PH
3805 if res:
3806 res += ', '
3807 res += fdict['vcodec']
91c7271a 3808 if fdict.get('vbr') is not None:
c57f7757
PH
3809 res += '@'
3810 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3811 res += 'video@'
3812 if fdict.get('vbr') is not None:
3813 res += '%4dk' % fdict['vbr']
fbb21cf5 3814 if fdict.get('fps') is not None:
5d583bdf
S
3815 if res:
3816 res += ', '
add96eb9 3817 res += '{}fps'.format(fdict['fps'])
c57f7757
PH
3818 if fdict.get('acodec') is not None:
3819 if res:
3820 res += ', '
3821 if fdict['acodec'] == 'none':
3822 res += 'video only'
3823 else:
3824 res += '%-5s' % fdict['acodec']
3825 elif fdict.get('abr') is not None:
3826 if res:
3827 res += ', '
3828 res += 'audio'
3829 if fdict.get('abr') is not None:
3830 res += '@%3dk' % fdict['abr']
3831 if fdict.get('asr') is not None:
3832 res += ' (%5dHz)' % fdict['asr']
3833 if fdict.get('filesize') is not None:
3834 if res:
3835 res += ', '
3836 res += format_bytes(fdict['filesize'])
9732d77e
PH
3837 elif fdict.get('filesize_approx') is not None:
3838 if res:
3839 res += ', '
3840 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3841 return res
91c7271a 3842
aebb4f4b 3843 def _get_formats(self, info_dict):
3844 if info_dict.get('formats') is None:
3845 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3846 return [info_dict]
3847 return []
3848 return info_dict['formats']
b69fd25c 3849
def render_formats_table(self, info_dict):
    """Render the table of available formats, or return None if there are none.

    When the ``listformats_table`` param is exactly False, a simple
    four-column table is produced; otherwise the detailed, styled table is
    used. Formats whose 'preference' is below -1000 are left out.
    """
    formats = self._get_formats(info_dict)
    if not formats:
        return

    if self.params.get('listformats_table', True) is False:
        legacy_rows = []
        for f in formats:
            if (f.get('preference') or 0) >= -1000:
                legacy_rows.append([
                    format_field(f, 'format_id'),
                    format_field(f, 'ext'),
                    self.format_resolution(f),
                    self._format_note(f),
                ])
        return render_table(['format code', 'extension', 'resolution', 'note'], legacy_rows, extra_gap=1)

    def codec_cell(fmt, field):
        assert field in ('acodec', 'vcodec')
        codec = fmt.get(field)
        if not codec:
            return 'unknown'
        if codec != 'none':
            return '.'.join(codec.split('.')[:4])

        # The codec is 'none': the label depends on whether the other codec is missing too
        other_field = 'acodec' if field == 'vcodec' else 'vcodec'
        if fmt.get(other_field) == 'none':
            return 'images' if field == 'vcodec' else ''
        return self._format_out('audio only' if field == 'vcodec' else 'video only',
                                self.Styles.SUPPRESS)

    delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)

    def size_cell(fmt):
        # Exact size, else approximate size, else an estimate from bitrate and duration
        return (
            format_field(fmt, 'filesize', ' \t%s', func=format_bytes)
            or format_field(fmt, 'filesize_approx', '≈\t%s', func=format_bytes)
            or format_field(filesize_from_tbr(fmt.get('tbr'), info_dict.get('duration')), None,
                            self._format_out('~\t%s', self.Styles.SUPPRESS), func=format_bytes))

    def more_info_cell(fmt):
        return join_nonempty(format_field(fmt, 'language', '[%s]'), join_nonempty(
            self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if fmt.get('ext') in ('f4f', 'f4m') else None,
            (self._format_out('Maybe DRM', self.Styles.WARNING) if fmt.get('has_drm') == 'maybe'
             else self._format_out('DRM', self.Styles.BAD_FORMAT) if fmt.get('has_drm') else None),
            format_field(fmt, 'format_note'),
            format_field(fmt, 'container', ignore=(None, fmt.get('ext'))),
            delim=', '), delim=' ')

    def build_row(fmt):
        return [
            self._format_out(format_field(fmt, 'format_id'), self.Styles.ID),
            format_field(fmt, 'ext'),
            format_field(fmt, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(fmt, 'fps', '\t%d', func=round),
            format_field(fmt, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            format_field(fmt, 'audio_channels', '\t%s'),
            delim,
            size_cell(fmt),
            format_field(fmt, 'tbr', '\t%dk', func=round),
            shorten_protocol_name(fmt.get('protocol', '')),
            delim,
            codec_cell(fmt, 'vcodec'),
            format_field(fmt, 'vbr', '\t%dk', func=round),
            codec_cell(fmt, 'acodec'),
            format_field(fmt, 'abr', '\t%dk', func=round),
            format_field(fmt, 'asr', '\t%s', func=format_decimal_suffix),
            more_info_cell(fmt),
        ]

    table = [
        build_row(f) for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
        delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    return render_table(
        header_line, table, hide_empty=True,
        delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
8130779d 3916
def render_thumbnails_table(self, info_dict):
    """Render the table of available thumbnails, or return None if there are none.

    Missing widths and heights are shown as 'unknown'; every thumbnail is
    expected to have a 'url'.
    """
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        return None

    headers = self._list_format_headers('ID', 'Width', 'Height', 'URL')
    rows = []
    for thumb in thumbnails:
        rows.append([
            thumb.get('id'),
            thumb.get('width') or 'unknown',
            thumb.get('height') or 'unknown',
            thumb['url'],
        ])
    return render_table(headers, rows)
2412044c 3924
def render_subtitles_table(self, video_id, subtitles):
    """Render the table of available subtitles, or return None if there are none.

    @param video_id   Unused here; kept for interface compatibility.
    @param subtitles  Mapping of language code to a list of subtitle format
                      dicts, each of which must have an 'ext' and may have a
                      'name'.

    Each language becomes one row listing its name(s) and extensions, with
    the formats taken in reverse order. A language with an empty format list
    yields a row with empty name and format cells rather than raising.
    """
    def _row(lang, formats):
        # Built as two lists instead of unpacking zip(*...), which raises
        # ValueError when `formats` is empty
        ordered = list(reversed(formats or []))
        exts = [f['ext'] for f in ordered]
        names = [f.get('name') or 'unknown' for f in ordered]
        if len(set(names)) == 1:
            # All formats share one name: show it once, or not at all if unknown
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    if not subtitles:
        return None
    return render_table(
        self._list_format_headers('Language', 'Name', 'Formats'),
        [_row(lang, formats) for lang, formats in subtitles.items()],
        hide_empty=True)
3938
def __list_table(self, video_id, name, func, *args):
    """Print the table produced by ``func(*args)``, or a notice if it is empty.

    ``name`` is the plural label (e.g. 'formats') used in the messages.
    """
    table = func(*args)
    if table:
        self.to_screen(f'[info] Available {name} for {video_id}:')
        self.to_stdout(table)
    else:
        self.to_screen(f'{video_id} has no {name}')
3946
def list_formats(self, info_dict):
    """Print the table of formats available for the video in ``info_dict``."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'formats', self.render_formats_table, info_dict)
3949
def list_thumbnails(self, info_dict):
    """Print the table of thumbnails available for the video in ``info_dict``."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'thumbnails', self.render_thumbnails_table, info_dict)
3952
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the table of the given subtitles; ``name`` is the label used in messages."""
    renderer = self.render_subtitles_table
    self.__list_table(video_id, name, renderer, video_id, subtitles)
a504ced0 3955
dca08720
PH
def print_debug_header(self):
    """Write the verbose-mode debug header (versions, encodings, plugins, ...).

    Does nothing unless the ``verbose`` param is set. Output goes to the
    ``logger`` param's debug() when one is configured, otherwise it is
    written directly.
    """
    if not self.params.get('verbose'):
        return

    from . import _IN_CLI  # Must be delayed import

    # These imports can be slow. So import them only as needed
    from .extractor.extractors import _LAZY_LOADER
    from .extractor.extractors import (
        _PLUGIN_CLASSES as plugin_ies,
        _PLUGIN_OVERRIDES as plugin_ie_overrides,
    )

    def describe_stream(stream):
        label = str(getattr(stream, 'encoding', f'missing ({type(stream).__name__})'))
        flags = []
        if os.environ.get('TERM', '').lower() == 'dumb':
            flags.append('dumb')
        if not supports_terminal_sequences(stream):
            from .utils import WINDOWS_VT_MODE  # Must be imported locally
            flags.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
        return f'{label} ({",".join(flags)})' if flags else label

    locale_encoding = locale.getpreferredencoding()
    fs_encoding = sys.getfilesystemencoding()
    preferred_encoding = self.get_encoding()
    stream_encodings = ', '.join(
        f'{key} {describe_stream(stream)}' for key, stream in self._out_files.items_
        if stream is not None and key != 'console')
    encoding_str = 'Encodings: locale {}, fs {}, pref {}, {}'.format(
        locale_encoding, fs_encoding, preferred_encoding, stream_encodings)

    logger = self.params.get('logger')
    if logger:
        def write_debug(msg):
            logger.debug(f'[debug] {msg}')

        write_debug(encoding_str)
    else:
        def write_debug(msg):
            self._write_string(f'[debug] {msg}\n')

        # The encoding line itself is written without applying an encoding
        write_string(f'[debug] {encoding_str}\n', encoding=None)

    source = detect_variant()
    if VARIANT not in (None, 'pip'):
        source += '*'
    klass = type(self)
    if _IN_CLI:
        api_label = ''
    elif klass == YoutubeDL:
        api_label = 'API'
    else:
        api_label = f'API:{self.__module__}.{klass.__qualname__}'
    write_debug(join_nonempty(
        f'{REPOSITORY.rpartition("/")[2]} version',
        _make_label(ORIGIN, CHANNEL.partition('@')[2] or __version__, __version__),
        f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        api_label,
        delim=' '))

    if not _IN_CLI:
        write_debug(f'params: {self.params}')

    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if self.params['compat_opts']:
        write_debug('Compatibility options: {}'.format(', '.join(self.params['compat_opts'])))

    if current_git_head():
        write_debug(f'Git HEAD: {current_git_head()}')
    write_debug(system_identifier())

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    enabled_features = {name for name, enabled in ffmpeg_features.items() if enabled}
    if enabled_features:
        exe_versions['ffmpeg'] += ' ({})'.format(','.join(sorted(enabled_features)))

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {version}' for exe, version in sorted(exe_versions.items()) if version
    ) or 'none'
    write_debug(f'exe versions: {exe_str}')

    from .compat.compat_utils import get_package_info
    from .dependencies import available_dependencies

    library_labels = sorted({
        join_nonempty(*get_package_info(module)) for module in available_dependencies.values()
    })
    write_debug('Optional libraries: %s' % (', '.join(library_labels) or 'none'))

    write_debug(f'Proxy map: {self.proxies}')
    write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
    for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
        display_list = [
            '{}{}'.format(plugin_cls.__name__, '' if plugin_cls.__name__ == name else f' as {name}')
            for name, plugin_cls in plugins.items()]
        if plugin_type == 'Extractor':
            display_list.extend(
                f'{overrides[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
                for parent, overrides in plugin_ie_overrides.items())
        if not display_list:
            continue
        write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')

    plugin_dirs = plugin_directories()
    if plugin_dirs:
        write_debug(f'Plugin directories: {plugin_dirs}')

    # Not implemented
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
        write_debug(f'Public IP address: {ipaddr}')
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode()
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                f'You are using an outdated version (newest version: {latest_version})! '
                'See https://yt-dl.org/update if you need help updating.')
58b1f00d 4071
227bf1a3 4072 @functools.cached_property
4073 def proxies(self):
4074 """Global proxy configuration"""
dca08720 4075 opts_proxy = self.params.get('proxy')
dca08720
PH
4076 if opts_proxy is not None:
4077 if opts_proxy == '':
227bf1a3 4078 opts_proxy = '__noproxy__'
4079 proxies = {'all': opts_proxy}
dca08720 4080 else:
ac668111 4081 proxies = urllib.request.getproxies()
227bf1a3 4082 # compat. Set HTTPS_PROXY to __noproxy__ to revert
dca08720
PH
4083 if 'http' in proxies and 'https' not in proxies:
4084 proxies['https'] = proxies['http']
227bf1a3 4085
4086 return proxies
62fec3b2 4087
@functools.cached_property
def cookiejar(self):
    """Global cookiejar instance

    Loaded once from the ``cookiefile`` and ``cookiesfrombrowser`` params.
    """
    cookie_file = self.params.get('cookiefile')
    browser_spec = self.params.get('cookiesfrombrowser')
    return load_cookies(cookie_file, browser_spec, self)
4093
@property
def _opener(self):
    """
    Get a urllib OpenerDirector from the Urllib handler (deprecated).

    Emits a deprecation warning on every access.
    """
    self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
    urllib_handler = self._request_director.handlers['Urllib']
    return urllib_handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
4102
0b81d4d2 4103 def _get_available_impersonate_targets(self):
add96eb9 4104 # TODO(future): make available as public API
0b81d4d2 4105 return [
4106 (target, rh.RH_NAME)
4107 for rh in self._request_director.handlers.values()
4108 if isinstance(rh, ImpersonateRequestHandler)
4109 for target in rh.supported_targets
4110 ]
4111
4112 def _impersonate_target_available(self, target):
add96eb9 4113 # TODO(future): make available as public API
0b81d4d2 4114 return any(
4115 rh.is_supported_target(target)
4116 for rh in self._request_director.handlers.values()
4117 if isinstance(rh, ImpersonateRequestHandler))
4118
def urlopen(self, req):
    """ Start an HTTP download

    @param req  A URL string or a Request. Passing a urllib.request.Request
                is deprecated; it is converted after a deprecation warning.

    The request is normalised (basic-auth credentials in the URL become an
    Authorization header, the URL is sanitized, proxies and headers are
    cleaned) and sent through the request director. Some "no handler
    supports this" and SSL failures are re-raised as a RequestError with a
    more actionable message, always chained to the original error.
    """
    if isinstance(req, str):
        req = Request(req)
    elif isinstance(req, urllib.request.Request):
        self.deprecation_warning(
            'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
            'Use yt_dlp.networking.common.Request instead.')
        req = urllib_req_to_req(req)
    assert isinstance(req, Request)

    # compat: Assume user:pass url params are basic auth
    url, basic_auth_header = extract_basic_auth(req.url)
    if basic_auth_header:
        req.headers['Authorization'] = basic_auth_header
    req.url = sanitize_url(url)

    clean_proxies(proxies=req.proxies, headers=req.headers)
    clean_headers(req.headers)

    try:
        return self._request_director.send(req)
    except NoSupportingHandlers as e:
        for ue in e.unsupported_errors:
            # FIXME: This depends on the order of errors.
            if not (ue.handler and ue.msg):
                continue
            if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
                raise RequestError(
                    'file:// URLs are disabled by default in yt-dlp for security reasons. '
                    'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
            if (
                'unsupported proxy type: "https"' in ue.msg.lower()
                and 'requests' not in self._request_director.handlers
                and 'curl_cffi' not in self._request_director.handlers
            ):
                # Chained to the handler error, like the other branches
                raise RequestError(
                    'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests, curl_cffi',
                    cause=ue) from ue

            elif (
                re.match(r'unsupported url scheme: "wss?"', ue.msg.lower())
                and 'websockets' not in self._request_director.handlers
            ):
                raise RequestError(
                    'This request requires WebSocket support. '
                    'Ensure one of the following dependencies are installed: websockets',
                    cause=ue) from ue

            elif re.match(r'unsupported (?:extensions: impersonate|impersonate target)', ue.msg.lower()):
                # Chained to the handler error, like the other branches
                raise RequestError(
                    f'Impersonate target "{req.extensions["impersonate"]}" is not available.'
                    ' See --list-impersonate-targets for available targets.'
                    ' This request requires browser impersonation, however you may be missing dependencies'
                    ' required to support this target.',
                    cause=ue) from ue
        # No friendlier explanation applies: propagate the original error
        raise
    except SSLError as e:
        if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
            raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
        elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
            raise RequestError(
                'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
                'Try using --legacy-server-connect', cause=e) from e
        raise
227bf1a3 4182
def build_request_director(self, handlers, preferences=None):
    """Create a RequestDirector populated with instances of ``handlers``.

    @param handlers     Iterable of request handler classes to instantiate.
    @param preferences  Optional iterable of preference functions to add.

    Every handler is constructed with the cleaned global headers and
    proxies, the shared cookiejar and the networking-related params. With
    the 'prefer-legacy-http-handler' compat option, an extra preference
    favouring the 'Urllib' handler is added.
    """
    logger = _YDLLogger(self)
    headers = self.params['http_headers'].copy()
    proxies = self.proxies.copy()
    clean_headers(headers)
    clean_proxies(proxies, headers)

    def handler_kwargs():
        # Built afresh for each handler so that no param-derived dict is shared
        return dict(
            logger=logger,
            headers=headers,
            cookiejar=self.cookiejar,
            proxies=proxies,
            prefer_system_certs='no-certifi' in self.params['compat_opts'],
            verify=not self.params.get('nocheckcertificate'),
            **traverse_obj(self.params, {
                'verbose': 'debug_printtraffic',
                'source_address': 'source_address',
                'timeout': 'socket_timeout',
                'legacy_ssl_support': 'legacyserverconnect',
                'enable_file_urls': 'enable_file_urls',
                'impersonate': 'impersonate',
                'client_cert': {
                    'client_certificate': 'client_certificate',
                    'client_certificate_key': 'client_certificate_key',
                    'client_certificate_password': 'client_certificate_password',
                },
            }),
        )

    def prefer_urllib(rh, _):
        return 500 if rh.RH_KEY == 'Urllib' else 0

    director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
    for handler_cls in handlers:
        director.add_handler(handler_cls(**handler_kwargs()))

    director.preferences.update(preferences or [])
    if 'prefer-legacy-http-handler' in self.params['compat_opts']:
        director.preferences.add(prefer_urllib)
    return director
c365dba8 4217
@functools.cached_property
def _request_director(self):
    """The request director built from all registered request handlers (cached)."""
    handler_classes = _REQUEST_HANDLERS.values()
    return self.build_request_director(handler_classes, _RH_PREFERENCES)
4221
62fec3b2
PH
def encode(self, s):
    """Encode ``s`` with the configured encoding; bytes are returned unchanged.

    A UnicodeEncodeError is re-raised with a hint about the encoding
    configuration appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s  # Already encoded
4231
def get_encoding(self):
    """ Return the 'encoding' param, or preferredencoding() when that param is None/unset """
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a 4237
def _write_info_json(self, label, ie_result, infofn, overwrite=None):
    """
    Write the infojson for `ie_result` to `infofn`.

    `overwrite` falls back to the 'overwrites' param (default True) when None.
    Returns True = written, 'exists' = Already exists, False = skip, None = error
    """
    if overwrite is None:
        overwrite = self.params.get('overwrites', True)

    if not self.params.get('writeinfojson'):
        return False
    if not infofn:
        self.write_debug(f'Skipping writing {label} infojson')
        return False
    if not self._ensure_dir_exists(infofn):
        return None
    if not overwrite and os.path.exists(infofn):
        self.to_screen(f'[info] {label.title()} metadata is already present')
        return 'exists'

    self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
    try:
        write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
    except OSError:
        self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
        return None
    return True
80c03fa9 4260
def _write_description(self, label, ie_result, descfn):
    """
    Write the description of `ie_result` to `descfn`.

    Returns True = written (also when the file is already present and
    overwrites are disabled), False = skip, None = error
    """
    if not self.params.get('writedescription'):
        return False
    if not descfn:
        self.write_debug(f'Skipping writing {label} description')
        return False
    if not self._ensure_dir_exists(descfn):
        return None
    if not self.params.get('overwrites', True) and os.path.exists(descfn):
        # An existing file is reported to the caller the same way as a fresh write
        self.to_screen(f'[info] {label.title()} description is already present')
        return True

    description = ie_result.get('description')
    if description is None:
        self.to_screen(f'[info] There\'s no {label} description to write')
        return False

    try:
        self.to_screen(f'[info] Writing {label} description to: {descfn}')
        with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
            descfile.write(description)
    except OSError:
        self.report_error(f'Cannot write {label} description file {descfn}')
        return None
    return True
4284
def _write_subtitles(self, info_dict, filename):
    """
    Write the requested subtitles of `info_dict` next to `filename`.

    Returns a list of (sub_filename, final_sub_filename); or None if error.
    Each written/found subtitle also gets its 'filepath' key set.
    Raises DownloadError when a subtitle download fails and 'ignoreerrors' is not True.
    """
    written = []
    requested = info_dict.get('requested_subtitles')
    if not self.params.get('writesubtitles') and not self.params.get('writeautomaticsub'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        return written
    if not requested:
        self.to_screen('[info] There are no subtitles for the requested languages')
        return written

    final_base = self.prepare_filename(info_dict, 'subtitle')
    if not final_base:
        self.to_screen('[info] Skipping writing video subtitles')
        return written

    for lang, sub in requested.items():
        fmt = sub['ext']
        media_ext = info_dict.get('ext')
        tmp_path = subtitles_filename(filename, lang, fmt, media_ext)
        final_path = subtitles_filename(final_base, lang, fmt, media_ext)

        found = self.existing_file((final_path, tmp_path))
        if found:
            self.to_screen(f'[info] Video subtitle {lang}.{fmt} is already present')
            sub['filepath'] = found
            written.append((found, final_path))
            continue

        self.to_screen(f'[info] Writing video subtitles to: {tmp_path}')
        inline_data = sub.get('data')
        if inline_data is not None:
            # The extractor already supplied the subtitle text; just dump it to disk
            try:
                # Use newline='' to prevent conversion of newline characters
                # See https://github.com/ytdl-org/youtube-dl/issues/10268
                with open(tmp_path, 'w', encoding='utf-8', newline='') as subfile:
                    subfile.write(inline_data)
            except OSError:
                self.report_error(f'Cannot write video subtitles file {tmp_path}')
                return None
            sub['filepath'] = tmp_path
            written.append((tmp_path, final_path))
            continue

        try:
            # Download through a copy so that the default headers do not leak into sub
            download_info = sub.copy()
            download_info.setdefault('http_headers', info_dict.get('http_headers'))
            self.dl(tmp_path, download_info, subtitle=True)
            sub['filepath'] = tmp_path
            written.append((tmp_path, final_path))
        except (DownloadError, ExtractorError, OSError, ValueError, *network_exceptions) as err:
            msg = f'Unable to download video subtitles for {lang!r}: {err}'
            ignore_errors = self.params.get('ignoreerrors')
            if ignore_errors is True:
                self.report_warning(msg)
                continue
            # False or 'only_download'
            if not ignore_errors:
                self.report_error(msg)
            raise DownloadError(msg)
    return written
80c03fa9 4340
def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
    """
    Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error

    @param label                Human-readable kind of the item; only used in messages
    @param info_dict            Its 'thumbnails' list is read; entries that fail to
                                download are removed from it and written/found ones
                                get their 'filepath' key set
    @param filename             Path the (temporary) thumbnail names are derived from
    @param thumb_filename_base  Path the final thumbnail names are derived from;
                                defaults to `filename`
    """
    write_all = self.params.get('write_all_thumbnails', False)
    thumbnails, ret = [], []
    if write_all or self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails') or []
        if not thumbnails:
            self.to_screen(f'[info] There are no {label} thumbnails to download')
            return ret
    # The thumbnail id is only put into the file name when several files may be written
    multiple = write_all and len(thumbnails) > 1

    if thumb_filename_base is None:
        thumb_filename_base = filename
    if thumbnails and not thumb_filename_base:
        self.write_debug(f'Skipping writing {label} thumbnail')
        return ret

    if thumbnails and not self._ensure_dir_exists(filename):
        return None

    # Walk a reversed snapshot (last entry first) so that popping failed
    # entries from `thumbnails` does not disturb the remaining indices
    for idx, t in list(enumerate(thumbnails))[::-1]:
        thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
        thumb_display_id = f'{label} thumbnail {t["id"]}'
        thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
        thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

        existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
        if existing_thumb:
            self.to_screen('[info] {} is already present'.format((
                thumb_display_id if multiple else f'{label} thumbnail').capitalize()))
            t['filepath'] = existing_thumb
            ret.append((existing_thumb, thumb_filename_final))
        else:
            self.to_screen(f'[info] Downloading {thumb_display_id} ...')
            try:
                # Close the response deterministically instead of leaving the
                # underlying handle open until it is garbage collected
                with contextlib.closing(
                        self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))) as uf:
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                ret.append((thumb_filename, thumb_filename_final))
                t['filepath'] = thumb_filename
            except network_exceptions as err:
                if isinstance(err, HTTPError) and err.status == 404:
                    self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
                else:
                    self.report_warning(f'Unable to download {thumb_display_id}: {err}')
                thumbnails.pop(idx)
        if ret and not write_all:
            # Without write_all a single thumbnail is enough
            break
    return ret