import collections
import contextlib
import datetime
import errno
import fileinput
import functools
import io
import itertools
import json
import locale
import operator
import os
import random
import re
import shutil
import string
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata

from .cache import Cache
from .compat import urllib  # isort: split
from .compat import compat_os_name, compat_shlex_quote
from .cookies import load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    FFmpegVideoConvertorPP,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import REPOSITORY, current_git_head, detect_variant
from .utils import (
    DEFAULT_OUTTMPL,
    IDENTITY,
    LINK_TEMPLATES,
    MEDIA_EXTENSIONS,
    NO_DEFAULT,
    NUMBER_RE,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    FormatSorter,
    GeoRestrictedError,
    HEADRequest,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PerRequestProxyHandler,
    PlaylistEntries,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    UserNotLive,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
    age_restricted,
    args_to_str,
    bug_reports_message,
    date_from_str,
    deprecation_warning,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    escapeHTML,
    expand_path,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_compatible_ext,
    get_domain,
    int_or_none,
    iri_to_uri,
    is_path_like,
    join_nonempty,
    locked_file,
    make_archive_id,
    make_dir,
    make_HTTPS_handler,
    merge_headers,
    network_exceptions,
    number_of_digits,
    orderedSet,
    orderedSet_from_options,
    parse_filesize,
    preferredencoding,
    prepend_extension,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    system_identifier,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_call,
    try_get,
    url_basename,
    variadic,
    version_tuple,
    windows_enable_vt_mode,
    write_json_file,
    write_string,
)
from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do),
    it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge of the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username: Username for authentication purposes.
    password: Password for authentication purposes.
    videopassword: Password for accessing a video.
    ap_mso: Adobe Pass multiple-system operator identifier.
    ap_username: Multiple-system operator account username.
    ap_password: Multiple-system operator account password.
    usenetrc: Use netrc for authentication instead.
    netrc_location: Location of the netrc file. Defaults to ~/.netrc.
    verbose: Print additional info to stdout.
    quiet: Do not print messages to stdout.
    no_warnings: Do not print out anything for warnings.
    forceprint: A dict with keys WHEN mapped to a list of templates to
        print to stdout. The allowed keys are 'video' or any of the
        items in utils.POSTPROCESS_WHEN.
        For compatibility, a single list is also accepted
    print_to_file: A dict with keys WHEN (same as forceprint) mapped to
        a list of tuples with (template, filename)
    forcejson: Force printing info_dict as JSON.
    dump_single_json: Force printing the info_dict of the whole playlist
        (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
        of 'skip_download' or 'simulate'.
    simulate: Do not download the video files. If unset (or None),
        simulate only if listsubtitles, listformats or list_thumbnails is used
    format: Video format code. See "FORMAT SELECTION" for more details.
        You can also pass a function. The function takes 'ctx' as
        argument and returns the formats to download.
        See "build_format_selector" for an implementation
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
        extracting metadata even if the video is not actually
        available for download (experimental)
    format_sort: A list of fields by which to sort the video formats.
        See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
        for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
        over non-free ones of the same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
        into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
        into a single file
    check_formats: Whether to test if the formats are downloadable.
        Can be True (check all), False (check none),
        'selected' (check selected formats),
        or None (check only if requested by extractor)
    paths: Dictionary of output paths. The allowed keys are 'home',
        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl: Dictionary of templates for output names. Allowed keys
        are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
        For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name: Limit length of filename (extension excluded)
    windowsfilenames: Force the filenames to be Windows compatible
    ignoreerrors: Do not stop on download/postprocessing errors.
        Can be 'only_download' to ignore only download errors.
        Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
        the playlist is skipped
    allowed_extractors: List of regexes to match against extractor names that are allowed
    overwrites: Overwrite all video and metadata files if True,
        overwrite only non-video files if None,
        and don't overwrite any file if False.
        For compatibility with youtube-dl,
        "nooverwrites" may also be used instead
    playlist_items: Specific indices of playlist to download.
    playlistrandom: Download playlist items in random order.
    lazy_playlist: Process playlist entries as they are received.
    matchtitle: Download only matching titles.
    rejecttitle: Reject downloads for matching titles.
    logger: Log messages to a logging.Logger instance.
    logtostderr: Print everything to stderr instead of stdout.
    consoletitle: Display progress in the console window's titlebar.
    writedescription: Write the video description to a .description file
    writeinfojson: Write the video metadata to a .info.json file
    clean_infojson: Remove private fields from the infojson
    getcomments: Extract video comments. This will not be written to disk
        unless writeinfojson is also given
    writeannotations: Write the video annotations to a .annotations.xml file
    writethumbnail: Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc.
        also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink: Write an internet shortcut file, depending on the
        current platform (.url/.webloc/.desktop)
    writeurllink: Write a Windows internet shortcut file (.url)
    writewebloclink: Write a macOS internet shortcut file (.webloc)
    writedesktoplink: Write a Linux internet shortcut file (.desktop)
    writesubtitles: Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    listsubtitles: Lists all available subtitles for the video
    subtitlesformat: The format code for subtitles
    subtitleslangs: List of languages of the subtitles to download (can be regex).
        The list may contain "all" to refer to all the available
        subtitles. The language can be prefixed with a "-" to
        exclude it from the requested languages, e.g. ['all', '-live_chat']
    keepvideo: Keep the video file after post-processing
    daterange: A utils.DateRange object, download only if the upload_date is in the range.
    skip_download: Skip the actual download of the video file
    cachedir: Location of the cache files in the filesystem.
        False to disable filesystem cache.
    noplaylist: Download single video instead of a playlist if in doubt.
    age_limit: An integer representing the user's age in years.
        Unsuitable videos for the given age are skipped.
    min_views: An integer representing the minimum view count the video
        must have in order to not be skipped.
        Videos without view count information are always
        downloaded. None for no limit.
    max_views: An integer representing the maximum view count.
        Videos that are more popular than that are not
        downloaded.
        Videos without view count information are always
        downloaded. None for no limit.
    download_archive: A set, or the name of a file where all downloads are recorded.
        Videos already present in the file are not downloaded again.
    break_on_existing: Stop the download process after attempting to download a
        file that is in the archive.
    break_per_url: Whether break_on_reject and break_on_existing
        should act on each input URL as opposed to for the entire queue
    cookiefile: File name or text stream from where cookies should be read and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
        name/path from where cookies are loaded, the name of the keyring,
        and the container name, e.g. ('chrome', ) or
        ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
        support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    client_certificate: Path to client certificate file in PEM format. May include the private key
    client_certificate_key: Path to private key file for client certificate
    client_certificate_password: Password for client certificate private key, if encrypted.
        If not provided and the key is encrypted, yt-dlp will ask interactively
    prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
        (Only supported by some extractors)
    enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons.
    http_headers: A dictionary of custom headers to be used for all requests
    proxy: URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
        on geo-restricted sites.
    socket_timeout: Time to wait for unresponsive hosts, in seconds
    bidi_workaround: Work around buggy terminals without bidirectional text
        support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    default_search: Prepend this string if an input URL is not valid.
        'auto' for elaborate guessing
    encoding: Use this encoding instead of the system-specified one.
    extract_flat: Whether to resolve and process url_results further
        * False: Always process. Default for API
        * True: Never process
        * 'in_playlist': Do not process inside playlist/multi_video
        * 'discard': Always process, but don't return the result
          from inside playlist/multi_video
        * 'discard_in_playlist': Same as "discard", but only for
          playlists (not multi_video). Default for CLI
    wait_for_video: If given, wait for scheduled streams to become available.
        The value should be a tuple containing the range
        (min_secs, max_secs) to wait between retries
    postprocessors: A list of dictionaries, each with an entry
        * key: The name of the postprocessor. See
          yt_dlp/postprocessor/__init__.py for a list.
        * when: When to run the postprocessor. Allowed values are
          the entries of utils.POSTPROCESS_WHEN
          Assumed to be 'post_process' if not given
    progress_hooks: A list of functions that get called on download
        progress, with a dictionary with the entries
        * status: One of "downloading", "error", or "finished".
          Check this first and ignore unknown values.
        * info_dict: The extracted info_dict

        If status is one of "downloading" or "finished", the
        following properties may also be present:
        * filename: The final filename (always present)
        * tmpfilename: The filename we're currently writing to
        * downloaded_bytes: Bytes on disk
        * total_bytes: Size of the whole file, None if unknown
        * total_bytes_estimate: Guess of the eventual file size,
          None if unavailable.
        * elapsed: The number of seconds since download started.
        * eta: The estimated time in seconds, None if unknown
        * speed: The download speed in bytes/second, None if
          unknown
        * fragment_index: The counter of the currently
          downloaded video fragment.
        * fragment_count: The number of fragments (= individual
          files that will be merged)

        Progress hooks are guaranteed to be called at least once
        (with status "finished") if the download is successful.
    postprocessor_hooks: A list of functions that get called on postprocessing
        progress, with a dictionary with the entries
        * status: One of "started", "processing", or "finished".
          Check this first and ignore unknown values.
        * postprocessor: Name of the postprocessor
        * info_dict: The extracted info_dict

        Progress hooks are guaranteed to be called at least twice
        (with status "started" and "finished") if the processing is successful.
    merge_output_format: "/" separated list of extensions to use when merging formats.
    final_ext: Expected final extension; used to detect when the file was
        already downloaded and converted
    fixup: Automatically correct known faults of the file.
        One of:
        - "never": do nothing
        - "warn": only emit a warning
        - "detect_or_warn": check whether we can do anything
          about it, warn otherwise (default)
    source_address: Client-side IP address to bind to.
    sleep_interval_requests: Number of seconds to sleep between requests
        during extraction
    sleep_interval: Number of seconds to sleep before each download when
        used alone or a lower bound of a range for randomized
        sleep before each download (minimum possible number
        of seconds to sleep) when used along with
        max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
        download (maximum possible number of seconds to sleep).
        Must only be used along with sleep_interval.
        Actual sleep time will be a random float from range
        [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats: Print an overview of available video formats and exit.
    list_thumbnails: Print a table of all thumbnails and exit.
    match_filter: A function that gets called for every video with the signature
        (info_dict, *, incomplete: bool) -> Optional[str]
        For backward compatibility with youtube-dl, the signature
        (info_dict) -> Optional[str] is also allowed.
        - If it returns a message, the video is ignored.
        - If it returns None, the video is downloaded.
        - If it returns utils.NO_DEFAULT, the user is interactively
          asked whether to download the video.
        - Raise utils.DownloadCancelled(msg) to abort remaining
          downloads when a video is rejected.
        match_filter_func in utils.py is one example for this.
    color: A dictionary with output stream names as keys
        and their respective color policy as values.
        Can also just be a single color policy,
        in which case it applies to all outputs.
        Valid stream names are 'stdout' and 'stderr'.
        Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
    geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
        HTTP header
    geo_bypass_country:
        Two-letter ISO 3166-2 country code that will be used for
        explicit geographic restriction bypassing via faking
        X-Forwarded-For HTTP header
    geo_bypass_ip_block:
        IP range in CIDR notation that will be used similarly to
        geo_bypass_country
    external_downloader: A dictionary of protocol keys and the executable of the
        external downloader to use for it. The allowed protocols
        are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
        Set the value to 'native' to use the native downloader
    compat_opts: Compatibility options. See "Differences in default behavior".
        The following options do not work when used through the API:
        filename, abort-on-error, multistreams, no-live-chat, format-sort,
        no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
        Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
        Allowed keys are 'download', 'postprocess',
        'download-title' (console title) and 'postprocess-title'.
        The template is mapped on a dictionary with keys 'progress' and 'info'
    retry_sleep_functions: Dictionary of functions that take the number of attempts
        as argument and return the time to sleep in seconds.
        Allowed keys are 'http', 'fragment', 'file_access'
    download_ranges: A callback function that gets called for every video with
        the signature (info_dict, ydl) -> Iterable[Section].
        Only the returned sections will be downloaded.
        Each Section is a dict with the following keys:
        * start_time: Start time of the section in seconds
        * end_time: End time of the section in seconds
        * title: Section title (Optional)
        * index: Section number (Optional)
    force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
    noprogress: Do not print the progress bar
    live_from_start: Whether to download livestream videos from the start

    The following parameters are not used by YoutubeDL itself, they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
        to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
        and a list of additional command-line arguments for the
        postprocessor/executable. The dict can also have "PP+EXE" keys
        which are used when the given exe is used by the given PP.
        Use 'default' as the name for arguments to be passed to all PP.
        For compatibility with youtube-dl, a single list of args
        can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors (default: 3)
    dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists into different formats at
        discontinuities such as ad breaks (default: False)
    extractor_args: A dictionary of arguments to be passed to the extractors.
        See "EXTRACTOR ARGUMENTS" for details.
        E.g. {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched: Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    break_on_reject: Stop the download process when encountering a video that
        has been filtered out.
        - `raise DownloadCancelled(msg)` in match_filter instead
    force_generic_extractor: Force downloader to use the generic extractor
        - Use allowed_extractors = ['generic', 'default']
    playliststart: - Use playlist_items
        Playlist item to start at.
    playlistend: - Use playlist_items
        Playlist item to end at.
    playlistreverse: - Use playlist_items
        Download playlist items in reverse order.
    forceurl: - Use forceprint
        Force printing final URL.
    forcetitle: - Use forceprint
        Force printing title.
    forceid: - Use forceprint
        Force printing ID.
    forcethumbnail: - Use forceprint
        Force printing thumbnail URL.
    forcedescription: - Use forceprint
        Force printing description.
    forcefilename: - Use forceprint
        Force printing final filename.
    forceduration: - Use forceprint
        Force printing duration.
    allsubtitles: - Use subtitleslangs = ['all']
        Downloads all the subtitles of the video
        (requires writesubtitles or writeautomaticsub)
    include_ads: - Doesn't work
        Download ads as well
    call_home: - Not implemented
        Boolean, true iff we are allowed to contact the
        yt-dlp servers for debugging.
    post_hooks: - Register a custom postprocessor
        A list of functions that get called as the final step
        for each video file, after all postprocessors have been
        called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
        Use the native HLS downloader instead of ffmpeg/avconv
        if True, otherwise use ffmpeg/avconv if False, otherwise
        use downloader suggested by extractor if None.
    prefer_ffmpeg: - avconv support is deprecated
        If False, use avconv instead of ffmpeg if both are available,
        otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
        If True (default), DASH manifests and related
        data will be downloaded and processed by extractor.
        You can reduce network I/O by disabling it if you don't
        care about DASH. (only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
        If True (default), HLS manifests and related
        data will be downloaded and processed by extractor.
        You can reduce network I/O by disabling it if you don't
        care about HLS. (only for youtube)
    no_color: Same as `color='no_color'`
    """

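    # A minimal usage sketch of the options documented above. This assumes the
    # public download() API defined later in this class; the URL and template
    # values below are illustrative only:
    #
    #   with YoutubeDL({
    #       'format': 'bestvideo+bestaudio/best',
    #       'outtmpl': {'default': '%(title)s [%(id)s].%(ext)s'},
    #       'paths': {'home': '~/Downloads'},
    #   }) as ydl:
    #       ydl.download(['https://example.com/some-video'])
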
    _NUMERIC_FIELDS = {
        'width', 'height', 'asr', 'audio_channels', 'fps',
        'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
    }
    _format_selection_exts = {
        'audio': set(MEDIA_EXTENSIONS.common_audio),
        'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
        'storyboards': set(MEDIA_EXTENSIONS.storyboards),
    }

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )

        try:
            windows_enable_vt_mode()
        except Exception as e:
            self.write_debug(f'Failed to enable VT mode: {e}')

        if self.params.get('no_color'):
            if self.params.get('color') is not None:
                self.report_warning('Overwriting params from "color" with "no_color"')
            self.params['color'] = 'no_color'

        term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'

        def process_color_policy(stream):
            stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
            policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
            if policy in ('auto', None):
                return term_allow_color and supports_terminal_sequences(stream)
            assert policy in ('always', 'never', 'no_color')
            return {'always': True, 'never': False}.get(policy, policy)

        self._allow_colors = Namespace(**{
            name: process_color_policy(stream)
            for name, stream in self._out_files.items_ if name != 'console'
        })
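        # e.g. params like {'color': {'stdout': 'auto', 'stderr': 'no_color'}} or just
        # {'color': 'never'} resolve here to per-stream values (illustrative settings;
        # the valid policies are listed under "color" in the class docstring)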

        # The code is left like this to be reused for future deprecations
        MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
        current_version = sys.version_info[:2]
        if current_version < MIN_RECOMMENDED:
            msg = ('Support for Python version %d.%d has been deprecated. '
                   'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
                   '\n You will no longer receive updates on this version')
            if current_version < MIN_SUPPORTED:
                msg = 'Python version %d.%d is no longer supported'
            self.deprecated_feature(
                f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if self.params.get('simulate') is None and any((
            self.params.get('list_thumbnails'),
            self.params.get('listformats'),
            self.params.get('listsubtitles'),
        )):
            self.params['simulate'] = 'list_only'

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}
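            # e.g. a legacy list value ['%(title)s'] becomes {'video': ['%(title)s']}
            # (illustrative template only)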

        if auto_init:
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        # Set http_headers defaults according to std_headers
        self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        self._setup_opener()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            archive = set()
            if fn is None:
                return archive
            elif not is_path_like(fn):
                return fn

            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
            return archive

        self.archive = preload_download_archive(self.params.get('download_archive'))
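        # Each entry of self.archive is a stripped line of the archive file;
        # yt-dlp conventionally writes one '<ie_key> <video_id>' pair per line
        # (cf. make_archive_id), but any previously written line is matched as-is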

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))
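        # e.g. for argv ['https://example.com/watch', '-abc123def_'] the suggested
        # invocation becomes: yt-dlp https://example.com/watch -- -abc123def_
        # (illustrative values only)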

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key; it will try to get one from
        the _ies list, and if there is no instance it will create a new one and
        add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
        all_ies['end'] = UnsupportedURLIE()
        try:
            ie_names = orderedSet_from_options(
                self.params.get('allowed_extractors', ['default']), {
                    'all': list(all_ies),
                    'default': [name for name, ie in all_ies.items() if ie._ENABLED],
                }, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
        for name in ie_names:
            self.add_info_extractor(all_ies[name])
        self.write_debug(f'Loaded {len(ie_names)} extractors')

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                     'Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                     'Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen, only_once=only_once)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors is True else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message, *, stacklevel=0):
        deprecation_warning(
            message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)

    def deprecated_feature(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'Deprecated Feature: {message}')
        self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message or Print message to stderr'''
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        sanitize = IDENTITY
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))
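        # e.g. with params {'paths': {'home': '/media/videos', 'thumbnail': 'thumbs'}},
        # get_output_path('thumbnail', 'clip.jpg') -> '/media/videos/thumbs/clip.jpg'
        # (illustrative values; an absolute dir_type path would override 'home')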

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join(random.choices(string.ascii_letters, k=32))
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')
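        # e.g. an outtmpl of '~/out/%(title)s - 100%%.%(ext)s' gets '~' expanded here
        # while the literal '%%' (and '$$') survive for the later substitution step
        # (illustrative template)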

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)
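        # e.g. 'Progress: 100% of %(total)s' -> 'Progress: 100%% of %(total)s'
        # (a sketch: stray '%' are doubled, recognised template keys are left alone)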

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err
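        # e.g. validate_outtmpl('%(title)s.%(ext)s') -> None, while an unbalanced
        # template such as '%(title' returns the ValueError raised by the test
        # substitution above (illustrative inputs)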

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        info_dict.pop('__pending_error', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }
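        # e.g. in a 123-entry playlist, a plain %(playlist_index)s in the template
        # is later widened to %(playlist_index)03d (illustrative entry count)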

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int, slice or "{field, ...}"
        FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
        FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
            'inner': FIELD_INNER_RE,
            'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
        }
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')
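        # Informally, the syntax accepted inside %(...)s is
        #   field.path(+/-offset)(>strftime)(,alternate)(&replacement)(|default)
        # e.g. '%(duration>%H-%M-%S)s' or '%(uploader,creator|Unknown)s'
        # (the second example is illustrative; the regex above is authoritative)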

        def _traverse_infodict(fields):
            fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                      for f in ([x] if x.startswith('{') else x.split('.'))]
            for i in (0, -1):
                if fields and not fields[i]:
                    fields.pop(i)

            for i, f in enumerate(fields):
                if not f.startswith('{'):
                    continue
                assert f.endswith('}'), f'No closing brace for {f} in {fields}'
                fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}

            return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
            if sanitize and value == '':
                value = None
            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params['compat_opts']
                else NO_DEFAULT))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        class _ReplacementFormatter(string.Formatter):
            def get_field(self, field_name, args, kwargs):
                if field_name.isdigit():
                    return args[0], -1
                raise ValueError('Unsupported field')

        replacement_formatter = _ReplacementFormatter()

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = f'0{field_size_compat_map[key]:d}d'

            if None not in (value, replacement):
                try:
1291 value = replacement_formatter.format(replacement, value)
1292 except ValueError:
ebe1b4e3 1293 value, default = None, na
752cda38 1294
4476d2c7 1295 flags = outer_mobj.group('conversion') or ''
7d1eb38a 1296 str_fmt = f'{fmt[:-1]}s'
ebe1b4e3 1297 if value is None:
1298 value, fmt = default, 's'
1299 elif fmt[-1] == 'l': # list
4476d2c7 1300 delim = '\n' if '#' in flags else ', '
9e907ebd 1301 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
524e2e4f 1302 elif fmt[-1] == 'j': # json
deae7c17 1303 value, fmt = json.dumps(
1304 value, default=_dumpjson_default,
9b9dad11 1305 indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
47cdc68e 1306 elif fmt[-1] == 'h': # html
deae7c17 1307 value, fmt = escapeHTML(str(value)), str_fmt
524e2e4f 1308 elif fmt[-1] == 'q': # quoted
4476d2c7 1309 value = map(str, variadic(value) if '#' in flags else [value])
1310 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
524e2e4f 1311 elif fmt[-1] == 'B': # bytes
0f06bcd7 1312 value = f'%{str_fmt}'.encode() % str(value).encode()
f5aa5cfb 1313 value, fmt = value.decode('utf-8', 'ignore'), 's'
524e2e4f 1314 elif fmt[-1] == 'U': # unicode normalized
524e2e4f 1315 value, fmt = unicodedata.normalize(
1316 # "+" = compatibility equivalence, "#" = NFD
4476d2c7 1317 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
524e2e4f 1318 value), str_fmt
e0fd9573 1319 elif fmt[-1] == 'D': # decimal suffix
abbeeebc 1320 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1321 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1322 factor=1024 if '#' in flags else 1000)
37893bb0 1323 elif fmt[-1] == 'S': # filename sanitization
e0fd9573 1324 value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
7d1eb38a 1325 elif fmt[-1] == 'c':
524e2e4f 1326 if value:
1327 value = str(value)[0]
76a264ac 1328 else:
524e2e4f 1329 fmt = str_fmt
76a264ac 1330 elif fmt[-1] not in 'rs': # numeric
a439a3a4 1331 value = float_or_none(value)
752cda38 1332 if value is None:
1333 value, fmt = default, 's'
901130bb 1334
752cda38 1335 if sanitize:
1336 if fmt[-1] == 'r':
1337 # If value is an object, sanitize might convert it to a string
1338 # So we convert it to repr first
7d1eb38a 1339 value, fmt = repr(value), str_fmt
639f1cea 1340 if fmt[-1] in 'csr':
e0fd9573 1341 value = sanitizer(initial_field, value)
901130bb 1342
b868936c 1343 key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
385a27fa 1344 TMPL_DICT[key] = value
b868936c 1345 return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
752cda38 1346
385a27fa 1347 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
143db31d 1348
819e0531 1349 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1350 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1351 return self.escape_outtmpl(outtmpl) % info_dict
1352
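    # Illustrative sketch (not part of the original source): the two-step API above,
    # assuming `ydl` is a YoutubeDL instance and `info` a plain info dict:
    #
    #   tmpl, tmpl_dict = ydl.prepare_outtmpl('%(title)s [%(id)s].%(ext)s', info)
    #   name = ydl.escape_outtmpl(tmpl) % tmpl_dict
    #
    #   # equivalent single call:
    #   name = ydl.evaluate_outtmpl('%(title)s [%(id)s].%(ext)s', info)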
5127e92a 1353 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1354 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1355 if outtmpl is None:
bf1824b3 1356 outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
8222d8de 1357 try:
5127e92a 1358 outtmpl = self._outtmpl_expandpath(outtmpl)
e0fd9573 1359 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
6a0546e3 1360 if not filename:
1361 return None
15da37c7 1362
5127e92a 1363 if tmpl_type in ('', 'temp'):
6a0546e3 1364 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1365 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1366 filename = replace_extension(filename, ext, final_ext)
5127e92a 1367 elif tmpl_type:
6a0546e3 1368 force_ext = OUTTMPL_TYPES[tmpl_type]
1369 if force_ext:
1370 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
de6000d9 1371
bdc3fd2f
U
1372 # https://github.com/blackjack4494/youtube-dlc/issues/85
1373 trim_file_name = self.params.get('trim_file_name', False)
1374 if trim_file_name:
5c22c63d 1375 no_ext, *ext = filename.rsplit('.', 2)
1376 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
bdc3fd2f 1377
0202b52a 1378 return filename
8222d8de 1379 except ValueError as err:
6febd1c1 1380 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
1381 return None
1382
5127e92a 1383 def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1384 """Generate the output filename"""
1385 if outtmpl:
1386 assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1387 dir_type = None
1388 filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
80c03fa9 1389 if not filename and dir_type not in ('', 'temp'):
1390 return ''
de6000d9 1391
c84aeac6 1392 if warn:
21cd8fae 1393 if not self.params.get('paths'):
de6000d9 1394 pass
1395 elif filename == '-':
c84aeac6 1396 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
de6000d9 1397 elif os.path.isabs(filename):
c84aeac6 1398 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
de6000d9 1399 if filename == '-' or not filename:
1400 return filename
1401
21cd8fae 1402 return self.get_output_path(dir_type, filename)
0202b52a 1403
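    # Illustrative sketch (not part of the original source): assuming `ydl` and a
    # video info dict `info`:
    #
    #   path = ydl.prepare_filename(info)              # uses outtmpl['default']
    #   temp_path = ydl.prepare_filename(info, 'temp')  # named template type; the
    #       # result is combined with the configured --paths via get_output_path()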
120fe513 1404 def _match_entry(self, info_dict, incomplete=False, silent=False):
6368e2e6 1405 """Returns None if the file should be downloaded"""
93b39cdb 1406 _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
d7b460d0 1407 assert incomplete or _type == 'video', 'Only video result can be considered complete'
8222d8de 1408
3bec830a 1409 video_title = info_dict.get('title', info_dict.get('id', 'entry'))
c77495e3 1410
8b0d7497 1411 def check_filter():
d7b460d0 1412 if _type in ('playlist', 'multi_video'):
1413 return
1414 elif _type in ('url', 'url_transparent') and not try_call(
1415 lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
1416 return
1417
8b0d7497 1418 if 'title' in info_dict:
1419 # This can happen when we're just evaluating the playlist
1420 title = info_dict['title']
1421 matchtitle = self.params.get('matchtitle', False)
1422 if matchtitle:
1423 if not re.search(matchtitle, title, re.IGNORECASE):
1424 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1425 rejecttitle = self.params.get('rejecttitle', False)
1426 if rejecttitle:
1427 if re.search(rejecttitle, title, re.IGNORECASE):
1428 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
6368e2e6 1429
8b0d7497 1430 date = info_dict.get('upload_date')
1431 if date is not None:
1432 dateRange = self.params.get('daterange', DateRange())
1433 if date not in dateRange:
86e5f3ed 1434 return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
8b0d7497 1435 view_count = info_dict.get('view_count')
1436 if view_count is not None:
1437 min_views = self.params.get('min_views')
1438 if min_views is not None and view_count < min_views:
1439 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1440 max_views = self.params.get('max_views')
1441 if max_views is not None and view_count > max_views:
1442 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1443 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1444 return 'Skipping "%s" because it is age restricted' % video_title
8b0d7497 1445
8f18aca8 1446 match_filter = self.params.get('match_filter')
fe2ce85a 1447 if match_filter is None:
1448 return None
1449
1450 cancelled = None
1451 try:
8f18aca8 1452 try:
1453 ret = match_filter(info_dict, incomplete=incomplete)
1454 except TypeError:
1455 # For backward compatibility
1456 ret = None if incomplete else match_filter(info_dict)
fe2ce85a 1457 except DownloadCancelled as err:
1458 if err.msg is not NO_DEFAULT:
1459 raise
1460 ret, cancelled = err.msg, err
1461
1462 if ret is NO_DEFAULT:
1463 while True:
1464 filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1465 reply = input(self._format_screen(
1466 f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1467 if reply in {'y', ''}:
1468 return None
1469 elif reply == 'n':
1470 if cancelled:
1471 raise type(cancelled)(f'Skipping {video_title}')
1472 return f'Skipping {video_title}'
1473 return ret
8b0d7497 1474
c77495e3 1475 if self.in_download_archive(info_dict):
1476 reason = '%s has already been recorded in the archive' % video_title
1477 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1478 else:
fe2ce85a 1479 try:
1480 reason = check_filter()
1481 except DownloadCancelled as e:
1482 reason, break_opt, break_err = e.msg, 'match_filter', type(e)
1483 else:
1484 break_opt, break_err = 'break_on_reject', RejectedVideoReached
8b0d7497 1485 if reason is not None:
120fe513 1486 if not silent:
1487 self.to_screen('[download] ' + reason)
c77495e3 1488 if self.params.get(break_opt, False):
1489 raise break_err()
8b0d7497 1490 return reason
fe7e0c98 1491
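    # Illustrative sketch (not part of the original source): a user-supplied
    # 'match_filter' as consumed by check_filter() above. Returning None lets the
    # download proceed; returning a string skips the item with that message:
    #
    #   def longer_than_a_minute(info_dict, *, incomplete):
    #       duration = info_dict.get('duration')
    #       if not incomplete and duration is not None and duration < 60:
    #           return 'Skipping short video'
    #       return None
    #
    #   ydl = YoutubeDL({'match_filter': longer_than_a_minute})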
b6c45014
JMF
1492 @staticmethod
1493 def add_extra_info(info_dict, extra_info):
1494 '''Set the keys from extra_info in info dict if they are missing'''
1495 for key, value in extra_info.items():
1496 info_dict.setdefault(key, value)
1497
409e1828 1498 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
61aa5ba3 1499 process=True, force_generic_extractor=False):
41d1cca3 1500 """
17ffed18 1501 Extract and return the information dictionary of the URL
41d1cca3 1502
1503 Arguments:
17ffed18 1504 @param url URL to extract
41d1cca3 1505
1506 Keyword arguments:
17ffed18 1507 @param download Whether to download videos
1508 @param process Whether to resolve all unresolved references (URLs, playlist items).
1509 Must be True for download to work
1510 @param ie_key Use only the extractor with this key
1511
1512 @param extra_info Dictionary containing the extra values to add to the info (For internal use only)
1513 @force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic')
41d1cca3 1514 """
fe7e0c98 1515
409e1828 1516 if extra_info is None:
1517 extra_info = {}
1518
61aa5ba3 1519 if not ie_key and force_generic_extractor:
d22dec74
S
1520 ie_key = 'Generic'
1521
8222d8de 1522 if ie_key:
fe7866d0 1523 ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
8222d8de
JMF
1524 else:
1525 ies = self._ies
1526
fe7866d0 1527 for key, ie in ies.items():
8222d8de
JMF
1528 if not ie.suitable(url):
1529 continue
1530
1531 if not ie.working():
6febd1c1
PH
1532 self.report_warning('The program functionality for this site has been marked as broken, '
1533 'and will probably not work.')
8222d8de 1534
1151c407 1535 temp_id = ie.get_temp_id(url)
fe7866d0 1536 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1537 self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
5e5be0c0 1538 if self.params.get('break_on_existing', False):
1539 raise ExistingVideoReached()
a0566bbf 1540 break
fe7866d0 1541 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
a0566bbf 1542 else:
fe7866d0 1543 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1544 self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1545 tb=False if extractors_restricted else None)
a0566bbf 1546
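    # Illustrative sketch (not part of the original source) of the public API above;
    # the URL is a placeholder:
    #
    #   with YoutubeDL({'quiet': True}) as ydl:
    #       info = ydl.extract_info('https://example.com/video', download=False)
    #       # or force a specific extractor:
    #       info = ydl.extract_info('https://example.com/video', ie_key='Generic',
    #                               download=False)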
7e88d7d7 1547 def _handle_extraction_exceptions(func):
b5ae35ee 1548 @functools.wraps(func)
a0566bbf 1549 def wrapper(self, *args, **kwargs):
6da22e7d 1550 while True:
1551 try:
1552 return func(self, *args, **kwargs)
1553 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
8222d8de 1554 raise
6da22e7d 1555 except ReExtractInfo as e:
1556 if e.expected:
1557 self.to_screen(f'{e}; Re-extracting data')
1558 else:
1559 self.to_stderr('\r')
1560 self.report_warning(f'{e}; Re-extracting data')
1561 continue
1562 except GeoRestrictedError as e:
1563 msg = e.msg
1564 if e.countries:
1565 msg += '\nThis video is available in %s.' % ', '.join(
1566 map(ISO3166Utils.short2full, e.countries))
1567 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1568 self.report_error(msg)
1569 except ExtractorError as e: # An error we somewhat expected
1570 self.report_error(str(e), e.format_traceback())
1571 except Exception as e:
1572 if self.params.get('ignoreerrors'):
1573 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1574 else:
1575 raise
1576 break
a0566bbf 1577 return wrapper
1578
693f0600 1579 def _wait_for_video(self, ie_result={}):
f2ebc5c7 1580 if (not self.params.get('wait_for_video')
1581 or ie_result.get('_type', 'video') != 'video'
1582 or ie_result.get('formats') or ie_result.get('url')):
1583 return
1584
1585 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1586 last_msg = ''
1587
1588 def progress(msg):
1589 nonlocal last_msg
a7dc6a89 1590 full_msg = f'{msg}\n'
1591 if not self.params.get('noprogress'):
1592 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1593 elif last_msg:
1594 return
1595 self.to_screen(full_msg, skip_eol=True)
f2ebc5c7 1596 last_msg = msg
1597
1598 min_wait, max_wait = self.params.get('wait_for_video')
1599 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1600 if diff is None and ie_result.get('live_status') == 'is_upcoming':
16c620bc 1601 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
f2ebc5c7 1602 self.report_warning('Release time of video is not known')
693f0600 1603 elif ie_result and (diff or 0) <= 0:
f2ebc5c7 1604 self.report_warning('Video should already be available according to extracted info')
38d79fd1 1605 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
f2ebc5c7 1606 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1607
1608 wait_till = time.time() + diff
1609 try:
1610 while True:
1611 diff = wait_till - time.time()
1612 if diff <= 0:
1613 progress('')
1614 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1615 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1616 time.sleep(1)
1617 except KeyboardInterrupt:
1618 progress('')
1619 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1620 except BaseException as e:
1621 if not isinstance(e, ReExtractInfo):
1622 self.to_screen('')
1623 raise
1624
7e88d7d7 1625 @_handle_extraction_exceptions
58f197b7 1626 def __extract_info(self, url, ie, download, extra_info, process):
693f0600 1627 try:
1628 ie_result = ie.extract(url)
1629 except UserNotLive as e:
1630 if process:
1631 if self.params.get('wait_for_video'):
1632 self.report_warning(e)
1633 self._wait_for_video()
1634 raise
a0566bbf 1635 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
cb794ee0 1636 self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
a0566bbf 1637 return
1638 if isinstance(ie_result, list):
1639 # Backwards compatibility: old IE result format
1640 ie_result = {
1641 '_type': 'compat_list',
1642 'entries': ie_result,
1643 }
e37d0efb 1644 if extra_info.get('original_url'):
1645 ie_result.setdefault('original_url', extra_info['original_url'])
a0566bbf 1646 self.add_default_extra_info(ie_result, ie, url)
1647 if process:
f2ebc5c7 1648 self._wait_for_video(ie_result)
a0566bbf 1649 return self.process_ie_result(ie_result, download, extra_info)
8222d8de 1650 else:
a0566bbf 1651 return ie_result
fe7e0c98 1652
ea38e55f 1653 def add_default_extra_info(self, ie_result, ie, url):
6033d980 1654 if url is not None:
1655 self.add_extra_info(ie_result, {
1656 'webpage_url': url,
1657 'original_url': url,
57ebfca3 1658 })
1659 webpage_url = ie_result.get('webpage_url')
1660 if webpage_url:
1661 self.add_extra_info(ie_result, {
1662 'webpage_url_basename': url_basename(webpage_url),
1663 'webpage_url_domain': get_domain(webpage_url),
6033d980 1664 })
1665 if ie is not None:
1666 self.add_extra_info(ie_result, {
1667 'extractor': ie.IE_NAME,
1668 'extractor_key': ie.ie_key(),
1669 })
ea38e55f 1670
58adec46 1671 def process_ie_result(self, ie_result, download=True, extra_info=None):
8222d8de
JMF
1672 """
1673 Take the result of the ie (may be modified) and resolve all unresolved
1674 references (URLs, playlist items).
1675
1676 It will also download the videos if 'download' is True.
1677 Returns the resolved ie_result.
1678 """
58adec46 1679 if extra_info is None:
1680 extra_info = {}
e8ee972c
PH
1681 result_type = ie_result.get('_type', 'video')
1682
057a5206 1683 if result_type in ('url', 'url_transparent'):
8f97a15d 1684 ie_result['url'] = sanitize_url(
1685 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
8791e78c 1686 if ie_result.get('original_url') and not extra_info.get('original_url'):
1687 extra_info = {'original_url': ie_result['original_url'], **extra_info}
e37d0efb 1688
057a5206 1689 extract_flat = self.params.get('extract_flat', False)
3089bc74
S
1690 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1691 or extract_flat is True):
ecb54191 1692 info_copy = ie_result.copy()
6033d980 1693 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
360167b9 1694 if ie and not ie_result.get('id'):
4614bc22 1695 info_copy['id'] = ie.get_temp_id(ie_result['url'])
6033d980 1696 self.add_default_extra_info(info_copy, ie, ie_result['url'])
4614bc22 1697 self.add_extra_info(info_copy, extra_info)
b5475f11 1698 info_copy, _ = self.pre_process(info_copy)
94dc8604 1699 self._fill_common_fields(info_copy, False)
17060584 1700 self.__forced_printings(info_copy)
415f8d51 1701 self._raise_pending_errors(info_copy)
4614bc22 1702 if self.params.get('force_write_download_archive', False):
1703 self.record_download_archive(info_copy)
e8ee972c
PH
1704 return ie_result
1705
8222d8de 1706 if result_type == 'video':
b6c45014 1707 self.add_extra_info(ie_result, extra_info)
9c2b75b5 1708 ie_result = self.process_video_result(ie_result, download=download)
415f8d51 1709 self._raise_pending_errors(ie_result)
28b0eb0f 1710 additional_urls = (ie_result or {}).get('additional_urls')
9c2b75b5 1711 if additional_urls:
e9f4ccd1 1712 # TODO: Improve MetadataParserPP to allow setting a list
14f25df2 1713 if isinstance(additional_urls, str):
9c2b75b5 1714 additional_urls = [additional_urls]
1715 self.to_screen(
1716 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1717 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1718 ie_result['additional_entries'] = [
1719 self.extract_info(
b69fd25c 1720 url, download, extra_info=extra_info,
9c2b75b5 1721 force_generic_extractor=self.params.get('force_generic_extractor'))
1722 for url in additional_urls
1723 ]
1724 return ie_result
8222d8de
JMF
1725 elif result_type == 'url':
1726 # We have to add extra_info to the results because it may be
1727 # contained in a playlist
07cce701 1728 return self.extract_info(
1729 ie_result['url'], download,
1730 ie_key=ie_result.get('ie_key'),
1731 extra_info=extra_info)
7fc3fa05
PH
1732 elif result_type == 'url_transparent':
1733 # Use the information from the embedding page
1734 info = self.extract_info(
1735 ie_result['url'], ie_key=ie_result.get('ie_key'),
1736 extra_info=extra_info, download=False, process=False)
1737
1640eb09
S
1738 # extract_info may return None when ignoreerrors is enabled and
1739 # extraction failed with an error; don't crash and return early
1740 # in this case
1741 if not info:
1742 return info
1743
3975b4d2 1744 exempted_fields = {'_type', 'url', 'ie_key'}
1745 if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1746 # For video clips, the id etc of the clip extractor should be used
1747 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1748
412c617d 1749 new_result = info.copy()
3975b4d2 1750 new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
7fc3fa05 1751
0563f7ac
S
1752 # Extracted info may not be a video result (i.e.
1753 # info.get('_type', 'video') != video) but rather a url or
1754 # url_transparent. In such cases, outer metadata (from ie_result)
1755 # should be propagated to the inner one (info). For this to happen,
1756 # the _type of info should be overridden with url_transparent. This
067aa17e 1757 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
0563f7ac
S
1758 if new_result.get('_type') == 'url':
1759 new_result['_type'] = 'url_transparent'
7fc3fa05
PH
1760
1761 return self.process_ie_result(
1762 new_result, download=download, extra_info=extra_info)
40fcba5e 1763 elif result_type in ('playlist', 'multi_video'):
30a074c2 1764 # Protect from infinite recursion due to recursively nested playlists
1765 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
0bd5a039 1766 webpage_url = ie_result.get('webpage_url') # Playlists may not have webpage_url
1767 if webpage_url and webpage_url in self._playlist_urls:
7e85e872 1768 self.to_screen(
30a074c2 1769 '[download] Skipping already downloaded playlist: %s'
1770 % (ie_result.get('title') or ie_result.get('id')))
1771 return
7e85e872 1772
30a074c2 1773 self._playlist_level += 1
1774 self._playlist_urls.add(webpage_url)
03f83004 1775 self._fill_common_fields(ie_result, False)
bc516a3f 1776 self._sanitize_thumbnails(ie_result)
30a074c2 1777 try:
1778 return self.__process_playlist(ie_result, download)
1779 finally:
1780 self._playlist_level -= 1
1781 if not self._playlist_level:
1782 self._playlist_urls.clear()
8222d8de 1783 elif result_type == 'compat_list':
c9bf4114
PH
1784 self.report_warning(
1785 'Extractor %s returned a compat_list result. '
1786 'It needs to be updated.' % ie_result.get('extractor'))
5f6a1245 1787
8222d8de 1788 def _fixup(r):
b868936c 1789 self.add_extra_info(r, {
1790 'extractor': ie_result['extractor'],
1791 'webpage_url': ie_result['webpage_url'],
1792 'webpage_url_basename': url_basename(ie_result['webpage_url']),
0bb322b9 1793 'webpage_url_domain': get_domain(ie_result['webpage_url']),
b868936c 1794 'extractor_key': ie_result['extractor_key'],
1795 })
8222d8de
JMF
1796 return r
1797 ie_result['entries'] = [
b6c45014 1798 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
1799 for r in ie_result['entries']
1800 ]
1801 return ie_result
1802 else:
1803 raise Exception('Invalid result type: %s' % result_type)
1804
e92caff5 1805 def _ensure_dir_exists(self, path):
1806 return make_dir(path, self.report_error)
1807
3b603dbd 1808 @staticmethod
3bec830a 1809 def _playlist_infodict(ie_result, strict=False, **kwargs):
1810 info = {
1811 'playlist_count': ie_result.get('playlist_count'),
3b603dbd 1812 'playlist': ie_result.get('title') or ie_result.get('id'),
1813 'playlist_id': ie_result.get('id'),
1814 'playlist_title': ie_result.get('title'),
1815 'playlist_uploader': ie_result.get('uploader'),
1816 'playlist_uploader_id': ie_result.get('uploader_id'),
3b603dbd 1817 **kwargs,
1818 }
3bec830a 1819 if strict:
1820 return info
0bd5a039 1821 if ie_result.get('webpage_url'):
1822 info.update({
1823 'webpage_url': ie_result['webpage_url'],
1824 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1825 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1826 })
3bec830a 1827 return {
1828 **info,
1829 'playlist_index': 0,
59d7de0d 1830 '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
3bec830a 1831 'extractor': ie_result['extractor'],
3bec830a 1832 'extractor_key': ie_result['extractor_key'],
1833 }
3b603dbd 1834
30a074c2 1835 def __process_playlist(self, ie_result, download):
7e88d7d7 1836 """Process each entry in the playlist"""
f5ea4748 1837 assert ie_result['_type'] in ('playlist', 'multi_video')
1838
3bec830a 1839 common_info = self._playlist_infodict(ie_result, strict=True)
3955b207 1840 title = common_info.get('playlist') or '<Untitled>'
3bec830a 1841 if self._match_entry(common_info, incomplete=True) is not None:
1842 return
c6e07cf1 1843 self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
f0d785d3 1844
7e88d7d7 1845 all_entries = PlaylistEntries(self, ie_result)
7e9a6125 1846 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1847
1848 lazy = self.params.get('lazy_playlist')
1849 if lazy:
1850 resolved_entries, n_entries = [], 'N/A'
1851 ie_result['requested_entries'], ie_result['entries'] = None, None
1852 else:
1853 entries = resolved_entries = list(entries)
1854 n_entries = len(resolved_entries)
1855 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1856 if not ie_result.get('playlist_count'):
1857 # Better to do this after potentially exhausting entries
1858 ie_result['playlist_count'] = all_entries.get_full_count()
498f5606 1859
0647d925 1860 extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1861 ie_copy = collections.ChainMap(ie_result, extra)
3bec830a 1862
e08a85d8 1863 _infojson_written = False
0bfc53d0 1864 write_playlist_files = self.params.get('allow_playlist_files', True)
1865 if write_playlist_files and self.params.get('list_thumbnails'):
1866 self.list_thumbnails(ie_result)
1867 if write_playlist_files and not self.params.get('simulate'):
e08a85d8 1868 _infojson_written = self._write_info_json(
1869 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1870 if _infojson_written is None:
80c03fa9 1871 return
1872 if self._write_description('playlist', ie_result,
1873 self.prepare_filename(ie_copy, 'pl_description')) is None:
1874 return
681de68e 1875 # TODO: This should be passed to ThumbnailsConvertor if necessary
3bec830a 1876 self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
30a074c2 1877
7e9a6125 1878 if lazy:
1879 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1880 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1881 elif self.params.get('playlistreverse'):
1882 entries.reverse()
1883 elif self.params.get('playlistrandom'):
30a074c2 1884 random.shuffle(entries)
1885
bc5c2f8a 1886 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
7e88d7d7 1887 f'{format_field(ie_result, "playlist_count", " of %s")}')
30a074c2 1888
134c913c 1889 keep_resolved_entries = self.params.get('extract_flat') != 'discard'
1890 if self.params.get('extract_flat') == 'discard_in_playlist':
1891 keep_resolved_entries = ie_result['_type'] != 'playlist'
1892 if keep_resolved_entries:
1893 self.write_debug('The information of all playlist entries will be held in memory')
1894
26e2805c 1895 failures = 0
1896 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
7e9a6125 1897 for i, (playlist_index, entry) in enumerate(entries):
1898 if lazy:
1899 resolved_entries.append((playlist_index, entry))
3bec830a 1900 if not entry:
7e88d7d7 1901 continue
1902
7e88d7d7 1903 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
7e9a6125 1904 if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
1905 playlist_index = ie_result['requested_entries'][i]
1906
0647d925 1907 entry_copy = collections.ChainMap(entry, {
3bec830a 1908 **common_info,
3955b207 1909 'n_entries': int_or_none(n_entries),
71729754 1910 'playlist_index': playlist_index,
7e9a6125 1911 'playlist_autonumber': i + 1,
0647d925 1912 })
3bec830a 1913
0647d925 1914 if self._match_entry(entry_copy, incomplete=True) is not None:
f0ad6f8c 1915 # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
1916 resolved_entries[i] = (playlist_index, NO_DEFAULT)
3bec830a 1917 continue
1918
bc5c2f8a 1919 self.to_screen('[download] Downloading item %s of %s' % (
3bec830a 1920 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
1921
ec54bd43 1922 entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
a6ca61d4 1923 'playlist_index': playlist_index,
1924 'playlist_autonumber': i + 1,
ec54bd43 1925 }, extra))
26e2805c 1926 if not entry_result:
1927 failures += 1
1928 if failures >= max_failures:
1929 self.report_error(
7e88d7d7 1930 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
26e2805c 1931 break
134c913c 1932 if keep_resolved_entries:
1933 resolved_entries[i] = (playlist_index, entry_result)
7e88d7d7 1934
1935 # Update with processed data
f0ad6f8c 1936 ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
bc5c2f8a 1937 ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
1938 if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
1939 # Do not set for full playlist
1940 ie_result.pop('requested_entries')
e08a85d8 1941
1942 # Write the updated info to json
cb96c5be 1943 if _infojson_written is True and self._write_info_json(
e08a85d8 1944 'updated playlist', ie_result,
1945 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1946 return
ca30f449 1947
ed5835b4 1948 ie_result = self.run_all_pps('playlist', ie_result)
7e88d7d7 1949 self.to_screen(f'[download] Finished downloading playlist: {title}')
30a074c2 1950 return ie_result
1951
7e88d7d7 1952 @_handle_extraction_exceptions
a0566bbf 1953 def __process_iterable_entry(self, entry, download, extra_info):
1954 return self.process_ie_result(
1955 entry, download=download, extra_info=extra_info)
1956
67134eab
JMF
1957 def _build_format_filter(self, filter_spec):
1958 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
1959
1960 OPERATORS = {
1961 '<': operator.lt,
1962 '<=': operator.le,
1963 '>': operator.gt,
1964 '>=': operator.ge,
1965 '=': operator.eq,
1966 '!=': operator.ne,
1967 }
67134eab 1968 operator_rex = re.compile(r'''(?x)\s*
c3f624ef 1969 (?P<key>[\w.-]+)\s*
187986a8 1970 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1971 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
083c9df9 1972 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
187986a8 1973 m = operator_rex.fullmatch(filter_spec)
9ddb6925
S
1974 if m:
1975 try:
1976 comparison_value = int(m.group('value'))
1977 except ValueError:
1978 comparison_value = parse_filesize(m.group('value'))
1979 if comparison_value is None:
1980 comparison_value = parse_filesize(m.group('value') + 'B')
1981 if comparison_value is None:
1982 raise ValueError(
1983 'Invalid value %r in format specification %r' % (
67134eab 1984 m.group('value'), filter_spec))
9ddb6925
S
1985 op = OPERATORS[m.group('op')]
1986
083c9df9 1987 if not m:
9ddb6925
S
1988 STR_OPERATORS = {
1989 '=': operator.eq,
10d33b34
YCH
1990 '^=': lambda attr, value: attr.startswith(value),
1991 '$=': lambda attr, value: attr.endswith(value),
1992 '*=': lambda attr, value: value in attr,
1ce9a3cb 1993 '~=': lambda attr, value: value.search(attr) is not None
9ddb6925 1994 }
187986a8 1995 str_operator_rex = re.compile(r'''(?x)\s*
1996 (?P<key>[a-zA-Z0-9._-]+)\s*
1ce9a3cb
LF
1997 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1998 (?P<quote>["'])?
1999 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
2000 (?(quote)(?P=quote))\s*
9ddb6925 2001 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
187986a8 2002 m = str_operator_rex.fullmatch(filter_spec)
9ddb6925 2003 if m:
1ce9a3cb
LF
2004 if m.group('op') == '~=':
2005 comparison_value = re.compile(m.group('value'))
2006 else:
2007 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2cc779f4
S
2008 str_op = STR_OPERATORS[m.group('op')]
2009 if m.group('negation'):
e118a879 2010 op = lambda attr, value: not str_op(attr, value)
2cc779f4
S
2011 else:
2012 op = str_op
083c9df9 2013
9ddb6925 2014 if not m:
187986a8 2015 raise SyntaxError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
2016
2017 def _filter(f):
2018 actual_value = f.get(m.group('key'))
2019 if actual_value is None:
2020 return m.group('none_inclusive')
2021 return op(actual_value, comparison_value)
67134eab
JMF
2022 return _filter
2023
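    # Illustrative examples (not part of the original source) of filter specs the
    # method above understands, assuming `ydl` is a YoutubeDL instance:
    #
    #   f = ydl._build_format_filter('height<=480')    # numeric comparison
    #   f({'height': 360})   # -> True
    #   f({'height': 720})   # -> False
    #   ydl._build_format_filter('ext=mp4')            # string equality
    #   ydl._build_format_filter('format_id!*=dash')   # negated substring match
    #   ydl._build_format_filter('filesize<?50M')      # '?' keeps formats lacking the field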
9f1a1c36 2024 def _check_formats(self, formats):
2025 for f in formats:
2026 self.to_screen('[info] Testing format %s' % f['format_id'])
75689fe5 2027 path = self.get_output_path('temp')
2028 if not self._ensure_dir_exists(f'{path}/'):
2029 continue
2030 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
9f1a1c36 2031 temp_file.close()
2032 try:
2033 success, _ = self.dl(temp_file.name, f, test=True)
8a82af35 2034 except (DownloadError, OSError, ValueError) + network_exceptions:
9f1a1c36 2035 success = False
2036 finally:
2037 if os.path.exists(temp_file.name):
2038 try:
2039 os.remove(temp_file.name)
2040 except OSError:
2041 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
2042 if success:
2043 yield f
2044 else:
2045 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
2046
0017d9ad 2047 def _default_format_spec(self, info_dict, download=True):
0017d9ad 2048
af0f7428
S
2049 def can_merge():
2050 merger = FFmpegMergerPP(self)
2051 return merger.available and merger.can_merge()
2052
91ebc640 2053 prefer_best = (
b7b04c78 2054 not self.params.get('simulate')
91ebc640 2055 and download
2056 and (
2057 not can_merge()
21633673 2058 or info_dict.get('is_live') and not self.params.get('live_from_start')
bf1824b3 2059 or self.params['outtmpl']['default'] == '-'))
53ed7066 2060 compat = (
2061 prefer_best
2062 or self.params.get('allow_multiple_audio_streams', False)
8a82af35 2063 or 'format-spec' in self.params['compat_opts'])
91ebc640 2064
2065 return (
53ed7066 2066 'best/bestvideo+bestaudio' if prefer_best
2067 else 'bestvideo*+bestaudio/best' if not compat
91ebc640 2068 else 'bestvideo+bestaudio/best')
0017d9ad 2069
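    # Illustrative note (not part of the original source):
    #
    #   ydl._default_format_spec({}, download=True)
    #   # -> 'bestvideo*+bestaudio/best'   (ffmpeg available, default options)
    #   # -> 'best/bestvideo+bestaudio'    (e.g. when outtmpl is '-' or merging is unavailable)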
67134eab
JMF
2070 def build_format_selector(self, format_spec):
2071 def syntax_error(note, start):
2072 message = (
2073 'Invalid format specification: '
86e5f3ed 2074 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
67134eab
JMF
2075 return SyntaxError(message)
2076
2077 PICKFIRST = 'PICKFIRST'
2078 MERGE = 'MERGE'
2079 SINGLE = 'SINGLE'
0130afb7 2080 GROUP = 'GROUP'
67134eab
JMF
2081 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2082
91ebc640 2083 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2084 'video': self.params.get('allow_multiple_video_streams', False)}
909d24dd 2085
9f1a1c36 2086 check_formats = self.params.get('check_formats') == 'selected'
e8e73840 2087
67134eab
JMF
2088 def _parse_filter(tokens):
2089 filter_parts = []
6f2287cb 2090 for type, string_, start, _, _ in tokens:
2091 if type == tokenize.OP and string_ == ']':
67134eab
JMF
2092 return ''.join(filter_parts)
2093 else:
6f2287cb 2094 filter_parts.append(string_)
67134eab 2095
232541df 2096 def _remove_unused_ops(tokens):
62b58c09
L
2097 # Remove operators that we don't use and join them with the surrounding strings.
2098 # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
232541df
JMF
2099 ALLOWED_OPS = ('/', '+', ',', '(', ')')
2100 last_string, last_start, last_end, last_line = None, None, None, None
6f2287cb 2101 for type, string_, start, end, line in tokens:
2102 if type == tokenize.OP and string_ == '[':
232541df
JMF
2103 if last_string:
2104 yield tokenize.NAME, last_string, last_start, last_end, last_line
2105 last_string = None
6f2287cb 2106 yield type, string_, start, end, line
232541df 2107 # everything inside brackets will be handled by _parse_filter
6f2287cb 2108 for type, string_, start, end, line in tokens:
2109 yield type, string_, start, end, line
2110 if type == tokenize.OP and string_ == ']':
232541df 2111 break
6f2287cb 2112 elif type == tokenize.OP and string_ in ALLOWED_OPS:
232541df
JMF
2113 if last_string:
2114 yield tokenize.NAME, last_string, last_start, last_end, last_line
2115 last_string = None
6f2287cb 2116 yield type, string_, start, end, line
232541df
JMF
2117 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2118 if not last_string:
6f2287cb 2119 last_string = string_
232541df
JMF
2120 last_start = start
2121 last_end = end
2122 else:
6f2287cb 2123 last_string += string_
232541df
JMF
2124 if last_string:
2125 yield tokenize.NAME, last_string, last_start, last_end, last_line
2126
cf2ac6df 2127 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
67134eab
JMF
2128 selectors = []
2129 current_selector = None
6f2287cb 2130 for type, string_, start, _, _ in tokens:
67134eab
JMF
2131 # ENCODING is only defined in python 3.x
2132 if type == getattr(tokenize, 'ENCODING', None):
2133 continue
2134 elif type in [tokenize.NAME, tokenize.NUMBER]:
6f2287cb 2135 current_selector = FormatSelector(SINGLE, string_, [])
67134eab 2136 elif type == tokenize.OP:
6f2287cb 2137 if string_ == ')':
cf2ac6df
JMF
2138 if not inside_group:
2139 # ')' will be handled by the parentheses group
2140 tokens.restore_last_token()
67134eab 2141 break
6f2287cb 2142 elif inside_merge and string_ in ['/', ',']:
0130afb7
JMF
2143 tokens.restore_last_token()
2144 break
6f2287cb 2145 elif inside_choice and string_ == ',':
cf2ac6df
JMF
2146 tokens.restore_last_token()
2147 break
6f2287cb 2148 elif string_ == ',':
0a31a350
JMF
2149 if not current_selector:
2150 raise syntax_error('"," must follow a format selector', start)
67134eab
JMF
2151 selectors.append(current_selector)
2152 current_selector = None
6f2287cb 2153 elif string_ == '/':
d96d604e
JMF
2154 if not current_selector:
2155 raise syntax_error('"/" must follow a format selector', start)
67134eab 2156 first_choice = current_selector
cf2ac6df 2157 second_choice = _parse_format_selection(tokens, inside_choice=True)
f5f4a27a 2158 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
6f2287cb 2159 elif string_ == '[':
67134eab
JMF
2160 if not current_selector:
2161 current_selector = FormatSelector(SINGLE, 'best', [])
2162 format_filter = _parse_filter(tokens)
2163 current_selector.filters.append(format_filter)
6f2287cb 2164 elif string_ == '(':
0130afb7
JMF
2165 if current_selector:
2166 raise syntax_error('Unexpected "("', start)
cf2ac6df
JMF
2167 group = _parse_format_selection(tokens, inside_group=True)
2168 current_selector = FormatSelector(GROUP, group, [])
6f2287cb 2169 elif string_ == '+':
d03cfdce 2170 if not current_selector:
2171 raise syntax_error('Unexpected "+"', start)
2172 selector_1 = current_selector
2173 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2174 if not selector_2:
2175 raise syntax_error('Expected a selector', start)
2176 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
67134eab 2177 else:
6f2287cb 2178 raise syntax_error(f'Operator not recognized: "{string_}"', start)
67134eab
JMF
2179 elif type == tokenize.ENDMARKER:
2180 break
2181 if current_selector:
2182 selectors.append(current_selector)
2183 return selectors
2184
f8d4ad9a 2185 def _merge(formats_pair):
2186 format_1, format_2 = formats_pair
2187
2188 formats_info = []
2189 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2190 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2191
2192 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
551f9388 2193 get_no_more = {'video': False, 'audio': False}
f8d4ad9a 2194 for (i, fmt_info) in enumerate(formats_info):
551f9388 2195 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2196 formats_info.pop(i)
2197 continue
2198 for aud_vid in ['audio', 'video']:
f8d4ad9a 2199 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2200 if get_no_more[aud_vid]:
2201 formats_info.pop(i)
f5510afe 2202 break
f8d4ad9a 2203 get_no_more[aud_vid] = True
2204
2205 if len(formats_info) == 1:
2206 return formats_info[0]
2207
2208 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2209 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2210
2211 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2212 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2213
fc61aff4
LL
2214 output_ext = get_compatible_ext(
2215 vcodecs=[f.get('vcodec') for f in video_fmts],
2216 acodecs=[f.get('acodec') for f in audio_fmts],
2217 vexts=[f['ext'] for f in video_fmts],
2218 aexts=[f['ext'] for f in audio_fmts],
2219 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2220 or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
f8d4ad9a 2221
975a0d0d 2222 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2223
f8d4ad9a 2224 new_dict = {
2225 'requested_formats': formats_info,
975a0d0d 2226 'format': '+'.join(filtered('format')),
2227 'format_id': '+'.join(filtered('format_id')),
f8d4ad9a 2228 'ext': output_ext,
975a0d0d 2229 'protocol': '+'.join(map(determine_protocol, formats_info)),
093a1710 2230 'language': '+'.join(orderedSet(filtered('language'))) or None,
2231 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2232 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
975a0d0d 2233 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
f8d4ad9a 2234 }
2235
2236 if the_only_video:
2237 new_dict.update({
2238 'width': the_only_video.get('width'),
2239 'height': the_only_video.get('height'),
2240 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2241 'fps': the_only_video.get('fps'),
49a57e70 2242 'dynamic_range': the_only_video.get('dynamic_range'),
f8d4ad9a 2243 'vcodec': the_only_video.get('vcodec'),
2244 'vbr': the_only_video.get('vbr'),
2245 'stretched_ratio': the_only_video.get('stretched_ratio'),
105bfd90 2246 'aspect_ratio': the_only_video.get('aspect_ratio'),
f8d4ad9a 2247 })
2248
2249 if the_only_audio:
2250 new_dict.update({
2251 'acodec': the_only_audio.get('acodec'),
2252 'abr': the_only_audio.get('abr'),
975a0d0d 2253 'asr': the_only_audio.get('asr'),
b8ed0f15 2254 'audio_channels': the_only_audio.get('audio_channels')
f8d4ad9a 2255 })
2256
2257 return new_dict
2258
e8e73840 2259 def _check_formats(formats):
981052c9 2260 if not check_formats:
2261 yield from formats
b5ac45b1 2262 return
9f1a1c36 2263 yield from self._check_formats(formats)
e8e73840 2264
67134eab 2265 def _build_selector_function(selector):
909d24dd 2266 if isinstance(selector, list): # ,
67134eab
JMF
2267 fs = [_build_selector_function(s) for s in selector]
2268
317f7ab6 2269 def selector_function(ctx):
67134eab 2270 for f in fs:
981052c9 2271 yield from f(ctx)
67134eab 2272 return selector_function
909d24dd 2273
2274 elif selector.type == GROUP: # ()
0130afb7 2275 selector_function = _build_selector_function(selector.selector)
909d24dd 2276
2277 elif selector.type == PICKFIRST: # /
67134eab
JMF
2278 fs = [_build_selector_function(s) for s in selector.selector]
2279
317f7ab6 2280 def selector_function(ctx):
67134eab 2281 for f in fs:
317f7ab6 2282 picked_formats = list(f(ctx))
67134eab
JMF
2283 if picked_formats:
2284 return picked_formats
2285 return []
67134eab 2286
981052c9 2287 elif selector.type == MERGE: # +
2288 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2289
2290 def selector_function(ctx):
adbc4ec4 2291 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
981052c9 2292 yield _merge(pair)
2293
909d24dd 2294 elif selector.type == SINGLE: # atom
598d185d 2295 format_spec = selector.selector or 'best'
909d24dd 2296
f8d4ad9a 2297 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
909d24dd 2298 if format_spec == 'all':
2299 def selector_function(ctx):
9222c381 2300 yield from _check_formats(ctx['formats'][::-1])
f8d4ad9a 2301 elif format_spec == 'mergeall':
2302 def selector_function(ctx):
316f2650 2303 formats = list(_check_formats(
2304 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
e01d6aa4 2305 if not formats:
2306 return
921b76ca 2307 merged_format = formats[-1]
2308 for f in formats[-2::-1]:
f8d4ad9a 2309 merged_format = _merge((merged_format, f))
2310 yield merged_format
909d24dd 2311
2312 else:
85e801a9 2313 format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
eff63539 2314 mobj = re.match(
2315 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2316 format_spec)
2317 if mobj is not None:
2318 format_idx = int_or_none(mobj.group('n'), default=1)
e8e73840 2319 format_reverse = mobj.group('bw')[0] == 'b'
eff63539 2320 format_type = (mobj.group('type') or [None])[0]
2321 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2322 format_modified = mobj.group('mod') is not None
909d24dd 2323
2324 format_fallback = not format_type and not format_modified # for b, w
8326b00a 2325 _filter_f = (
eff63539 2326 (lambda f: f.get('%scodec' % format_type) != 'none')
2327 if format_type and format_modified # bv*, ba*, wv*, wa*
2328 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2329 if format_type # bv, ba, wv, wa
2330 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2331 if not format_modified # b, w
8326b00a 2332 else lambda f: True) # b*, w*
2333 filter_f = lambda f: _filter_f(f) and (
2334 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
67134eab 2335 else:
48ee10ee 2336 if format_spec in self._format_selection_exts['audio']:
b11c04a8 2337 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
48ee10ee 2338 elif format_spec in self._format_selection_exts['video']:
b11c04a8 2339 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
85e801a9 2340 seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
48ee10ee 2341 elif format_spec in self._format_selection_exts['storyboards']:
b11c04a8 2342 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2343 else:
b5ae35ee 2344 filter_f = lambda f: f.get('format_id') == format_spec # id
909d24dd 2345
2346 def selector_function(ctx):
2347 formats = list(ctx['formats'])
909d24dd 2348 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
85e801a9 2349 if not matches:
2350 if format_fallback and ctx['incomplete_formats']:
2351 # for extractors with incomplete formats (audio only (soundcloud)
2352 # or video only (imgur)) best/worst will fall back to
2353 # best/worst {video,audio}-only format
2354 matches = formats
2355 elif seperate_fallback and not ctx['has_merged_format']:
2356 # for compatibility with youtube-dl when there is no pre-merged format
2357 matches = list(filter(seperate_fallback, formats))
981052c9 2358 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2359 try:
e8e73840 2360 yield matches[format_idx - 1]
4abea8ca 2361 except LazyList.IndexError:
981052c9 2362 return
083c9df9 2363
67134eab 2364 filters = [self._build_format_filter(f) for f in selector.filters]
083c9df9 2365
317f7ab6 2366 def final_selector(ctx):
adbc4ec4 2367 ctx_copy = dict(ctx)
67134eab 2368 for _filter in filters:
317f7ab6
S
2369 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2370 return selector_function(ctx_copy)
67134eab 2371 return final_selector
083c9df9 2372
0f06bcd7 2373 stream = io.BytesIO(format_spec.encode())
0130afb7 2374 try:
f9934b96 2375 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
0130afb7
JMF
2376 except tokenize.TokenError:
2377 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2378
86e5f3ed 2379 class TokenIterator:
0130afb7
JMF
2380 def __init__(self, tokens):
2381 self.tokens = tokens
2382 self.counter = 0
2383
2384 def __iter__(self):
2385 return self
2386
2387 def __next__(self):
2388 if self.counter >= len(self.tokens):
2389 raise StopIteration()
2390 value = self.tokens[self.counter]
2391 self.counter += 1
2392 return value
2393
2394 next = __next__
2395
2396 def restore_last_token(self):
2397 self.counter -= 1
2398
2399 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
67134eab 2400 return _build_selector_function(parsed_selector)
a9c58ad9 2401
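    # Illustrative sketch (not part of the original source): how the selector returned
    # above is used. The ctx keys mirror those read by selector_function()
    # ('formats', 'incomplete_formats', 'has_merged_format'):
    #
    #   selector = ydl.build_format_selector('bestvideo[height<=1080]+bestaudio/best')
    #   ctx = {'formats': formats, 'incomplete_formats': False, 'has_merged_format': True}
    #   requested_formats = list(selector(ctx))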
e5660ee6 2402 def _calc_headers(self, info_dict):
8b7539d2 2403 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
955c8958 2404 if 'Youtubedl-No-Compression' in res: # deprecated
2405 res.pop('Youtubedl-No-Compression', None)
2406 res['Accept-Encoding'] = 'identity'
b87e01c1 2407 cookies = self.cookiejar.get_cookie_header(info_dict['url'])
e5660ee6
JMF
2408 if cookies:
2409 res['Cookie'] = cookies
2410
0016b84e
S
2411 if 'X-Forwarded-For' not in res:
2412 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2413 if x_forwarded_for_ip:
2414 res['X-Forwarded-For'] = x_forwarded_for_ip
2415
e5660ee6
JMF
2416 return res
2417
c487cf00 2418 def _calc_cookies(self, url):
b87e01c1 2419 self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
2420 return self.cookiejar.get_cookie_header(url)
e5660ee6 2421
9f1a1c36 2422 def _sort_thumbnails(self, thumbnails):
2423 thumbnails.sort(key=lambda t: (
2424 t.get('preference') if t.get('preference') is not None else -1,
2425 t.get('width') if t.get('width') is not None else -1,
2426 t.get('height') if t.get('height') is not None else -1,
2427 t.get('id') if t.get('id') is not None else '',
2428 t.get('url')))
2429
b0249bca 2430 def _sanitize_thumbnails(self, info_dict):
bc516a3f 2431 thumbnails = info_dict.get('thumbnails')
2432 if thumbnails is None:
2433 thumbnail = info_dict.get('thumbnail')
2434 if thumbnail:
2435 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
9f1a1c36 2436 if not thumbnails:
2437 return
2438
2439 def check_thumbnails(thumbnails):
2440 for t in thumbnails:
2441 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2442 try:
2443 self.urlopen(HEADRequest(t['url']))
2444 except network_exceptions as err:
2445 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2446 continue
2447 yield t
2448
2449 self._sort_thumbnails(thumbnails)
2450 for i, t in enumerate(thumbnails):
2451 if t.get('id') is None:
2452 t['id'] = '%d' % i
2453 if t.get('width') and t.get('height'):
2454 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2455 t['url'] = sanitize_url(t['url'])
2456
2457 if self.params.get('check_formats') is True:
282f5709 2458 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
9f1a1c36 2459 else:
2460 info_dict['thumbnails'] = thumbnails
bc516a3f 2461
94dc8604 2462 def _fill_common_fields(self, info_dict, final=True):
03f83004 2463 # TODO: move sanitization here
94dc8604 2464 if final:
7aefd19a 2465 title = info_dict['fulltitle'] = info_dict.get('title')
d4736fdb 2466 if not title:
2467 if title == '':
2468 self.write_debug('Extractor gave empty title. Creating a generic title')
2469 else:
2470 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
1d485a1a 2471 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
03f83004
LNO
2472
2473 if info_dict.get('duration') is not None:
2474 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2475
2476 for ts_key, date_key in (
2477 ('timestamp', 'upload_date'),
2478 ('release_timestamp', 'release_date'),
2479 ('modified_timestamp', 'modified_date'),
2480 ):
2481 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2482 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2483 # see http://bugs.python.org/issue1646728)
19a03940 2484 with contextlib.suppress(ValueError, OverflowError, OSError):
03f83004
LNO
2485 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2486 info_dict[date_key] = upload_date.strftime('%Y%m%d')
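            # Illustrative example (not part of the original source) of the
            # conversion above: {'timestamp': 1577836800} -> info_dict['upload_date'] == '20200101'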
03f83004
LNO
2487
2488 live_keys = ('is_live', 'was_live')
2489 live_status = info_dict.get('live_status')
2490 if live_status is None:
2491 for key in live_keys:
2492 if info_dict.get(key) is False:
2493 continue
2494 if info_dict.get(key):
2495 live_status = key
2496 break
2497 if all(info_dict.get(key) is False for key in live_keys):
2498 live_status = 'not_live'
2499 if live_status:
2500 info_dict['live_status'] = live_status
2501 for key in live_keys:
2502 if info_dict.get(key) is None:
2503 info_dict[key] = (live_status == key)
a057779d 2504 if live_status == 'post_live':
2505 info_dict['was_live'] = True
03f83004
LNO
2506
2507 # Auto generate title fields corresponding to the *_number fields when missing
2508 # in order to always have clean titles. This is very common for TV series.
2509 for field in ('chapter', 'season', 'episode'):
94dc8604 2510 if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
03f83004
LNO
2511 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2512
415f8d51 2513 def _raise_pending_errors(self, info):
2514 err = info.pop('__pending_error', None)
2515 if err:
2516 self.report_error(err, tb=False)
2517
784320c9 2518 def sort_formats(self, info_dict):
2519 formats = self._get_formats(info_dict)
784320c9 2520 formats.sort(key=FormatSorter(
c154302c 2521 self, info_dict.get('_format_sort_fields') or []).calculate_preference)
784320c9 2522
dd82ffea
JMF
2523 def process_video_result(self, info_dict, download=True):
2524 assert info_dict.get('_type', 'video') == 'video'
9c906919 2525 self._num_videos += 1
dd82ffea 2526
bec1fad2 2527 if 'id' not in info_dict:
fc08bdd6 2528 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2529 elif not info_dict.get('id'):
2530 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
455a15e2 2531
c9969434
S
2532 def report_force_conversion(field, field_not, conversion):
2533 self.report_warning(
2534 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2535 % (field, field_not, conversion))
2536
2537 def sanitize_string_field(info, string_field):
2538 field = info.get(string_field)
14f25df2 2539 if field is None or isinstance(field, str):
c9969434
S
2540 return
2541 report_force_conversion(string_field, 'a string', 'string')
14f25df2 2542 info[string_field] = str(field)
c9969434
S
2543
2544 def sanitize_numeric_fields(info):
2545 for numeric_field in self._NUMERIC_FIELDS:
2546 field = info.get(numeric_field)
f9934b96 2547 if field is None or isinstance(field, (int, float)):
c9969434
S
2548 continue
2549 report_force_conversion(numeric_field, 'numeric', 'int')
2550 info[numeric_field] = int_or_none(field)
2551
2552 sanitize_string_field(info_dict, 'id')
2553 sanitize_numeric_fields(info_dict)
3975b4d2 2554 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2555 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
4c3f8c3f 2556 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
50e93e03 2557 self.report_warning('"duration" field is negative, there is an error in extractor')
be6217b2 2558
9eef7c4e 2559 chapters = info_dict.get('chapters') or []
a3976e07 2560 if chapters and chapters[0].get('start_time'):
2561 chapters.insert(0, {'start_time': 0})
2562
9eef7c4e 2563 dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
a3976e07 2564 for idx, (prev, current, next_) in enumerate(zip(
2565 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
9eef7c4e 2566 if current.get('start_time') is None:
2567 current['start_time'] = prev.get('end_time')
2568 if not current.get('end_time'):
2569 current['end_time'] = next_.get('start_time')
a3976e07 2570 if not current.get('title'):
2571 current['title'] = f'<Untitled Chapter {idx}>'
9eef7c4e 2572
dd82ffea
JMF
2573 if 'playlist' not in info_dict:
2574 # It isn't part of a playlist
2575 info_dict['playlist'] = None
2576 info_dict['playlist_index'] = None
2577
bc516a3f 2578 self._sanitize_thumbnails(info_dict)
d5519808 2579
536a55da 2580 thumbnail = info_dict.get('thumbnail')
bc516a3f 2581 thumbnails = info_dict.get('thumbnails')
536a55da
S
2582 if thumbnail:
2583 info_dict['thumbnail'] = sanitize_url(thumbnail)
2584 elif thumbnails:
d5519808
PH
2585 info_dict['thumbnail'] = thumbnails[-1]['url']
2586
ae30b840 2587 if info_dict.get('display_id') is None and 'id' in info_dict:
0afef30b
PH
2588 info_dict['display_id'] = info_dict['id']
2589
03f83004 2590 self._fill_common_fields(info_dict)
33d2fc2f 2591
05108a49
S
2592 for cc_kind in ('subtitles', 'automatic_captions'):
2593 cc = info_dict.get(cc_kind)
2594 if cc:
2595 for _, subtitle in cc.items():
2596 for subtitle_format in subtitle:
2597 if subtitle_format.get('url'):
2598 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2599 if subtitle_format.get('ext') is None:
2600 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2601
2602 automatic_captions = info_dict.get('automatic_captions')
4bba3716 2603 subtitles = info_dict.get('subtitles')
4bba3716 2604
360e1ca5 2605 info_dict['requested_subtitles'] = self.process_subtitles(
05108a49 2606 info_dict['id'], subtitles, automatic_captions)
a504ced0 2607
aebb4f4b 2608 formats = self._get_formats(info_dict)
dd82ffea 2609
c154302c 2610 # Backward compatibility with InfoExtractor._sort_formats
9ebac355 2611 field_preference = (formats or [{}])[0].pop('__sort_fields', None)
c154302c 2612 if field_preference:
2613 info_dict['_format_sort_fields'] = field_preference
2614
0a5a191a 2615 # or None ensures --clean-infojson removes it
2616 info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
88acdbc2 2617 if not self.params.get('allow_unplayable_formats'):
2618 formats = [f for f in formats if not f.get('has_drm')]
17ffed18 2619
2620 if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2621 self.report_warning(
2622 f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
2623 'only images are available for download. Use --list-formats to see them'.capitalize())
88acdbc2 2624
319b6059 2625 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2626 if not get_from_start:
2627 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2628 if info_dict.get('is_live') and formats:
adbc4ec4 2629 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
319b6059 2630 if get_from_start and not formats:
a44ca5a4 2631 self.raise_no_formats(info_dict, msg=(
2632 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2633 'If you want to download from the current time, use --no-live-from-start'))
adbc4ec4 2634
73af5cc8
S
2635 def is_wellformed(f):
2636 url = f.get('url')
a5ac0c47 2637 if not url:
73af5cc8
S
2638 self.report_warning(
2639 '"url" field is missing or empty - skipping format, '
2640 'there is an error in extractor')
a5ac0c47
S
2641 return False
2642 if isinstance(url, bytes):
2643 sanitize_string_field(f, 'url')
2644 return True
73af5cc8
S
2645
2646 # Filter out malformed formats for better extraction robustness
1ac7f461 2647 formats = list(filter(is_wellformed, formats or []))
2648
2649 if not formats:
2650 self.raise_no_formats(info_dict)
73af5cc8 2651
39f32f17 2652 for format in formats:
c9969434
S
2653 sanitize_string_field(format, 'format_id')
2654 sanitize_numeric_fields(format)
dcf77cf1 2655 format['url'] = sanitize_url(format['url'])
39f32f17 2656 if format.get('ext') is None:
2657 format['ext'] = determine_ext(format['url']).lower()
2658 if format.get('protocol') is None:
2659 format['protocol'] = determine_protocol(format)
2660 if format.get('resolution') is None:
2661 format['resolution'] = self.format_resolution(format, default=None)
2662 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2663 format['dynamic_range'] = 'SDR'
2664 if format.get('aspect_ratio') is None:
2665 format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
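# Estimate the filesize from duration and total bitrate (tbr, in KBit/s) when the
# extractor provides neither filesize nor filesize_approx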
2666 if (info_dict.get('duration') and format.get('tbr')
2667 and not format.get('filesize') and not format.get('filesize_approx')):
2668 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
2669 format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict))
2670
2671 # This is copied to http_headers by the above _calc_headers and can now be removed
2672 if '__x_forwarded_for_ip' in info_dict:
2673 del info_dict['__x_forwarded_for_ip']
2674
c154302c 2675 self.sort_formats({
2676 'formats': formats,
2677 '_format_sort_fields': info_dict.get('_format_sort_fields')
2678 })
39f32f17 2679
2680 # Sanitize and group by format_id
2681 formats_dict = {}
2682 for i, format in enumerate(formats):
e74e3b63 2683 if not format.get('format_id'):
14f25df2 2684 format['format_id'] = str(i)
e2effb08
S
2685 else:
2686 # Sanitize format_id from characters used in format selector expression
ec85ded8 2687 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
39f32f17 2688 formats_dict.setdefault(format['format_id'], []).append(format)
181c7053
S
2689
2690 # Make sure all formats have unique format_id
03b4de72 2691 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
181c7053 2692 for format_id, ambiguous_formats in formats_dict.items():
48ee10ee 2693 ambiguous_id = len(ambiguous_formats) > 1
 2694 for i, format in enumerate(ambiguous_formats):
 2695 if ambiguous_id:
181c7053 2696 format['format_id'] = '%s-%d' % (format_id, i)
48ee10ee 2697 # Ensure there is no conflict between id and ext in format selection
2698 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2699 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2700 format['format_id'] = 'f%s' % format['format_id']
181c7053 2701
39f32f17 2702 if format.get('format') is None:
2703 format['format'] = '{id} - {res}{note}'.format(
2704 id=format['format_id'],
2705 res=self.format_resolution(format),
2706 note=format_field(format, 'format_note', ' (%s)'),
2707 )
dd82ffea 2708
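# As with thumbnails, formats are verified lazily starting from the best (last) one,
# so only the formats that selection actually touches get probed.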
9f1a1c36 2709 if self.params.get('check_formats') is True:
282f5709 2710 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
9f1a1c36 2711
88acdbc2 2712 if not formats or formats[0] is not info_dict:
b3d9ef88
JMF
 2713 # only set the 'formats' field if the original info_dict lists them;
 2714 # otherwise we end up with a circular reference: the first (and only)
f89197d7 2715 # element in the 'formats' field in info_dict is info_dict itself,
dfb1b146 2716 # which can't be exported to json
b3d9ef88 2717 info_dict['formats'] = formats
4ec82a72 2718
2719 info_dict, _ = self.pre_process(info_dict)
2720
6db9c4d5 2721 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
09b49e1f 2722 return info_dict
2723
2724 self.post_extract(info_dict)
2725 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2726
093a1710 2727 # The pre-processors may have modified the formats
aebb4f4b 2728 formats = self._get_formats(info_dict)
093a1710 2729
e4221b70 2730 list_only = self.params.get('simulate') == 'list_only'
fa9f30b8 2731 interactive_format_selection = not list_only and self.format_selector == '-'
b7b04c78 2732 if self.params.get('list_thumbnails'):
2733 self.list_thumbnails(info_dict)
b7b04c78 2734 if self.params.get('listsubtitles'):
2735 if 'automatic_captions' in info_dict:
2736 self.list_subtitles(
2737 info_dict['id'], automatic_captions, 'automatic captions')
2738 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
fa9f30b8 2739 if self.params.get('listformats') or interactive_format_selection:
b69fd25c 2740 self.list_formats(info_dict)
169dbde9 2741 if list_only:
b7b04c78 2742 # Without this printing, -F --print-json will not work
17060584 2743 self.__forced_printings(info_dict)
c487cf00 2744 return info_dict
bfaae0a7 2745
187986a8 2746 format_selector = self.format_selector
fa9f30b8 2747 while True:
2748 if interactive_format_selection:
372a0f3b
IS
2749 req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
2750 + '(Press ENTER for default, or Ctrl+C to quit)'
2751 + self._format_screen(': ', self.Styles.EMPHASIS))
fa9f30b8 2752 try:
372a0f3b 2753 format_selector = self.build_format_selector(req_format) if req_format else None
fa9f30b8 2754 except SyntaxError as err:
2755 self.report_error(err, tb=False, is_error=False)
2756 continue
2757
372a0f3b
IS
2758 if format_selector is None:
2759 req_format = self._default_format_spec(info_dict, download=download)
2760 self.write_debug(f'Default format spec: {req_format}')
2761 format_selector = self.build_format_selector(req_format)
2762
85e801a9 2763 formats_to_download = list(format_selector({
fa9f30b8 2764 'formats': formats,
85e801a9 2765 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2766 'incomplete_formats': (
2767 # All formats are video-only or
2768 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2769 # all formats are audio-only
2770 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2771 }))
fa9f30b8 2772 if interactive_format_selection and not formats_to_download:
2773 self.report_error('Requested format is not available', tb=False, is_error=False)
2774 continue
2775 break
317f7ab6 2776
dd82ffea 2777 if not formats_to_download:
b7da73eb 2778 if not self.params.get('ignore_no_formats_error'):
c0b6e5c7 2779 raise ExtractorError(
2780 'Requested format is not available. Use --list-formats for a list of available formats',
2781 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
b62fa6d7 2782 self.report_warning('Requested format is not available')
2783 # Process what we can, even without any available formats.
2784 formats_to_download = [{}]
a13e6848 2785
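# download_ranges is a callable returning the requested sections; the default is a
# single empty dict, i.e. the whole video.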
0500ee3d 2786 requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
5ec1b6b7 2787 best_format, downloaded_formats = formats_to_download[-1], []
b62fa6d7 2788 if download:
0500ee3d 2789 if best_format and requested_ranges:
5ec1b6b7 2790 def to_screen(*msg):
2791 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2792
2793 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2794 (f['format_id'] for f in formats_to_download))
0500ee3d 2795 if requested_ranges != ({}, ):
5ec1b6b7 2796 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
fc2ba496 2797 (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
a13e6848 2798 max_downloads_reached = False
5ec1b6b7 2799
0500ee3d 2800 for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
5ec1b6b7 2801 new_info = self._copy_infodict(info_dict)
b7da73eb 2802 new_info.update(fmt)
3975b4d2 2803 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
fc2ba496 2804 end_time = offset + min(chapter.get('end_time', duration), duration)
3975b4d2 2805 if chapter or offset:
5ec1b6b7 2806 new_info.update({
3975b4d2 2807 'section_start': offset + chapter.get('start_time', 0),
2576d53a 2808 # duration may not be accurate. So allow deviations <1sec
2809 'section_end': end_time if end_time <= offset + duration + 1 else None,
5ec1b6b7 2810 'section_title': chapter.get('title'),
2811 'section_number': chapter.get('index'),
2812 })
2813 downloaded_formats.append(new_info)
a13e6848 2814 try:
2815 self.process_info(new_info)
2816 except MaxDownloadsReached:
2817 max_downloads_reached = True
415f8d51 2818 self._raise_pending_errors(new_info)
f46e2f9d 2819 # Remove copied info
2820 for key, val in tuple(new_info.items()):
2821 if info_dict.get(key) == val:
2822 new_info.pop(key)
a13e6848 2823 if max_downloads_reached:
2824 break
ebed8b37 2825
5ec1b6b7 2826 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
a13e6848 2827 assert write_archive.issubset({True, False, 'ignore'})
2828 if True in write_archive and False not in write_archive:
2829 self.record_download_archive(info_dict)
be72c624 2830
5ec1b6b7 2831 info_dict['requested_downloads'] = downloaded_formats
ed5835b4 2832 info_dict = self.run_all_pps('after_video', info_dict)
a13e6848 2833 if max_downloads_reached:
2834 raise MaxDownloadsReached()
ebed8b37 2835
49a57e70 2836 # We update the info dict with the selected best quality format (backwards compatibility)
be72c624 2837 info_dict.update(best_format)
dd82ffea
JMF
2838 return info_dict
2839
98c70d6f 2840 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
a504ced0 2841 """Select the requested subtitles and their format"""
d8a58ddc 2842 available_subs, normal_sub_langs = {}, []
98c70d6f
JMF
2843 if normal_subtitles and self.params.get('writesubtitles'):
2844 available_subs.update(normal_subtitles)
d8a58ddc 2845 normal_sub_langs = tuple(normal_subtitles.keys())
98c70d6f
JMF
2846 if automatic_captions and self.params.get('writeautomaticsub'):
2847 for lang, cap_info in automatic_captions.items():
360e1ca5
JMF
2848 if lang not in available_subs:
2849 available_subs[lang] = cap_info
2850
d2c8aadf 2851 if not available_subs or (
2852 not self.params.get('writesubtitles')
2853 and not self.params.get('writeautomaticsub')):
4d171848 2854 return None
a504ced0 2855
d8a58ddc 2856 all_sub_langs = tuple(available_subs.keys())
a504ced0 2857 if self.params.get('allsubtitles', False):
c32b0aab 2858 requested_langs = all_sub_langs
2859 elif self.params.get('subtitleslangs', False):
5314b521 2860 try:
2861 requested_langs = orderedSet_from_options(
2862 self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
2863 except re.error as e:
 2864 raise ValueError(f'Wrong regex for subtitleslangs: {e.pattern}')
a504ced0 2865 else:
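# Default preference: English from manual subtitles (exact match, then en-*),
# then English from any source including automatic captions, then the first
# available language; only one language is selected.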
376aa24b
SS
2866 requested_langs = LazyList(itertools.chain(
2867 ['en'] if 'en' in normal_sub_langs else [],
2868 filter(lambda f: f.startswith('en'), normal_sub_langs),
2869 ['en'] if 'en' in all_sub_langs else [],
2870 filter(lambda f: f.startswith('en'), all_sub_langs),
2871 normal_sub_langs, all_sub_langs,
2872 ))[:1]
ad3dc496 2873 if requested_langs:
d2c8aadf 2874 self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
a504ced0
JMF
2875
2876 formats_query = self.params.get('subtitlesformat', 'best')
2877 formats_preference = formats_query.split('/') if formats_query else []
2878 subs = {}
2879 for lang in requested_langs:
2880 formats = available_subs.get(lang)
2881 if formats is None:
86e5f3ed 2882 self.report_warning(f'{lang} subtitles not available for {video_id}')
a504ced0 2883 continue
a504ced0
JMF
2884 for ext in formats_preference:
2885 if ext == 'best':
2886 f = formats[-1]
2887 break
2888 matches = list(filter(lambda f: f['ext'] == ext, formats))
2889 if matches:
2890 f = matches[-1]
2891 break
2892 else:
2893 f = formats[-1]
2894 self.report_warning(
2895 'No subtitle format found matching "%s" for language %s, '
2896 'using %s' % (formats_query, lang, f['ext']))
2897 subs[lang] = f
2898 return subs
2899
bb66c247 2900 def _forceprint(self, key, info_dict):
2901 if info_dict is None:
2902 return
2903 info_copy = info_dict.copy()
17060584 2904 info_copy.setdefault('filename', self.prepare_filename(info_dict))
2905 if info_dict.get('requested_formats') is not None:
2906 # For RTMP URLs, also include the playpath
2907 info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2908 elif info_dict.get('url'):
2909 info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
bb66c247 2910 info_copy['formats_table'] = self.render_formats_table(info_dict)
2911 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2912 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2913 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2914
2915 def format_tmpl(tmpl):
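# Bare field lists like "title,id" (or a "{...}" dict spec, optionally ending in "=")
# are expanded into output-template syntax; anything else is treated as a template.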
48c8424b 2916 mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
07a1250e 2917 if not mobj:
2918 return tmpl
48c8424b 2919
2920 fmt = '%({})s'
2921 if tmpl.startswith('{'):
6f2287cb 2922 tmpl, fmt = f'.{tmpl}', '%({})j'
48c8424b 2923 if tmpl.endswith('='):
2924 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
2925 return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
8130779d 2926
bb66c247 2927 for tmpl in self.params['forceprint'].get(key, []):
2928 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2929
2930 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
5127e92a 2931 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
bb66c247 2932 tmpl = format_tmpl(tmpl)
2933 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
8d93e69d 2934 if self._ensure_dir_exists(filename):
9874e82b 2935 with open(filename, 'a', encoding='utf-8', newline='') as f:
2936 f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
ca30f449 2937
17060584 2938 return info_copy
2939
2940 def __forced_printings(self, info_dict, filename=None, incomplete=True):
bb66c247 2941 if (self.params.get('forcejson')
2942 or self.params['forceprint'].get('video')
2943 or self.params['print_to_file'].get('video')):
2b8a2973 2944 self.post_extract(info_dict)
17060584 2945 if filename:
2946 info_dict['filename'] = filename
b5f61b69 2947 info_copy = self._forceprint('video', info_dict)
2948
2949 def print_field(field, actual_field=None, optional=False):
2950 if actual_field is None:
2951 actual_field = field
2952 if self.params.get(f'force{field}') and (
2953 info_copy.get(field) is not None or (not optional and not incomplete)):
2954 self.to_stdout(info_copy[actual_field])
2955
2956 print_field('title')
2957 print_field('id')
2958 print_field('url', 'urls')
2959 print_field('thumbnail', optional=True)
2960 print_field('description', optional=True)
6f2287cb 2961 if filename:
2962 print_field('filename')
b5f61b69 2963 if self.params.get('forceduration') and info_copy.get('duration') is not None:
2964 self.to_stdout(formatSeconds(info_copy['duration']))
2965 print_field('format')
53c18592 2966
2b8a2973 2967 if self.params.get('forcejson'):
6e84b215 2968 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
d06daf23 2969
e8e73840 2970 def dl(self, name, info, subtitle=False, test=False):
88acdbc2 2971 if not info.get('url'):
1151c407 2972 self.raise_no_formats(info, True)
e8e73840 2973
2974 if test:
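# Probe mode: run the downloader quietly, without .part/.ytdl files; downloaders
# that honour the 'test' flag only fetch a small portion of the data.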
2975 verbose = self.params.get('verbose')
2976 params = {
2977 'test': True,
a169858f 2978 'quiet': self.params.get('quiet') or not verbose,
e8e73840 2979 'verbose': verbose,
2980 'noprogress': not verbose,
2981 'nopart': True,
2982 'skip_unavailable_fragments': False,
2983 'keep_fragments': False,
2984 'overwrites': True,
2985 '_no_ytdl_file': True,
2986 }
2987 else:
2988 params = self.params
96fccc10 2989 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
e8e73840 2990 if not test:
2991 for ph in self._progress_hooks:
2992 fd.add_progress_hook(ph)
42676437
M
2993 urls = '", "'.join(
2994 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2995 for f in info.get('requested_formats', []) or [info])
3a408f9d 2996 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
03b4de72 2997
adbc4ec4
THD
 2998 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
2999 # But it may contain objects that are not deep-copyable
3000 new_info = self._copy_infodict(info)
e8e73840 3001 if new_info.get('http_headers') is None:
3002 new_info['http_headers'] = self._calc_headers(new_info)
3003 return fd.download(name, new_info, subtitle)
3004
e04938ab 3005 def existing_file(self, filepaths, *, default_overwrite=True):
3006 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
3007 if existing_files and not self.params.get('overwrites', default_overwrite):
3008 return existing_files[0]
3009
3010 for file in existing_files:
3011 self.report_file_delete(file)
3012 os.remove(file)
3013 return None
3014
8222d8de 3015 def process_info(self, info_dict):
09b49e1f 3016 """Process a single resolved IE result. (Modifies it in-place)"""
8222d8de
JMF
3017
3018 assert info_dict.get('_type', 'video') == 'video'
f46e2f9d 3019 original_infodict = info_dict
fd288278 3020
4513a41a 3021 if 'format' not in info_dict and 'ext' in info_dict:
8222d8de
JMF
3022 info_dict['format'] = info_dict['ext']
3023
c77495e3 3024 if self._match_entry(info_dict) is not None:
9e907ebd 3025 info_dict['__write_download_archive'] = 'ignore'
8222d8de
JMF
3026 return
3027
09b49e1f 3028 # Does nothing under normal operation - for backward compatibility of process_info
277d6ff5 3029 self.post_extract(info_dict)
119e40ef 3030
3031 def replace_info_dict(new_info):
3032 nonlocal info_dict
3033 if new_info == info_dict:
3034 return
3035 info_dict.clear()
3036 info_dict.update(new_info)
3037
3038 new_info, _ = self.pre_process(info_dict, 'video')
3039 replace_info_dict(new_info)
0c14d66a 3040 self._num_downloads += 1
8222d8de 3041
dcf64d43 3042 # info_dict['_filename'] needs to be set for backward compatibility
de6000d9 3043 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
3044 temp_filename = self.prepare_filename(info_dict, 'temp')
0202b52a 3045 files_to_move = {}
8222d8de
JMF
3046
3047 # Forced printings
4513a41a 3048 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
8222d8de 3049
ca6d59d2 3050 def check_max_downloads():
3051 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
3052 raise MaxDownloadsReached()
3053
b7b04c78 3054 if self.params.get('simulate'):
9e907ebd 3055 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
ca6d59d2 3056 check_max_downloads()
8222d8de
JMF
3057 return
3058
de6000d9 3059 if full_filename is None:
8222d8de 3060 return
e92caff5 3061 if not self._ensure_dir_exists(encodeFilename(full_filename)):
0202b52a 3062 return
e92caff5 3063 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
8222d8de
JMF
3064 return
3065
80c03fa9 3066 if self._write_description('video', info_dict,
3067 self.prepare_filename(info_dict, 'description')) is None:
3068 return
3069
3070 sub_files = self._write_subtitles(info_dict, temp_filename)
3071 if sub_files is None:
3072 return
3073 files_to_move.update(dict(sub_files))
3074
3075 thumb_files = self._write_thumbnails(
3076 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3077 if thumb_files is None:
3078 return
3079 files_to_move.update(dict(thumb_files))
8222d8de 3080
80c03fa9 3081 infofn = self.prepare_filename(info_dict, 'infojson')
3082 _infojson_written = self._write_info_json('video', info_dict, infofn)
3083 if _infojson_written:
dac5df5a 3084 info_dict['infojson_filename'] = infofn
e75bb0d6 3085 # For backward compatibility, even though it was a private field
80c03fa9 3086 info_dict['__infojson_filename'] = infofn
3087 elif _infojson_written is None:
3088 return
3089
3090 # Note: Annotations are deprecated
3091 annofn = None
1fb07d10 3092 if self.params.get('writeannotations', False):
de6000d9 3093 annofn = self.prepare_filename(info_dict, 'annotation')
80c03fa9 3094 if annofn:
e92caff5 3095 if not self._ensure_dir_exists(encodeFilename(annofn)):
0202b52a 3096 return
0c3d0f51 3097 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
6febd1c1 3098 self.to_screen('[info] Video annotations are already present')
ffddb112
RA
3099 elif not info_dict.get('annotations'):
3100 self.report_warning('There are no annotations to write.')
7b6fefc9
PH
3101 else:
3102 try:
6febd1c1 3103 self.to_screen('[info] Writing video annotations to: ' + annofn)
86e5f3ed 3104 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
7b6fefc9
PH
3105 annofile.write(info_dict['annotations'])
3106 except (KeyError, TypeError):
6febd1c1 3107 self.report_warning('There are no annotations to write.')
86e5f3ed 3108 except OSError:
6febd1c1 3109 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 3110 return
1fb07d10 3111
732044af 3112 # Write internet shortcut files
08438d2c 3113 def _write_link_file(link_type):
60f3e995 3114 url = try_get(info_dict['webpage_url'], iri_to_uri)
3115 if not url:
3116 self.report_warning(
3117 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3118 return True
08438d2c 3119 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
0e6b018a
Z
3120 if not self._ensure_dir_exists(encodeFilename(linkfn)):
3121 return False
10e3742e 3122 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
08438d2c 3123 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3124 return True
3125 try:
3126 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
86e5f3ed 3127 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3128 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
60f3e995 3129 template_vars = {'url': url}
08438d2c 3130 if link_type == 'desktop':
3131 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3132 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
86e5f3ed 3133 except OSError:
08438d2c 3134 self.report_error(f'Cannot write internet shortcut {linkfn}')
3135 return False
732044af 3136 return True
3137
08438d2c 3138 write_links = {
3139 'url': self.params.get('writeurllink'),
3140 'webloc': self.params.get('writewebloclink'),
3141 'desktop': self.params.get('writedesktoplink'),
3142 }
3143 if self.params.get('writelink'):
3144 link_type = ('webloc' if sys.platform == 'darwin'
3145 else 'desktop' if sys.platform.startswith('linux')
3146 else 'url')
3147 write_links[link_type] = True
3148
3149 if any(should_write and not _write_link_file(link_type)
3150 for link_type, should_write in write_links.items()):
3151 return
732044af 3152
415f8d51 3153 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3154 replace_info_dict(new_info)
56d868db 3155
a13e6848 3156 if self.params.get('skip_download'):
56d868db 3157 info_dict['filepath'] = temp_filename
3158 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3159 info_dict['__files_to_move'] = files_to_move
f46e2f9d 3160 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
9e907ebd 3161 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
56d868db 3162 else:
3163 # Download
b868936c 3164 info_dict.setdefault('__postprocessors', [])
4340deca 3165 try:
0202b52a 3166
e04938ab 3167 def existing_video_file(*filepaths):
6b591b29 3168 ext = info_dict.get('ext')
e04938ab 3169 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3170 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3171 default_overwrite=False)
3172 if file:
3173 info_dict['ext'] = os.path.splitext(file)[1][1:]
3174 return file
0202b52a 3175
7b2c3f47 3176 fd, success = None, True
fccf90e7 3177 if info_dict.get('protocol') or info_dict.get('url'):
56ba69e4 3178 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
71df9b7f 3179 if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
56ba69e4 3180 info_dict.get('section_start') or info_dict.get('section_end')):
7b2c3f47 3181 msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
56ba69e4 3182 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3183 self.report_error(f'{msg}. Aborting')
5ec1b6b7 3184 return
5ec1b6b7 3185
4340deca 3186 if info_dict.get('requested_formats') is not None:
81cd954a 3187 requested_formats = info_dict['requested_formats']
0202b52a 3188 old_ext = info_dict['ext']
4e3b637d 3189 if self.params.get('merge_output_format') is None:
4e3b637d 3190 if (info_dict['ext'] == 'webm'
3191 and info_dict.get('thumbnails')
3192 # check with type instead of pp_key, __name__, or isinstance
 3193 # since we don't want any custom PPs to trigger this
c487cf00 3194 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
4e3b637d 3195 info_dict['ext'] = 'mkv'
3196 self.report_warning(
3197 'webm doesn\'t support embedding a thumbnail, mkv will be used')
124bc071 3198 new_ext = info_dict['ext']
0202b52a 3199
124bc071 3200 def correct_ext(filename, ext=new_ext):
96fccc10 3201 if filename == '-':
3202 return filename
0202b52a 3203 filename_real_ext = os.path.splitext(filename)[1][1:]
3204 filename_wo_ext = (
3205 os.path.splitext(filename)[0]
124bc071 3206 if filename_real_ext in (old_ext, new_ext)
0202b52a 3207 else filename)
86e5f3ed 3208 return f'{filename_wo_ext}.{ext}'
0202b52a 3209
38c6902b 3210 # Ensure filename always has a correct extension for successful merge
0202b52a 3211 full_filename = correct_ext(full_filename)
3212 temp_filename = correct_ext(temp_filename)
e04938ab 3213 dl_filename = existing_video_file(full_filename, temp_filename)
1ea24129 3214 info_dict['__real_download'] = False
18e674b4 3215
7b2c3f47 3216 merger = FFmpegMergerPP(self)
adbc4ec4 3217 downloaded = []
dbf5416a 3218 if dl_filename is not None:
6c7274ec 3219 self.report_file_already_downloaded(dl_filename)
adbc4ec4
THD
3220 elif fd:
3221 for f in requested_formats if fd != FFmpegFD else []:
3222 f['filepath'] = fname = prepend_extension(
3223 correct_ext(temp_filename, info_dict['ext']),
3224 'f%s' % f['format_id'], info_dict['ext'])
3225 downloaded.append(fname)
dbf5416a 3226 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3227 success, real_download = self.dl(temp_filename, info_dict)
3228 info_dict['__real_download'] = real_download
18e674b4 3229 else:
18e674b4 3230 if self.params.get('allow_unplayable_formats'):
3231 self.report_warning(
3232 'You have requested merging of multiple formats '
3233 'while also allowing unplayable formats to be downloaded. '
3234 'The formats won\'t be merged to prevent data corruption.')
3235 elif not merger.available:
e8969bda 3236 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3237 if not self.params.get('ignoreerrors'):
3238 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3239 return
3240 self.report_warning(f'{msg}. The formats won\'t be merged')
18e674b4 3241
96fccc10 3242 if temp_filename == '-':
adbc4ec4 3243 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
96fccc10 3244 else 'but the formats are incompatible for simultaneous download' if merger.available
3245 else 'but ffmpeg is not installed')
3246 self.report_warning(
3247 f'You have requested downloading multiple formats to stdout {reason}. '
3248 'The formats will be streamed one after the other')
3249 fname = temp_filename
dbf5416a 3250 for f in requested_formats:
3251 new_info = dict(info_dict)
3252 del new_info['requested_formats']
3253 new_info.update(f)
96fccc10 3254 if temp_filename != '-':
124bc071 3255 fname = prepend_extension(
3256 correct_ext(temp_filename, new_info['ext']),
3257 'f%s' % f['format_id'], new_info['ext'])
96fccc10 3258 if not self._ensure_dir_exists(fname):
3259 return
a21e0ab1 3260 f['filepath'] = fname
96fccc10 3261 downloaded.append(fname)
dbf5416a 3262 partial_success, real_download = self.dl(fname, new_info)
3263 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3264 success = success and partial_success
adbc4ec4
THD
3265
3266 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3267 info_dict['__postprocessors'].append(merger)
3268 info_dict['__files_to_merge'] = downloaded
3269 # Even if there were no downloads, it is being merged only now
3270 info_dict['__real_download'] = True
3271 else:
3272 for file in downloaded:
3273 files_to_move[file] = None
4340deca
P
3274 else:
3275 # Just a single file
e04938ab 3276 dl_filename = existing_video_file(full_filename, temp_filename)
6c7274ec 3277 if dl_filename is None or dl_filename == temp_filename:
3278 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3279 # So we should try to resume the download
e8e73840 3280 success, real_download = self.dl(temp_filename, info_dict)
0202b52a 3281 info_dict['__real_download'] = real_download
6c7274ec 3282 else:
3283 self.report_file_already_downloaded(dl_filename)
0202b52a 3284
0202b52a 3285 dl_filename = dl_filename or temp_filename
c571435f 3286 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
0202b52a 3287
3158150c 3288 except network_exceptions as err:
7960b056 3289 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
4340deca 3290 return
86e5f3ed 3291 except OSError as err:
4340deca
P
3292 raise UnavailableVideoError(err)
3293 except (ContentTooShortError, ) as err:
86e5f3ed 3294 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
4340deca 3295 return
8222d8de 3296
415f8d51 3297 self._raise_pending_errors(info_dict)
de6000d9 3298 if success and full_filename != '-':
f17f8651 3299
fd7cfb64 3300 def fixup():
3301 do_fixup = True
3302 fixup_policy = self.params.get('fixup')
3303 vid = info_dict['id']
3304
3305 if fixup_policy in ('ignore', 'never'):
3306 return
3307 elif fixup_policy == 'warn':
3fe75fdc 3308 do_fixup = 'warn'
f89b3e2d 3309 elif fixup_policy != 'force':
3310 assert fixup_policy in ('detect_or_warn', None)
3311 if not info_dict.get('__real_download'):
3312 do_fixup = False
fd7cfb64 3313
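# Depending on the fixup policy, either just warn about the detected problem or
# queue the matching ffmpeg postprocessor to repair the file.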
3314 def ffmpeg_fixup(cndn, msg, cls):
3fe75fdc 3315 if not (do_fixup and cndn):
fd7cfb64 3316 return
3fe75fdc 3317 elif do_fixup == 'warn':
fd7cfb64 3318 self.report_warning(f'{vid}: {msg}')
3319 return
3320 pp = cls(self)
3321 if pp.available:
3322 info_dict['__postprocessors'].append(pp)
3323 else:
3324 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3325
3326 stretched_ratio = info_dict.get('stretched_ratio')
ca9def71
LNO
3327 ffmpeg_fixup(stretched_ratio not in (1, None),
3328 f'Non-uniform pixel ratio {stretched_ratio}',
3329 FFmpegFixupStretchedPP)
fd7cfb64 3330
993191c0 3331 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
24146491 3332 downloader = downloader.FD_NAME if downloader else None
adbc4ec4 3333
ca9def71
LNO
3334 ext = info_dict.get('ext')
3335 postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3336 isinstance(pp, FFmpegVideoConvertorPP)
3337 and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3338 ) for pp in self._pps['post_process'])
3339
3340 if not postprocessed_by_ffmpeg:
3341 ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
f2df4071 3342 'writing DASH m4a. Only some players support this container',
3343 FFmpegFixupM4aPP)
24146491 3344 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
494f5230 3345 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
adbc4ec4
THD
3346 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3347 FFmpegFixupM3u8PP)
26010b5c 3348 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
adbc4ec4
THD
3349 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3350
24146491 3351 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3352 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
fd7cfb64 3353
3354 fixup()
8222d8de 3355 try:
f46e2f9d 3356 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
af819c21 3357 except PostProcessingError as err:
3358 self.report_error('Postprocessing: %s' % str(err))
8222d8de 3359 return
ab8e5e51
AM
3360 try:
3361 for ph in self._post_hooks:
23c1a667 3362 ph(info_dict['filepath'])
ab8e5e51
AM
3363 except Exception as err:
3364 self.report_error('post hooks: %s' % str(err))
3365 return
9e907ebd 3366 info_dict['__write_download_archive'] = True
2d30509f 3367
c487cf00 3368 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
a13e6848 3369 if self.params.get('force_write_download_archive'):
9e907ebd 3370 info_dict['__write_download_archive'] = True
ca6d59d2 3371 check_max_downloads()
8222d8de 3372
aa9369a2 3373 def __download_wrapper(self, func):
3374 @functools.wraps(func)
3375 def wrapper(*args, **kwargs):
3376 try:
3377 res = func(*args, **kwargs)
3378 except UnavailableVideoError as e:
3379 self.report_error(e)
b222c271 3380 except DownloadCancelled as e:
3381 self.to_screen(f'[info] {e}')
3382 if not self.params.get('break_per_url'):
3383 raise
fd404bec 3384 self._num_downloads = 0
aa9369a2 3385 else:
3386 if self.params.get('dump_single_json', False):
3387 self.post_extract(res)
3388 self.to_stdout(json.dumps(self.sanitize_info(res)))
3389 return wrapper
3390
8222d8de
JMF
3391 def download(self, url_list):
3392 """Download a given list of URLs."""
aa9369a2 3393 url_list = variadic(url_list) # Passing a single URL is a common mistake
bf1824b3 3394 outtmpl = self.params['outtmpl']['default']
3089bc74
S
3395 if (len(url_list) > 1
3396 and outtmpl != '-'
3397 and '%' not in outtmpl
3398 and self.params.get('max_downloads') != 1):
acd69589 3399 raise SameFileError(outtmpl)
8222d8de
JMF
3400
3401 for url in url_list:
aa9369a2 3402 self.__download_wrapper(self.extract_info)(
3403 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
8222d8de
JMF
3404
3405 return self._download_retcode
3406
1dcc4c0c 3407 def download_with_info_file(self, info_filename):
31bd3925
JMF
3408 with contextlib.closing(fileinput.FileInput(
3409 [info_filename], mode='r',
3410 openhook=fileinput.hook_encoded('utf-8'))) as f:
3411 # FileInput doesn't have a read method, we can't call json.load
ab1de9cb 3412 infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
3413 for info in variadic(json.loads('\n'.join(f)))]
3414 for info in infos:
3415 try:
3416 self.__download_wrapper(self.process_ie_result)(info, download=True)
3417 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3418 if not isinstance(e, EntryNotInPlaylist):
3419 self.to_stderr('\r')
3420 webpage_url = info.get('webpage_url')
3421 if webpage_url is None:
3422 raise
aa9369a2 3423 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
ab1de9cb 3424 self.download([webpage_url])
d4943898 3425 return self._download_retcode
1dcc4c0c 3426
cb202fd2 3427 @staticmethod
8012d892 3428 def sanitize_info(info_dict, remove_private_keys=False):
3429 ''' Sanitize the infodict for converting to json '''
3ad56b42 3430 if info_dict is None:
3431 return info_dict
6e84b215 3432 info_dict.setdefault('epoch', int(time.time()))
6a5a30f9 3433 info_dict.setdefault('_type', 'video')
b5e7a2e6 3434 info_dict.setdefault('_version', {
3435 'version': __version__,
3436 'current_git_head': current_git_head(),
3437 'release_git_head': RELEASE_GIT_HEAD,
3438 'repository': REPOSITORY,
3439 })
09b49e1f 3440
8012d892 3441 if remove_private_keys:
0a5a191a 3442 reject = lambda k, v: v is None or k.startswith('__') or k in {
f46e2f9d 3443 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
6f2287cb 3444 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
3445 'playlist_autonumber', '_format_sort_fields',
6e84b215 3446 }
ae8f99e6 3447 else:
09b49e1f 3448 reject = lambda k, v: False
adbc4ec4
THD
3449
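# Recursively drop rejected keys and convert values that are not JSON-serializable
# (LazyList, arbitrary objects) into lists or their repr().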
3450 def filter_fn(obj):
3451 if isinstance(obj, dict):
3452 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3453 elif isinstance(obj, (list, tuple, set, LazyList)):
3454 return list(map(filter_fn, obj))
3455 elif obj is None or isinstance(obj, (str, int, float, bool)):
3456 return obj
3457 else:
3458 return repr(obj)
3459
5226731e 3460 return filter_fn(info_dict)
cb202fd2 3461
8012d892 3462 @staticmethod
3463 def filter_requested_info(info_dict, actually_filter=True):
3464 ''' Alias of sanitize_info for backward compatibility '''
3465 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3466
43d7f5a5 3467 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3468 for filename in set(filter(None, files_to_delete)):
3469 if msg:
3470 self.to_screen(msg % filename)
3471 try:
3472 os.remove(filename)
3473 except OSError:
3474 self.report_warning(f'Unable to delete file {filename}')
3475 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3476 del info['__files_to_move'][filename]
3477
ed5835b4 3478 @staticmethod
3479 def post_extract(info_dict):
3480 def actual_post_extract(info_dict):
3481 if info_dict.get('_type') in ('playlist', 'multi_video'):
3482 for video_dict in info_dict.get('entries', {}):
3483 actual_post_extract(video_dict or {})
3484 return
3485
09b49e1f 3486 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3487 info_dict.update(post_extractor())
ed5835b4 3488
3489 actual_post_extract(info_dict or {})
3490
dcf64d43 3491 def run_pp(self, pp, infodict):
5bfa4862 3492 files_to_delete = []
dcf64d43 3493 if '__files_to_move' not in infodict:
3494 infodict['__files_to_move'] = {}
b1940459 3495 try:
3496 files_to_delete, infodict = pp.run(infodict)
3497 except PostProcessingError as e:
3498 # Must be True and not 'only_download'
3499 if self.params.get('ignoreerrors') is True:
3500 self.report_error(e)
3501 return infodict
3502 raise
3503
5bfa4862 3504 if not files_to_delete:
dcf64d43 3505 return infodict
5bfa4862 3506 if self.params.get('keepvideo', False):
3507 for f in files_to_delete:
dcf64d43 3508 infodict['__files_to_move'].setdefault(f, '')
5bfa4862 3509 else:
43d7f5a5 3510 self._delete_downloaded_files(
3511 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
dcf64d43 3512 return infodict
5bfa4862 3513
6f2287cb 3514 def run_all_pps(self, key, info, *, additional_pps=None):
17ba4343 3515 if key != 'video':
3516 self._forceprint(key, info)
3517 for pp in (additional_pps or []) + self._pps[key]:
3518 info = self.run_pp(pp, info)
ed5835b4 3519 return info
277d6ff5 3520
56d868db 3521 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
5bfa4862 3522 info = dict(ie_info)
56d868db 3523 info['__files_to_move'] = files_to_move or {}
415f8d51 3524 try:
3525 info = self.run_all_pps(key, info)
3526 except PostProcessingError as err:
3527 msg = f'Preprocessing: {err}'
3528 info.setdefault('__pending_error', msg)
3529 self.report_error(msg, is_error=False)
56d868db 3530 return info, info.pop('__files_to_move', None)
5bfa4862 3531
f46e2f9d 3532 def post_process(self, filename, info, files_to_move=None):
8222d8de 3533 """Run all the postprocessors on the given file."""
8222d8de 3534 info['filepath'] = filename
dcf64d43 3535 info['__files_to_move'] = files_to_move or {}
ed5835b4 3536 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
dcf64d43 3537 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3538 del info['__files_to_move']
ed5835b4 3539 return self.run_all_pps('after_move', info)
c1c9a79c 3540
5db07df6 3541 def _make_archive_id(self, info_dict):
e9fef7ee
S
3542 video_id = info_dict.get('id')
3543 if not video_id:
3544 return
5db07df6
PH
 3545 # Future-proof against any change in case
 3546 # and keep backwards compatibility with prior versions
e9fef7ee 3547 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
7012b23c 3548 if extractor is None:
1211bb6d
S
3549 url = str_or_none(info_dict.get('url'))
3550 if not url:
3551 return
e9fef7ee 3552 # Try to find matching extractor for the URL and take its ie_key
8b7491c8 3553 for ie_key, ie in self._ies.items():
1211bb6d 3554 if ie.suitable(url):
8b7491c8 3555 extractor = ie_key
e9fef7ee
S
3556 break
3557 else:
3558 return
0647d925 3559 return make_archive_id(extractor, video_id)
5db07df6
PH
3560
3561 def in_download_archive(self, info_dict):
ae103564 3562 if not self.archive:
5db07df6
PH
3563 return False
3564
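# Also match archive entries written under old/alternative extractor IDs
# (_old_archive_ids)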
1e8fe57e 3565 vid_ids = [self._make_archive_id(info_dict)]
c200096c 3566 vid_ids.extend(info_dict.get('_old_archive_ids') or [])
1e8fe57e 3567 return any(id_ in self.archive for id_ in vid_ids)
c1c9a79c
PH
3568
3569 def record_download_archive(self, info_dict):
3570 fn = self.params.get('download_archive')
3571 if fn is None:
3572 return
5db07df6
PH
3573 vid_id = self._make_archive_id(info_dict)
3574 assert vid_id
ae103564 3575
a13e6848 3576 self.write_debug(f'Adding to archive: {vid_id}')
9c935fbc 3577 if is_path_like(fn):
ae103564 3578 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3579 archive_file.write(vid_id + '\n')
a45e8619 3580 self.archive.add(vid_id)
dd82ffea 3581
8c51aa65 3582 @staticmethod
8abeeb94 3583 def format_resolution(format, default='unknown'):
9359f3d4 3584 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
fb04e403 3585 return 'audio only'
f49d89ee
PH
3586 if format.get('resolution') is not None:
3587 return format['resolution']
35615307 3588 if format.get('width') and format.get('height'):
ff51ed58 3589 return '%dx%d' % (format['width'], format['height'])
35615307 3590 elif format.get('height'):
ff51ed58 3591 return '%sp' % format['height']
35615307 3592 elif format.get('width'):
ff51ed58 3593 return '%dx?' % format['width']
3594 return default
8c51aa65 3595
8130779d 3596 def _list_format_headers(self, *headers):
3597 if self.params.get('listformats_table', True) is not False:
591bb9d3 3598 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
8130779d 3599 return headers
3600
c57f7757
PH
3601 def _format_note(self, fdict):
3602 res = ''
3603 if fdict.get('ext') in ['f4f', 'f4m']:
f304da8a 3604 res += '(unsupported)'
32f90364
PH
3605 if fdict.get('language'):
3606 if res:
3607 res += ' '
f304da8a 3608 res += '[%s]' % fdict['language']
c57f7757 3609 if fdict.get('format_note') is not None:
f304da8a 3610 if res:
3611 res += ' '
3612 res += fdict['format_note']
c57f7757 3613 if fdict.get('tbr') is not None:
f304da8a 3614 if res:
3615 res += ', '
3616 res += '%4dk' % fdict['tbr']
c57f7757
PH
3617 if fdict.get('container') is not None:
3618 if res:
3619 res += ', '
3620 res += '%s container' % fdict['container']
3089bc74
S
3621 if (fdict.get('vcodec') is not None
3622 and fdict.get('vcodec') != 'none'):
c57f7757
PH
3623 if res:
3624 res += ', '
3625 res += fdict['vcodec']
91c7271a 3626 if fdict.get('vbr') is not None:
c57f7757
PH
3627 res += '@'
3628 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3629 res += 'video@'
3630 if fdict.get('vbr') is not None:
3631 res += '%4dk' % fdict['vbr']
fbb21cf5 3632 if fdict.get('fps') is not None:
5d583bdf
S
3633 if res:
3634 res += ', '
3635 res += '%sfps' % fdict['fps']
c57f7757
PH
3636 if fdict.get('acodec') is not None:
3637 if res:
3638 res += ', '
3639 if fdict['acodec'] == 'none':
3640 res += 'video only'
3641 else:
3642 res += '%-5s' % fdict['acodec']
3643 elif fdict.get('abr') is not None:
3644 if res:
3645 res += ', '
3646 res += 'audio'
3647 if fdict.get('abr') is not None:
3648 res += '@%3dk' % fdict['abr']
3649 if fdict.get('asr') is not None:
3650 res += ' (%5dHz)' % fdict['asr']
3651 if fdict.get('filesize') is not None:
3652 if res:
3653 res += ', '
3654 res += format_bytes(fdict['filesize'])
9732d77e
PH
3655 elif fdict.get('filesize_approx') is not None:
3656 if res:
3657 res += ', '
3658 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 3659 return res
91c7271a 3660
aebb4f4b 3661 def _get_formats(self, info_dict):
3662 if info_dict.get('formats') is None:
3663 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3664 return [info_dict]
3665 return []
3666 return info_dict['formats']
b69fd25c 3667
aebb4f4b 3668 def render_formats_table(self, info_dict):
3669 formats = self._get_formats(info_dict)
3670 if not formats:
3671 return
8130779d 3672 if self.params.get('listformats_table', True) is False:
76d321f6 3673 table = [
3674 [
3675 format_field(f, 'format_id'),
3676 format_field(f, 'ext'),
3677 self.format_resolution(f),
8130779d 3678 self._format_note(f)
d5d1df8a 3679 ] for f in formats if (f.get('preference') or 0) >= -1000]
8130779d 3680 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3681
d816f61f 3682 def simplified_codec(f, field):
3683 assert field in ('acodec', 'vcodec')
3684 codec = f.get(field, 'unknown')
f5ea4748 3685 if not codec:
3686 return 'unknown'
3687 elif codec != 'none':
d816f61f 3688 return '.'.join(codec.split('.')[:4])
3689
3690 if field == 'vcodec' and f.get('acodec') == 'none':
3691 return 'images'
3692 elif field == 'acodec' and f.get('vcodec') == 'none':
3693 return ''
3694 return self._format_out('audio only' if field == 'vcodec' else 'video only',
3695 self.Styles.SUPPRESS)
3696
591bb9d3 3697 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
8130779d 3698 table = [
3699 [
591bb9d3 3700 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
8130779d 3701 format_field(f, 'ext'),
3702 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
563e0bf8 3703 format_field(f, 'fps', '\t%d', func=round),
8130779d 3704 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
b8ed0f15 3705 format_field(f, 'audio_channels', '\t%s'),
8130779d 3706 delim,
3707 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
563e0bf8 3708 format_field(f, 'tbr', '\t%dk', func=round),
8130779d 3709 shorten_protocol_name(f.get('protocol', '')),
3710 delim,
d816f61f 3711 simplified_codec(f, 'vcodec'),
563e0bf8 3712 format_field(f, 'vbr', '\t%dk', func=round),
d816f61f 3713 simplified_codec(f, 'acodec'),
563e0bf8 3714 format_field(f, 'abr', '\t%dk', func=round),
ae61d108 3715 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
8130779d 3716 join_nonempty(
591bb9d3 3717 self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
a5387729 3718 self._format_out('DRM', 'light red') if f.get('has_drm') else None,
8130779d 3719 format_field(f, 'language', '[%s]'),
3720 join_nonempty(format_field(f, 'format_note'),
3721 format_field(f, 'container', ignore=(None, f.get('ext'))),
3722 delim=', '),
3723 delim=' '),
3724 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3725 header_line = self._list_format_headers(
b8ed0f15 3726 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
8130779d 3727 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3728
3729 return render_table(
3730 header_line, table, hide_empty=True,
591bb9d3 3731 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
8130779d 3732
3733 def render_thumbnails_table(self, info_dict):
88f23a18 3734 thumbnails = list(info_dict.get('thumbnails') or [])
cfb56d1a 3735 if not thumbnails:
8130779d 3736 return None
3737 return render_table(
ec11a9f4 3738 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
177662e0 3739 [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
2412044c 3740
8130779d 3741 def render_subtitles_table(self, video_id, subtitles):
2412044c 3742 def _row(lang, formats):
49c258e1 3743 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
2412044c 3744 if len(set(names)) == 1:
7aee40c1 3745 names = [] if names[0] == 'unknown' else names[:1]
2412044c 3746 return [lang, ', '.join(names), ', '.join(exts)]
3747
8130779d 3748 if not subtitles:
3749 return None
3750 return render_table(
ec11a9f4 3751 self._list_format_headers('Language', 'Name', 'Formats'),
2412044c 3752 [_row(lang, formats) for lang, formats in subtitles.items()],
8130779d 3753 hide_empty=True)
3754
3755 def __list_table(self, video_id, name, func, *args):
3756 table = func(*args)
3757 if not table:
3758 self.to_screen(f'{video_id} has no {name}')
3759 return
3760 self.to_screen(f'[info] Available {name} for {video_id}:')
3761 self.to_stdout(table)
3762
3763 def list_formats(self, info_dict):
3764 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3765
3766 def list_thumbnails(self, info_dict):
3767 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3768
3769 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3770 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
a504ced0 3771
dca08720
PH
3772 def urlopen(self, req):
3773 """ Start an HTTP download """
f9934b96 3774 if isinstance(req, str):
67dda517 3775 req = sanitized_Request(req)
19a41fc6 3776 return self._opener.open(req, timeout=self._socket_timeout)
dca08720 3777
3778 def print_debug_header(self):
3779 if not self.params.get('verbose'):
3780 return
49a57e70 3781
a057779d 3782 from . import _IN_CLI # Must be delayed import
3783
560738f3 3784 # These imports can be slow, so import them only as needed
3785 from .extractor.extractors import _LAZY_LOADER
e756f45b 3786 from .extractor.extractors import (
3787 _PLUGIN_CLASSES as plugin_ies,
3788 _PLUGIN_OVERRIDES as plugin_ie_overrides
3789 )
560738f3 3790
49a57e70 3791 def get_encoding(stream):
2a938746 3792 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
8417f26b 3793 additional_info = []
3794 if os.environ.get('TERM', '').lower() == 'dumb':
3795 additional_info.append('dumb')
49a57e70 3796 if not supports_terminal_sequences(stream):
53973b4d 3797 from .utils import WINDOWS_VT_MODE # Must be imported locally
8417f26b 3798 additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
3799 if additional_info:
3800 ret = f'{ret} ({",".join(additional_info)})'
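                # e.g. 'cp1252 (No VT)' or 'ascii (dumb)'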
49a57e70 3801 return ret
3802
591bb9d3 3803 encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
49a57e70 3804 locale.getpreferredencoding(),
3805 sys.getfilesystemencoding(),
591bb9d3 3806 self.get_encoding(),
3807 ', '.join(
64fa820c 3808 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
591bb9d3 3809 if stream is not None and key != 'console')
3810 )
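        # Renders e.g. 'Encodings: locale cp1252, fs utf-8, pref cp1252, out utf-8, error utf-8, screen utf-8'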
883d4b1e 3811
3812 logger = self.params.get('logger')
3813 if logger:
3814 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3815 write_debug(encoding_str)
3816 else:
96565c7e 3817 write_string(f'[debug] {encoding_str}\n', encoding=None)
49a57e70 3818 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
734f90bb 3819
4c88ff87 3820 source = detect_variant()
70b23409 3821 if VARIANT not in (None, 'pip'):
3822 source += '*'
a5387729 3823 klass = type(self)
36eaf303 3824 write_debug(join_nonempty(
b5e7a2e6 3825 f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
392389b7 3826 f'{CHANNEL}@{__version__}',
29cb20bd 3827 f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
36eaf303 3828 '' if source == 'unknown' else f'({source})',
a5387729 3829 '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
36eaf303 3830 delim=' '))
497074f0 3831
3832 if not _IN_CLI:
3833 write_debug(f'params: {self.params}')
3834
6e21fdd2 3835 if not _LAZY_LOADER:
3836 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
49a57e70 3837 write_debug('Lazy loading extractors is forcibly disabled')
6e21fdd2 3838 else:
49a57e70 3839 write_debug('Lazy loading extractors is disabled')
8a82af35 3840 if self.params['compat_opts']:
3841 write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
36eaf303 3842
b5e7a2e6 3843 if current_git_head():
3844 write_debug(f'Git HEAD: {current_git_head()}')
b1f94422 3845 write_debug(system_identifier())
d28b5171 3846
8913ef74 3847 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3848 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3849 if ffmpeg_features:
19a03940 3850 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
8913ef74 3851
4c83c967 3852 exe_versions['rtmpdump'] = rtmpdump_version()
feee8d32 3853 exe_versions['phantomjs'] = PhantomJSwrapper._version()
d28b5171 3854 exe_str = ', '.join(
2831b468 3855 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3856 ) or 'none'
49a57e70 3857 write_debug('exe versions: %s' % exe_str)
dca08720 3858
1d485a1a 3859 from .compat.compat_utils import get_package_info
9b8ee23b 3860 from .dependencies import available_dependencies
3861
3862 write_debug('Optional libraries: %s' % (', '.join(sorted({
1d485a1a 3863 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
9b8ee23b 3864 })) or 'none'))
2831b468 3865
97ec5bc5 3866 self._setup_opener()
dca08720 3867 proxy_map = {}
3868 for handler in self._opener.handlers:
3869 if hasattr(handler, 'proxies'):
3870 proxy_map.update(handler.proxies)
49a57e70 3871 write_debug(f'Proxy map: {proxy_map}')
dca08720 3872
e756f45b 3873 for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
3874 display_list = ['%s%s' % (
8e40b9d1 3875 klass.__name__, '' if klass.__name__ == name else f' as {name}')
e756f45b 3876 for name, klass in plugins.items()]
3877 if plugin_type == 'Extractor':
3878 display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
3879 for parent, plugins in plugin_ie_overrides.items())
3880 if not display_list:
3881 continue
3882 write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')
3883
8e40b9d1 3884 plugin_dirs = plugin_directories()
3885 if plugin_dirs:
3886 write_debug(f'Plugin directories: {plugin_dirs}')
3887
49a57e70 3888 # Not implemented
3889 if False and self.params.get('call_home'):
0f06bcd7 3890 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
49a57e70 3891 write_debug('Public IP address: %s' % ipaddr)
58b1f00d 3892 latest_version = self.urlopen(
0f06bcd7 3893 'https://yt-dl.org/latest/version').read().decode()
58b1f00d 3894 if version_tuple(latest_version) > version_tuple(__version__):
3895 self.report_warning(
3896 'You are using an outdated version (newest version: %s)! '
3897 'See https://yt-dl.org/update if you need help updating.' %
3898 latest_version)
3899
e344693b 3900 def _setup_opener(self):
97ec5bc5 3901 if hasattr(self, '_opener'):
3902 return
6ad14cab 3903 timeout_val = self.params.get('socket_timeout')
17bddf3e 3904 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
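        # The default socket timeout is 20 seconds when --socket-timeout is not given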
6ad14cab 3905
982ee69a 3906 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
dca08720 3907 opts_cookiefile = self.params.get('cookiefile')
3908 opts_proxy = self.params.get('proxy')
3909
982ee69a 3910 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
dca08720 3911
6a3f4c3f 3912 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
dca08720 3913 if opts_proxy is not None:
3914 if opts_proxy == '':
3915 proxies = {}
3916 else:
3917 proxies = {'http': opts_proxy, 'https': opts_proxy}
3918 else:
ac668111 3919 proxies = urllib.request.getproxies()
067aa17e 3920 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
dca08720 3921 if 'http' in proxies and 'https' not in proxies:
3922 proxies['https'] = proxies['http']
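                # e.g. HTTP_PROXY=http://127.0.0.1:3128 set in the environment is then also used for https:// requests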
91410c9b 3923 proxy_handler = PerRequestProxyHandler(proxies)
a0ddb8a2 3924
3925 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
be4a824d 3926 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3927 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
fca6dba8 3928 redirect_handler = YoutubeDLRedirectHandler()
f9934b96 3929 data_handler = urllib.request.DataHandler()
6240b0a2 3930
3931 # When passing our own FileHandler instance, build_opener won't add the
3932 # default FileHandler; this allows us to disable the file protocol, which
3933 # can be used for malicious purposes (see
067aa17e 3934 # https://github.com/ytdl-org/youtube-dl/issues/8227)
ac668111 3935 file_handler = urllib.request.FileHandler()
6240b0a2 3936
8300774c 3937 if not self.params.get('enable_file_urls'):
3938 def file_open(*args, **kwargs):
3939 raise urllib.error.URLError(
3940 'file:// URLs are explicitly disabled in yt-dlp for security reasons. '
3941 'Use --enable-file-urls to enable at your own risk.')
3942 file_handler.file_open = file_open
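            # With this override, opening e.g. 'file:///etc/passwd' through this opener raises URLError unless --enable-file-urls is passed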
6240b0a2 3943
ac668111 3944 opener = urllib.request.build_opener(
fca6dba8 3945 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
2461f79d 3946
dca08720 3947 # Delete the default user-agent header, which would otherwise apply in
3948 # cases where our custom HTTP handler doesn't come into play
067aa17e 3949 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
dca08720 3950 opener.addheaders = []
3951 self._opener = opener
62fec3b2 3952
3953 def encode(self, s):
3954 if isinstance(s, bytes):
3955 return s # Already encoded
3956
3957 try:
3958 return s.encode(self.get_encoding())
3959 except UnicodeEncodeError as err:
3960 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3961 raise
3962
3963 def get_encoding(self):
3964 encoding = self.params.get('encoding')
3965 if encoding is None:
3966 encoding = preferredencoding()
3967 return encoding
ec82d85a 3968
e08a85d8 3969 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
cb96c5be 3970 ''' Write infojson and return True = written, 'exists' = already exists, False = skipped, None = error '''
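        # Note: both True and 'exists' are truthy, so callers that only need to know whether an infojson is available can test the return value directly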
e08a85d8 3971 if overwrite is None:
3972 overwrite = self.params.get('overwrites', True)
80c03fa9 3973 if not self.params.get('writeinfojson'):
3974 return False
3975 elif not infofn:
3976 self.write_debug(f'Skipping writing {label} infojson')
3977 return False
3978 elif not self._ensure_dir_exists(infofn):
3979 return None
e08a85d8 3980 elif not overwrite and os.path.exists(infofn):
80c03fa9 3981 self.to_screen(f'[info] {label.title()} metadata is already present')
cb96c5be 3982 return 'exists'
3983
3984 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3985 try:
3986 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3987 return True
86e5f3ed 3988 except OSError:
cb96c5be 3989 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3990 return None
80c03fa9 3991
3992 def _write_description(self, label, ie_result, descfn):
3993 ''' Write description and return True = written, False = skipped, None = error '''
3994 if not self.params.get('writedescription'):
3995 return False
3996 elif not descfn:
3997 self.write_debug(f'Skipping writing {label} description')
3998 return False
3999 elif not self._ensure_dir_exists(descfn):
4000 return None
4001 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
4002 self.to_screen(f'[info] {label.title()} description is already present')
4003 elif ie_result.get('description') is None:
88fb9425 4004 self.to_screen(f'[info] There\'s no {label} description to write')
80c03fa9 4005 return False
4006 else:
4007 try:
4008 self.to_screen(f'[info] Writing {label} description to: {descfn}')
86e5f3ed 4009 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
80c03fa9 4010 descfile.write(ie_result['description'])
86e5f3ed 4011 except OSError:
80c03fa9 4012 self.report_error(f'Cannot write {label} description file {descfn}')
4013 return None
4014 return True
4015
4016 def _write_subtitles(self, info_dict, filename):
4017 ''' Write subtitles to file and return a list of (sub_filename, final_sub_filename), or None on error '''
4018 ret = []
4019 subtitles = info_dict.get('requested_subtitles')
88fb9425 4020 if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
80c03fa9 4021 # Subtitle download errors are already handled by the relevant IE,
4022 # so this silently continues when used with an IE that does not support subtitles
4023 return ret
88fb9425 4024 elif not subtitles:
c8bc203f 4025 self.to_screen('[info] There are no subtitles for the requested languages')
88fb9425 4026 return ret
80c03fa9 4027 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
4028 if not sub_filename_base:
4029 self.to_screen('[info] Skipping writing video subtitles')
4030 return ret
88fb9425 4031
80c03fa9 4032 for sub_lang, sub_info in subtitles.items():
4033 sub_format = sub_info['ext']
4034 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
4035 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
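            # e.g. for language 'en' and ext 'vtt', these filenames end in '.en.vtt'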
e04938ab 4036 existing_sub = self.existing_file((sub_filename_final, sub_filename))
4037 if existing_sub:
80c03fa9 4038 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
e04938ab 4039 sub_info['filepath'] = existing_sub
4040 ret.append((existing_sub, sub_filename_final))
80c03fa9 4041 continue
4042
4043 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
4044 if sub_info.get('data') is not None:
4045 try:
4046 # Use newline='' to prevent conversion of newline characters
4047 # See https://github.com/ytdl-org/youtube-dl/issues/10268
86e5f3ed 4048 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
80c03fa9 4049 subfile.write(sub_info['data'])
4050 sub_info['filepath'] = sub_filename
4051 ret.append((sub_filename, sub_filename_final))
4052 continue
86e5f3ed 4053 except OSError:
80c03fa9 4054 self.report_error(f'Cannot write video subtitles file {sub_filename}')
4055 return None
4056
4057 try:
4058 sub_copy = sub_info.copy()
4059 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
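                # Fall back to the video's HTTP headers when the subtitle entry does not provide its own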
4060 self.dl(sub_filename, sub_copy, subtitle=True)
4061 sub_info['filepath'] = sub_filename
4062 ret.append((sub_filename, sub_filename_final))
6020e05d 4063 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
c70c418d 4064 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
6020e05d 4065 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
c70c418d 4066 if not self.params.get('ignoreerrors'):
4067 self.report_error(msg)
4068 raise DownloadError(msg)
4069 self.report_warning(msg)
519804a9 4070 return ret
80c03fa9 4071
4072 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
4073 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
6c4fd172 4074 write_all = self.params.get('write_all_thumbnails', False)
80c03fa9 4075 thumbnails, ret = [], []
6c4fd172 4076 if write_all or self.params.get('writethumbnail', False):
0202b52a 4077 thumbnails = info_dict.get('thumbnails') or []
88fb9425 4078 if not thumbnails:
c8bc203f 4079 self.to_screen(f'[info] There are no {label} thumbnails to download')
88fb9425 4080 return ret
6c4fd172 4081 multiple = write_all and len(thumbnails) > 1
ec82d85a 4082
80c03fa9 4083 if thumb_filename_base is None:
4084 thumb_filename_base = filename
4085 if thumbnails and not thumb_filename_base:
4086 self.write_debug(f'Skipping writing {label} thumbnail')
4087 return ret
4088
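        # Thumbnails are usually sorted worst-to-best, so walk the list in reverse to try the most preferred one first; unless write_all is set, stop after the first success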
dd0228ce 4089 for idx, t in list(enumerate(thumbnails))[::-1]:
80c03fa9 4090 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
aa9369a2 4091 thumb_display_id = f'{label} thumbnail {t["id"]}'
80c03fa9 4092 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
4093 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
ec82d85a 4094
e04938ab 4095 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
4096 if existing_thumb:
aa9369a2 4097 self.to_screen('[info] %s is already present' % (
4098 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
e04938ab 4099 t['filepath'] = existing_thumb
4100 ret.append((existing_thumb, thumb_filename_final))
ec82d85a 4101 else:
80c03fa9 4102 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
ec82d85a 4103 try:
297e9952 4104 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
80c03fa9 4105 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
d3d89c32 4106 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a 4107 shutil.copyfileobj(uf, thumbf)
80c03fa9 4108 ret.append((thumb_filename, thumb_filename_final))
885cc0b7 4109 t['filepath'] = thumb_filename
3158150c 4110 except network_exceptions as err:
dd0228ce 4111 thumbnails.pop(idx)
80c03fa9 4112 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
6c4fd172 4113 if ret and not write_all:
4114 break
0202b52a 4115 return ret