import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import http.cookiejar
import io
import itertools
import json
import locale
import operator
import os
import random
import re
import shutil
import string
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata

from .cache import Cache
from .compat import functools, urllib  # isort: split
from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
from .cookies import LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .networking import HEADRequest, Request, RequestDirector
from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES
from .networking.exceptions import (
    HTTPError,
    NoSupportingHandlers,
    RequestError,
    SSLError,
    _CompatHTTPError,
    network_exceptions,
)
from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    FFmpegVideoConvertorPP,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import REPOSITORY, _get_system_deprecation, _make_label, current_git_head, detect_variant
from .utils import (
    DEFAULT_OUTTMPL,
    IDENTITY,
    LINK_TEMPLATES,
    MEDIA_EXTENSIONS,
    NO_DEFAULT,
    NUMBER_RE,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    FormatSorter,
    GeoRestrictedError,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PlaylistEntries,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    UserNotLive,
    age_restricted,
    args_to_str,
    bug_reports_message,
    date_from_str,
    deprecation_warning,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    escapeHTML,
    expand_path,
    extract_basic_auth,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_compatible_ext,
    get_domain,
    int_or_none,
    iri_to_uri,
    is_path_like,
    join_nonempty,
    locked_file,
    make_archive_id,
    make_dir,
    number_of_digits,
    orderedSet,
    orderedSet_from_options,
    parse_filesize,
    preferredencoding,
    prepend_extension,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    system_identifier,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_call,
    try_get,
    url_basename,
    variadic,
    version_tuple,
    windows_enable_vt_mode,
    write_json_file,
    write_string,
)
from .utils._utils import _YDLLogger
from .utils.networking import (
    HTTPHeaderDict,
    clean_headers,
    clean_proxies,
    std_headers,
)
from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do),
    it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge of the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    netrc_location:    Location of the netrc file. Defaults to ~/.netrc.
    netrc_cmd:         Use a shell command to get credentials
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are video or any of the
                       items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
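                       E.g. (an illustrative sketch; the filename is arbitrary):
                       {'video': [('%(title)s', 'titles.txt')]}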
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. See "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of the same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be Windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    allowed_extractors: List of regexes to match against extractor names that are allowed
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
    playlist_items:    Specific indices of playlist to download.
    playlistrandom:    Download playlist items in random order.
    lazy_playlist:     Process playlist entries as they are received.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Print everything to stderr instead of stdout.
    consoletitle:      Display progress in the console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove internal metadata from the infojson
    getcomments:       Extract video comments. These will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc.
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages, e.g. ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A utils.DateRange object; download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Videos unsuitable for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  A set, or the name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL, as opposed to the entire queue
    cookiefile:        File name or text stream from where cookies should be read and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, the name of the keyring,
                       and the container name, e.g. ('chrome', ) or
                       ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    client_certificate: Path to client certificate file in PEM format. May include the private key
    client_certificate_key: Path to private key file for client certificate
    client_certificate_password: Password for client certificate private key, if encrypted.
                       If not provided and the key is encrypted, yt-dlp will ask interactively
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       (Only supported by some extractors)
    enable_file_urls:  Enable file:// URLs. This is disabled by default for security reasons.
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    default_search:    Prepend this string if an input URL is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Whether to resolve and process url_results further
                       * False: Always process. Default for API
                       * True: Never process
                       * 'in_playlist': Do not process inside playlist/multi_video
                       * 'discard': Always process, but don't return the result
                         from inside playlist/multi_video
                       * 'discard_in_playlist': Same as "discard", but only for
                         playlists (not multi_video). Default for CLI
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                         the entries of utils.POSTPROCESS_WHEN
                         Assumed to be 'post_process' if not given
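                       E.g. (a sketch using the documented FFmpegExtractAudio key):
                       [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3'}]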
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                         Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is "downloading" or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                         None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                         unknown
                       * fragment_index: The counter of the currently
                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
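                       E.g. (a sketch):
                           def hook(d):
                               if d['status'] == 'finished':
                                   print('Downloaded', d['filename'])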
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                         Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: "/" separated list of extensions to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone, or the lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from the range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       - Raise utils.DownloadCancelled(msg) to abort remaining
                         downloads when a video is rejected.
                       match_filter_func in utils/_utils.py is one example for this.
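                       E.g. (a sketch of the modern signature):
                           def shorter_than_10min(info_dict, *, incomplete):
                               if (info_dict.get('duration') or 0) > 600:
                                   return 'The video is too long'  # skipped
                               return None  # downloaded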
    color:             A dictionary with output stream names as keys
                       and their respective color policy as values.
                       Can also just be a single color policy,
                       in which case it applies to all outputs.
                       Valid stream names are 'stdout' and 'stderr'.
                       Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort,
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'
    retry_sleep_functions: Dictionary of functions that take the number of attempts
                       as argument and return the time to sleep in seconds.
                       Allowed keys are 'http', 'fragment', 'file_access'
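                       E.g. (a sketch, exponential backoff): {'http': lambda n: 2 ** n}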
    download_ranges:   A callback function that gets called for every video with
                       the signature (info_dict, ydl) -> Iterable[Section].
                       Only the returned sections will be downloaded.
                       Each Section is a dict with the following keys:
                       * start_time: Start time of the section in seconds
                       * end_time: End time of the section in seconds
                       * title: Section title (Optional)
                       * index: Section number (Optional)
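                       E.g. (a sketch; downloads only the first 60 seconds):
                           lambda info_dict, ydl: [{'start_time': 0, 'end_time': 60}]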
    force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
    noprogress:        Do not print the progress bar
    live_from_start:   Whether to download livestream videos from the start

    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP.
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors (default: 3)
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists into different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       E.g. {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
                       - `raise DownloadCancelled(msg)` in match_filter instead
    force_generic_extractor: Force downloader to use the generic extractor
                       - Use allowed_extractors = ['generic', 'default']
    playliststart:     - Use playlist_items
                       Playlist item to start at.
    playlistend:       - Use playlist_items
                       Playlist item to end at.
    playlistreverse:   - Use playlist_items
                       Download playlist items in reverse order.
    forceurl:          - Use forceprint
                       Force printing final URL.
    forcetitle:        - Use forceprint
                       Force printing title.
    forceid:           - Use forceprint
                       Force printing ID.
    forcethumbnail:    - Use forceprint
                       Force printing thumbnail URL.
    forcedescription:  - Use forceprint
                       Force printing description.
    forcefilename:     - Use forceprint
                       Force printing final filename.
    forceduration:     - Use forceprint
                       Force printing duration.
    allsubtitles:      - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    include_ads:       - Doesn't work
                       Download ads as well
    call_home:         - Not implemented
                       Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging.
    post_hooks:        - Register a custom postprocessor
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    prefer_ffmpeg:     - avconv support is deprecated
                       If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by the extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by the extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    no_color:          Same as `color='no_color'`
    no_overwrites:     Same as `overwrites=False`
    """

    _NUMERIC_FIELDS = {
        'width', 'height', 'asr', 'audio_channels', 'fps',
        'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
    }
    _format_selection_exts = {
        'audio': set(MEDIA_EXTENSIONS.common_audio),
        'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
        'storyboards': set(MEDIA_EXTENSIONS.storyboards),
    }

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)
        self.__header_cookies = []

        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )

        try:
            windows_enable_vt_mode()
        except Exception as e:
            self.write_debug(f'Failed to enable VT mode: {e}')

        if self.params.get('no_color'):
            if self.params.get('color') is not None:
                self.params.setdefault('_warnings', []).append(
                    'Overwriting params from "color" with "no_color"')
            self.params['color'] = 'no_color'

        term_allow_color = os.getenv('TERM', '').lower() != 'dumb'
        no_color = bool(os.getenv('NO_COLOR'))

        def process_color_policy(stream):
            stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
            policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
            if policy in ('auto', None):
                if term_allow_color and supports_terminal_sequences(stream):
                    return 'no_color' if no_color else True
                return False
            assert policy in ('always', 'never', 'no_color'), policy
            return {'always': True, 'never': False}.get(policy, policy)

        self._allow_colors = Namespace(**{
            name: process_color_policy(stream)
            for name, stream in self._out_files.items_ if name != 'console'
        })

        system_deprecation = _get_system_deprecation()
        if system_deprecation:
            self.deprecated_feature(system_deprecation.replace('\n', '\n '))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
        self._load_cookies(self.params['http_headers'].get('Cookie'))  # compat
        self.params['http_headers'].pop('Cookie', None)
        self._request_director = self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)

        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if self.params.get('simulate') is None and any((
            self.params.get('list_thumbnails'),
            self.params.get('listformats'),
            self.params.get('listsubtitles'),
        )):
            self.params['simulate'] = 'list_only'

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if auto_init:
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            archive = set()
            if fn is None:
                return archive
            elif not is_path_like(fn):
                return fn

            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
            return archive

        self.archive = preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with the name ie_key. It will try to get one
        from the _ies list; if there is no instance, it will create a new one
        and add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
        all_ies['end'] = UnsupportedURLIE()
        try:
            ie_names = orderedSet_from_options(
                self.params.get('allowed_extractors', ['default']), {
                    'all': list(all_ies),
                    'default': [name for name, ie in all_ies.items() if ie._ENABLED],
                }, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
        for name in ie_names:
            self.add_info_extractor(all_ies[name])
        self.write_debug(f'Loaded {len(ie_names)} extractors')

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                     'Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                     'Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen, only_once=only_once)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def save_cookies(self):
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()

    def __exit__(self, *args):
        self.restore_console_title()
        self.close()

    def close(self):
        self.save_cookies()
        self._request_director.close()

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors, this method may throw an exception or not when
        errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        BAD_FORMAT='light red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors is True else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty, the 'WARNING:' will be colored
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message, *, stacklevel=0):
        deprecation_warning(
            message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)

    def deprecated_feature(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'Deprecated Feature: {message}')
        self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log the debug message to the logger, or print it to stderr'''
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        sanitize = IDENTITY
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$';
        # that is not what we want, since we need to keep '%%' intact for
        # the template dict substitution step. Work around it with a
        # boundary-like separator hack.
        sep = ''.join(random.choices(string.ascii_letters, k=32))
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err
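
    # A rough illustration of the two helpers above (hypothetical templates):
    #   escape_outtmpl('100% %(title)s')       -> '100%% %(title)s'
    #   validate_outtmpl('%(title)s.%(ext)s')  -> None (the template is valid)
    #   validate_outtmpl('%(title)5')          -> a ValueError instance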
1143
1144 @staticmethod
1145 def _copy_infodict(info_dict):
1146 info_dict = dict(info_dict)
1147 info_dict.pop('__postprocessors', None)
1148 info_dict.pop('__pending_error', None)
1149 return info_dict
1150
1151 def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
1152 """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
1153 @param sanitize Whether to sanitize the output as a filename.
1154 For backward compatibility, a function can also be passed
1155 """
1156
1157 info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
1158
1159 info_dict = self._copy_infodict(info_dict)
1160 info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1161 formatSeconds(info_dict['duration'], '-' if sanitize else ':')
1162 if info_dict.get('duration', None) is not None
1163 else None)
1164 info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
1165 info_dict['video_autonumber'] = self._num_videos
1166 if info_dict.get('resolution') is None:
1167 info_dict['resolution'] = self.format_resolution(info_dict, default=None)
1168
1169 # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1170 # of %(field)s to %(field)0Nd for backward compatibility
1171 field_size_compat_map = {
1172 'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
1173 'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
1174 'autonumber': self.params.get('autonumber_size') or 5,
1175 }
1176
1177 TMPL_DICT = {}
1178 EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
1179 MATH_FUNCTIONS = {
1180 '+': float.__add__,
1181 '-': float.__sub__,
1182 '*': float.__mul__,
1183 }
1184 # Field is of the form key1.key2...
1185 # where keys (except first) can be string, int, slice or "{field, ...}"
1186 FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
1187 FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
1188 'inner': FIELD_INNER_RE,
1189 'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
1190 }
1191 MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
1192 MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1193 INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
1194 (?P<negate>-)?
1195 (?P<fields>{FIELD_RE})
1196 (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
1197 (?:>(?P<strf_format>.+?))?
1198 (?P<remaining>
1199 (?P<alternate>(?<!\\),[^|&)]+)?
1200 (?:&(?P<replacement>.*?))?
1201 (?:\|(?P<default>.*?))?
1202 )$''')
1203
1204 def _traverse_infodict(fields):
1205 fields = [f for x in re.split(r'\.({.+?})\.?', fields)
1206 for f in ([x] if x.startswith('{') else x.split('.'))]
1207 for i in (0, -1):
1208 if fields and not fields[i]:
1209 fields.pop(i)
1210
1211 for i, f in enumerate(fields):
1212 if not f.startswith('{'):
1213 continue
1214 assert f.endswith('}'), f'No closing brace for {f} in {fields}'
1215 fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}
1216
1217 return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)
1218
1219 def get_value(mdict):
1220 # Object traversal
1221 value = _traverse_infodict(mdict['fields'])
1222 # Negative
1223 if mdict['negate']:
1224 value = float_or_none(value)
1225 if value is not None:
1226 value *= -1
1227 # Do maths
1228 offset_key = mdict['maths']
1229 if offset_key:
1230 value = float_or_none(value)
1231 operator = None
1232 while offset_key:
1233 item = re.match(
1234 MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1235 offset_key).group(0)
1236 offset_key = offset_key[len(item):]
1237 if operator is None:
1238 operator = MATH_FUNCTIONS[item]
1239 continue
1240 item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1241 offset = float_or_none(item)
1242 if offset is None:
1243 offset = float_or_none(_traverse_infodict(item))
1244 try:
1245 value = operator(value, multiplier * offset)
1246 except (TypeError, ZeroDivisionError):
1247 return None
1248 operator = None
1249 # Datetime formatting
1250 if mdict['strf_format']:
1251 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1252
1253 # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
1254 if sanitize and value == '':
1255 value = None
1256 return value
1257
1258 na = self.params.get('outtmpl_na_placeholder', 'NA')
1259
1260 def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
1261 return sanitize_filename(str(value), restricted=restricted, is_id=(
1262 bool(re.search(r'(^|[_.])id(\.|$)', key))
1263 if 'filename-sanitization' in self.params['compat_opts']
1264 else NO_DEFAULT))
1265
1266 sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1267 sanitize = bool(sanitize)
1268
1269 def _dumpjson_default(obj):
1270 if isinstance(obj, (set, LazyList)):
1271 return list(obj)
1272 return repr(obj)
1273
1274 class _ReplacementFormatter(string.Formatter):
1275 def get_field(self, field_name, args, kwargs):
1276 if field_name.isdigit():
1277 return args[0], -1
1278 raise ValueError('Unsupported field')
1279
1280 replacement_formatter = _ReplacementFormatter()
1281
1282 def create_key(outer_mobj):
1283 if not outer_mobj.group('has_key'):
1284 return outer_mobj.group(0)
1285 key = outer_mobj.group('key')
1286 mobj = re.match(INTERNAL_FORMAT_RE, key)
1287 value, replacement, default, last_field = None, None, na, ''
1288 while mobj:
1289 mobj = mobj.groupdict()
1290 default = mobj['default'] if mobj['default'] is not None else default
1291 value = get_value(mobj)
1292 last_field, replacement = mobj['fields'], mobj['replacement']
1293 if value is None and mobj['alternate']:
1294 mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
1295 else:
1296 break
1297
1298 if None not in (value, replacement):
1299 try:
1300 value = replacement_formatter.format(replacement, value)
1301 except ValueError:
1302 value, default = None, na
1303
1304 fmt = outer_mobj.group('format')
1305 if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int):
1306 fmt = f'0{field_size_compat_map[last_field]:d}d'
1307
1308 flags = outer_mobj.group('conversion') or ''
1309 str_fmt = f'{fmt[:-1]}s'
1310 if value is None:
1311 value, fmt = default, 's'
1312 elif fmt[-1] == 'l': # list
1313 delim = '\n' if '#' in flags else ', '
1314 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
1315 elif fmt[-1] == 'j': # json
1316 value, fmt = json.dumps(
1317 value, default=_dumpjson_default,
1318 indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
1319 elif fmt[-1] == 'h': # html
1320 value, fmt = escapeHTML(str(value)), str_fmt
1321 elif fmt[-1] == 'q': # quoted
1322 value = map(str, variadic(value) if '#' in flags else [value])
1323 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
1324 elif fmt[-1] == 'B': # bytes
1325 value = f'%{str_fmt}'.encode() % str(value).encode()
1326 value, fmt = value.decode('utf-8', 'ignore'), 's'
1327 elif fmt[-1] == 'U': # unicode normalized
1328 value, fmt = unicodedata.normalize(
1329 # "+" = compatibility equivalence, "#" = NFD
1330 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1331 value), str_fmt
1332 elif fmt[-1] == 'D': # decimal suffix
1333 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1334 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1335 factor=1024 if '#' in flags else 1000)
1336 elif fmt[-1] == 'S': # filename sanitization
1337 value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
1338 elif fmt[-1] == 'c':
1339 if value:
1340 value = str(value)[0]
1341 else:
1342 fmt = str_fmt
1343 elif fmt[-1] not in 'rsa': # numeric
1344 value = float_or_none(value)
1345 if value is None:
1346 value, fmt = default, 's'
1347
1348 if sanitize:
1349 # If value is an object, sanitize might convert it to a string
1350 # So we convert it to repr first
1351 if fmt[-1] == 'r':
1352 value, fmt = repr(value), str_fmt
1353 elif fmt[-1] == 'a':
1354 value, fmt = ascii(value), str_fmt
1355 if fmt[-1] in 'csra':
1356 value = sanitizer(last_field, value)
1357
1358 key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1359 TMPL_DICT[key] = value
1360 return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1361
1362 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1363
1364 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1365 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1366 return self.escape_outtmpl(outtmpl) % info_dict
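# A rough usage sketch (field values below are assumed, not taken from this
# file): the template is prepared into an escaped printf-style string and
# then %-formatted with the collected values.
#
#   ydl = YoutubeDL()
#   ydl.evaluate_outtmpl('%(title)s [%(id)s].%(ext)s',
#                        {'id': 'x1', 'title': 'clip', 'ext': 'mp4'})
#   # -> roughly 'clip [x1].mp4'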
1367
1368 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1369 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1370 if outtmpl is None:
1371 outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
1372 try:
1373 outtmpl = self._outtmpl_expandpath(outtmpl)
1374 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1375 if not filename:
1376 return None
1377
1378 if tmpl_type in ('', 'temp'):
1379 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1380 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1381 filename = replace_extension(filename, ext, final_ext)
1382 elif tmpl_type:
1383 force_ext = OUTTMPL_TYPES[tmpl_type]
1384 if force_ext:
1385 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1386
1387 # https://github.com/blackjack4494/youtube-dlc/issues/85
1388 trim_file_name = self.params.get('trim_file_name', False)
1389 if trim_file_name:
1390 no_ext, *ext = filename.rsplit('.', 2)
1391 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1392
1393 return filename
1394 except ValueError as err:
1395 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1396 return None
1397
1398 def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1399 """Generate the output filename"""
1400 if outtmpl:
1401 assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1402 dir_type = None
1403 filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
1404 if not filename and dir_type not in ('', 'temp'):
1405 return ''
1406
1407 if warn:
1408 if not self.params.get('paths'):
1409 pass
1410 elif filename == '-':
1411 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
1412 elif os.path.isabs(filename):
1413 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1414 if filename == '-' or not filename:
1415 return filename
1416
1417 return self.get_output_path(dir_type, filename)
1418
1419 def _match_entry(self, info_dict, incomplete=False, silent=False):
1420 """Returns None if the file should be downloaded"""
1421 _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
1422 assert incomplete or _type == 'video', 'Only video result can be considered complete'
1423
1424 video_title = info_dict.get('title', info_dict.get('id', 'entry'))
1425
1426 def check_filter():
1427 if _type in ('playlist', 'multi_video'):
1428 return
1429 elif _type in ('url', 'url_transparent') and not try_call(
1430 lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
1431 return
1432
1433 if 'title' in info_dict:
1434 # This can happen when we're just evaluating the playlist
1435 title = info_dict['title']
1436 matchtitle = self.params.get('matchtitle', False)
1437 if matchtitle:
1438 if not re.search(matchtitle, title, re.IGNORECASE):
1439 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1440 rejecttitle = self.params.get('rejecttitle', False)
1441 if rejecttitle:
1442 if re.search(rejecttitle, title, re.IGNORECASE):
1443 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1444
1445 date = info_dict.get('upload_date')
1446 if date is not None:
1447 dateRange = self.params.get('daterange', DateRange())
1448 if date not in dateRange:
1449 return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
1450 view_count = info_dict.get('view_count')
1451 if view_count is not None:
1452 min_views = self.params.get('min_views')
1453 if min_views is not None and view_count < min_views:
1454 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1455 max_views = self.params.get('max_views')
1456 if max_views is not None and view_count > max_views:
1457 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1458 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1459 return 'Skipping "%s" because it is age restricted' % video_title
1460
1461 match_filter = self.params.get('match_filter')
1462 if match_filter is None:
1463 return None
1464
1465 cancelled = None
1466 try:
1467 try:
1468 ret = match_filter(info_dict, incomplete=incomplete)
1469 except TypeError:
1470 # For backward compatibility
1471 ret = None if incomplete else match_filter(info_dict)
1472 except DownloadCancelled as err:
1473 if err.msg is not NO_DEFAULT:
1474 raise
1475 ret, cancelled = err.msg, err
1476
1477 if ret is NO_DEFAULT:
1478 while True:
1479 filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1480 reply = input(self._format_screen(
1481 f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1482 if reply in {'y', ''}:
1483 return None
1484 elif reply == 'n':
1485 if cancelled:
1486 raise type(cancelled)(f'Skipping {video_title}')
1487 return f'Skipping {video_title}'
1488 return ret
1489
1490 if self.in_download_archive(info_dict):
1491 reason = ''.join((
1492 format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
1493 format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
1494 'has already been recorded in the archive'))
1495 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1496 else:
1497 try:
1498 reason = check_filter()
1499 except DownloadCancelled as e:
1500 reason, break_opt, break_err = e.msg, 'match_filter', type(e)
1501 else:
1502 break_opt, break_err = 'break_on_reject', RejectedVideoReached
1503 if reason is not None:
1504 if not silent:
1505 self.to_screen('[download] ' + reason)
1506 if self.params.get(break_opt, False):
1507 raise break_err()
1508 return reason
1509
1510 @staticmethod
1511 def add_extra_info(info_dict, extra_info):
1512 '''Set the keys from extra_info in info dict if they are missing'''
1513 for key, value in extra_info.items():
1514 info_dict.setdefault(key, value)
1515
1516 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1517 process=True, force_generic_extractor=False):
1518 """
1519 Extract and return the information dictionary of the URL
1520
1521 Arguments:
1522 @param url URL to extract
1523
1524 Keyword arguments:
1525 @param download Whether to download videos
1526 @param process Whether to resolve all unresolved references (URLs, playlist items).
1527 Must be True for download to work
1528 @param ie_key Use only the extractor with this key
1529
1530 @param extra_info Dictionary containing the extra values to add to the info (For internal use only)
1531 @param force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic')
1532 """
1533
1534 if extra_info is None:
1535 extra_info = {}
1536
1537 if not ie_key and force_generic_extractor:
1538 ie_key = 'Generic'
1539
1540 if ie_key:
1541 ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
1542 else:
1543 ies = self._ies
1544
1545 for key, ie in ies.items():
1546 if not ie.suitable(url):
1547 continue
1548
1549 if not ie.working():
1550 self.report_warning('The program functionality for this site has been marked as broken, '
1551 'and will probably not work.')
1552
1553 temp_id = ie.get_temp_id(url)
1554 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1555 self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
1556 'has already been recorded in the archive')
1557 if self.params.get('break_on_existing', False):
1558 raise ExistingVideoReached()
1559 break
1560 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1561 else:
1562 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1563 self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1564 tb=False if extractors_restricted else None)
1565
1566 def _handle_extraction_exceptions(func):
1567 @functools.wraps(func)
1568 def wrapper(self, *args, **kwargs):
1569 while True:
1570 try:
1571 return func(self, *args, **kwargs)
1572 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1573 raise
1574 except ReExtractInfo as e:
1575 if e.expected:
1576 self.to_screen(f'{e}; Re-extracting data')
1577 else:
1578 self.to_stderr('\r')
1579 self.report_warning(f'{e}; Re-extracting data')
1580 continue
1581 except GeoRestrictedError as e:
1582 msg = e.msg
1583 if e.countries:
1584 msg += '\nThis video is available in %s.' % ', '.join(
1585 map(ISO3166Utils.short2full, e.countries))
1586 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1587 self.report_error(msg)
1588 except ExtractorError as e: # An error we somewhat expected
1589 self.report_error(str(e), e.format_traceback())
1590 except Exception as e:
1591 if self.params.get('ignoreerrors'):
1592 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1593 else:
1594 raise
1595 break
1596 return wrapper
1597
1598 def _wait_for_video(self, ie_result={}):
1599 if (not self.params.get('wait_for_video')
1600 or ie_result.get('_type', 'video') != 'video'
1601 or ie_result.get('formats') or ie_result.get('url')):
1602 return
1603
1604 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1605 last_msg = ''
1606
1607 def progress(msg):
1608 nonlocal last_msg
1609 full_msg = f'{msg}\n'
1610 if not self.params.get('noprogress'):
1611 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1612 elif last_msg:
1613 return
1614 self.to_screen(full_msg, skip_eol=True)
1615 last_msg = msg
1616
1617 min_wait, max_wait = self.params.get('wait_for_video')
1618 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1619 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1620 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1621 self.report_warning('Release time of video is not known')
1622 elif ie_result and (diff or 0) <= 0:
1623 self.report_warning('Video should already be available according to extracted info')
1624 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1625 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1626
1627 wait_till = time.time() + diff
1628 try:
1629 while True:
1630 diff = wait_till - time.time()
1631 if diff <= 0:
1632 progress('')
1633 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1634 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1635 time.sleep(1)
1636 except KeyboardInterrupt:
1637 progress('')
1638 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1639 except BaseException as e:
1640 if not isinstance(e, ReExtractInfo):
1641 self.to_screen('')
1642 raise
1643
1644 def _load_cookies(self, data, *, autoscope=True):
1645 """Loads cookies from a `Cookie` header
1646
1647 This tries to work around the security vulnerability of passing cookies to every domain.
1648 See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
1649
1650 @param data The Cookie header as string to load the cookies from
1651 @param autoscope If `False`, scope cookies using Set-Cookie syntax and raise an error for cookies without domains
1652 If `True`, save cookies for later to be stored in the jar with a limited scope
1653 If a URL, save cookies in the jar with the domain of the URL
1654 """
1655 for cookie in LenientSimpleCookie(data).values():
1656 if autoscope and any(cookie.values()):
1657 raise ValueError('Invalid syntax in Cookie Header')
1658
1659 domain = cookie.get('domain') or ''
1660 expiry = cookie.get('expires')
1661 if expiry == '': # 0 is valid
1662 expiry = None
1663 prepared_cookie = http.cookiejar.Cookie(
1664 cookie.get('version') or 0, cookie.key, cookie.value, None, False,
1665 domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
1666 cookie.get('secure') or False, expiry, False, None, None, {})
1667
1668 if domain:
1669 self.cookiejar.set_cookie(prepared_cookie)
1670 elif autoscope is True:
1671 self.deprecated_feature(
1672 'Passing cookies as a header is a potential security risk; '
1673 'they will be scoped to the domain of the downloaded URLs. '
1674 'Please consider loading cookies from a file or browser instead.')
1675 self.__header_cookies.append(prepared_cookie)
1676 elif autoscope:
1677 self.report_warning(
1678 'The extractor result contains an unscoped cookie as an HTTP header. '
1679 f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
1680 only_once=True)
1681 self._apply_header_cookies(autoscope, [prepared_cookie])
1682 else:
1683 self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
1684 tb=False, is_error=False)
1685
1686 def _apply_header_cookies(self, url, cookies=None):
1687 """Applies stray header cookies to the provided url
1688
1689 This loads header cookies and scopes them to the domain provided in `url`.
1690 While this is not ideal, it helps reduce the risk of them being sent
1691 to an unintended destination while mostly maintaining compatibility.
1692 """
1693 parsed = urllib.parse.urlparse(url)
1694 if not parsed.hostname:
1695 return
1696
1697 for cookie in map(copy.copy, cookies or self.__header_cookies):
1698 cookie.domain = f'.{parsed.hostname}'
1699 self.cookiejar.set_cookie(cookie)
1700
1701 @_handle_extraction_exceptions
1702 def __extract_info(self, url, ie, download, extra_info, process):
1703 self._apply_header_cookies(url)
1704
1705 try:
1706 ie_result = ie.extract(url)
1707 except UserNotLive as e:
1708 if process:
1709 if self.params.get('wait_for_video'):
1710 self.report_warning(e)
1711 self._wait_for_video()
1712 raise
1713 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1714 self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1715 return
1716 if isinstance(ie_result, list):
1717 # Backwards compatibility: old IE result format
1718 ie_result = {
1719 '_type': 'compat_list',
1720 'entries': ie_result,
1721 }
1722 if extra_info.get('original_url'):
1723 ie_result.setdefault('original_url', extra_info['original_url'])
1724 self.add_default_extra_info(ie_result, ie, url)
1725 if process:
1726 self._wait_for_video(ie_result)
1727 return self.process_ie_result(ie_result, download, extra_info)
1728 else:
1729 return ie_result
1730
1731 def add_default_extra_info(self, ie_result, ie, url):
1732 if url is not None:
1733 self.add_extra_info(ie_result, {
1734 'webpage_url': url,
1735 'original_url': url,
1736 })
1737 webpage_url = ie_result.get('webpage_url')
1738 if webpage_url:
1739 self.add_extra_info(ie_result, {
1740 'webpage_url_basename': url_basename(webpage_url),
1741 'webpage_url_domain': get_domain(webpage_url),
1742 })
1743 if ie is not None:
1744 self.add_extra_info(ie_result, {
1745 'extractor': ie.IE_NAME,
1746 'extractor_key': ie.ie_key(),
1747 })
1748
1749 def process_ie_result(self, ie_result, download=True, extra_info=None):
1750 """
1751 Take the result of the ie (may be modified) and resolve all unresolved
1752 references (URLs, playlist items).
1753
1754 It will also download the videos if 'download' is true.
1755 Returns the resolved ie_result.
1756 """
1757 if extra_info is None:
1758 extra_info = {}
1759 result_type = ie_result.get('_type', 'video')
1760
1761 if result_type in ('url', 'url_transparent'):
1762 ie_result['url'] = sanitize_url(
1763 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
1764 if ie_result.get('original_url') and not extra_info.get('original_url'):
1765 extra_info = {'original_url': ie_result['original_url'], **extra_info}
1766
1767 extract_flat = self.params.get('extract_flat', False)
1768 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1769 or extract_flat is True):
1770 info_copy = ie_result.copy()
1771 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1772 if ie and not ie_result.get('id'):
1773 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1774 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1775 self.add_extra_info(info_copy, extra_info)
1776 info_copy, _ = self.pre_process(info_copy)
1777 self._fill_common_fields(info_copy, False)
1778 self.__forced_printings(info_copy)
1779 self._raise_pending_errors(info_copy)
1780 if self.params.get('force_write_download_archive', False):
1781 self.record_download_archive(info_copy)
1782 return ie_result
1783
1784 if result_type == 'video':
1785 self.add_extra_info(ie_result, extra_info)
1786 ie_result = self.process_video_result(ie_result, download=download)
1787 self._raise_pending_errors(ie_result)
1788 additional_urls = (ie_result or {}).get('additional_urls')
1789 if additional_urls:
1790 # TODO: Improve MetadataParserPP to allow setting a list
1791 if isinstance(additional_urls, str):
1792 additional_urls = [additional_urls]
1793 self.to_screen(
1794 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1795 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1796 ie_result['additional_entries'] = [
1797 self.extract_info(
1798 url, download, extra_info=extra_info,
1799 force_generic_extractor=self.params.get('force_generic_extractor'))
1800 for url in additional_urls
1801 ]
1802 return ie_result
1803 elif result_type == 'url':
1804 # We have to add extra_info to the results because it may be
1805 # contained in a playlist
1806 return self.extract_info(
1807 ie_result['url'], download,
1808 ie_key=ie_result.get('ie_key'),
1809 extra_info=extra_info)
1810 elif result_type == 'url_transparent':
1811 # Use the information from the embedding page
1812 info = self.extract_info(
1813 ie_result['url'], ie_key=ie_result.get('ie_key'),
1814 extra_info=extra_info, download=False, process=False)
1815
1816 # extract_info may return None when ignoreerrors is enabled and
1817 # extraction failed with an error, don't crash and return early
1818 # in this case
1819 if not info:
1820 return info
1821
1822 exempted_fields = {'_type', 'url', 'ie_key'}
1823 if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1824 # For video clips, the id etc of the clip extractor should be used
1825 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1826
1827 new_result = info.copy()
1828 new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1829
1830 # Extracted info may not be a video result (i.e.
1831 # info.get('_type', 'video') != 'video') but rather a url or
1832 # url_transparent. In such cases outer metadata (from ie_result)
1833 # should be propagated to inner one (info). For this to happen
1834 # _type of info should be overridden with url_transparent. This
1835 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1836 if new_result.get('_type') == 'url':
1837 new_result['_type'] = 'url_transparent'
1838
1839 return self.process_ie_result(
1840 new_result, download=download, extra_info=extra_info)
1841 elif result_type in ('playlist', 'multi_video'):
1842 # Protect from infinite recursion due to recursively nested playlists
1843 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1844 webpage_url = ie_result.get('webpage_url')  # Playlists may not have a webpage_url
1845 if webpage_url and webpage_url in self._playlist_urls:
1846 self.to_screen(
1847 '[download] Skipping already downloaded playlist: %s'
1848 % (ie_result.get('title') or ie_result.get('id')))
1849 return
1850
1851 self._playlist_level += 1
1852 self._playlist_urls.add(webpage_url)
1853 self._fill_common_fields(ie_result, False)
1854 self._sanitize_thumbnails(ie_result)
1855 try:
1856 return self.__process_playlist(ie_result, download)
1857 finally:
1858 self._playlist_level -= 1
1859 if not self._playlist_level:
1860 self._playlist_urls.clear()
1861 elif result_type == 'compat_list':
1862 self.report_warning(
1863 'Extractor %s returned a compat_list result. '
1864 'It needs to be updated.' % ie_result.get('extractor'))
1865
1866 def _fixup(r):
1867 self.add_extra_info(r, {
1868 'extractor': ie_result['extractor'],
1869 'webpage_url': ie_result['webpage_url'],
1870 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1871 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1872 'extractor_key': ie_result['extractor_key'],
1873 })
1874 return r
1875 ie_result['entries'] = [
1876 self.process_ie_result(_fixup(r), download, extra_info)
1877 for r in ie_result['entries']
1878 ]
1879 return ie_result
1880 else:
1881 raise Exception('Invalid result type: %s' % result_type)
1882
1883 def _ensure_dir_exists(self, path):
1884 return make_dir(path, self.report_error)
1885
1886 @staticmethod
1887 def _playlist_infodict(ie_result, strict=False, **kwargs):
1888 info = {
1889 'playlist_count': ie_result.get('playlist_count'),
1890 'playlist': ie_result.get('title') or ie_result.get('id'),
1891 'playlist_id': ie_result.get('id'),
1892 'playlist_title': ie_result.get('title'),
1893 'playlist_uploader': ie_result.get('uploader'),
1894 'playlist_uploader_id': ie_result.get('uploader_id'),
1895 **kwargs,
1896 }
1897 if strict:
1898 return info
1899 if ie_result.get('webpage_url'):
1900 info.update({
1901 'webpage_url': ie_result['webpage_url'],
1902 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1903 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1904 })
1905 return {
1906 **info,
1907 'playlist_index': 0,
1908 '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
1909 'extractor': ie_result['extractor'],
1910 'extractor_key': ie_result['extractor_key'],
1911 }
1912
1913 def __process_playlist(self, ie_result, download):
1914 """Process each entry in the playlist"""
1915 assert ie_result['_type'] in ('playlist', 'multi_video')
1916
1917 common_info = self._playlist_infodict(ie_result, strict=True)
1918 title = common_info.get('playlist') or '<Untitled>'
1919 if self._match_entry(common_info, incomplete=True) is not None:
1920 return
1921 self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
1922
1923 all_entries = PlaylistEntries(self, ie_result)
1924 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1925
1926 lazy = self.params.get('lazy_playlist')
1927 if lazy:
1928 resolved_entries, n_entries = [], 'N/A'
1929 ie_result['requested_entries'], ie_result['entries'] = None, None
1930 else:
1931 entries = resolved_entries = list(entries)
1932 n_entries = len(resolved_entries)
1933 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1934 if not ie_result.get('playlist_count'):
1935 # Better to do this after potentially exhausting entries
1936 ie_result['playlist_count'] = all_entries.get_full_count()
1937
1938 extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1939 ie_copy = collections.ChainMap(ie_result, extra)
1940
1941 _infojson_written = False
1942 write_playlist_files = self.params.get('allow_playlist_files', True)
1943 if write_playlist_files and self.params.get('list_thumbnails'):
1944 self.list_thumbnails(ie_result)
1945 if write_playlist_files and not self.params.get('simulate'):
1946 _infojson_written = self._write_info_json(
1947 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1948 if _infojson_written is None:
1949 return
1950 if self._write_description('playlist', ie_result,
1951 self.prepare_filename(ie_copy, 'pl_description')) is None:
1952 return
1953 # TODO: This should be passed to ThumbnailsConvertor if necessary
1954 self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1955
1956 if lazy:
1957 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1958 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1959 elif self.params.get('playlistreverse'):
1960 entries.reverse()
1961 elif self.params.get('playlistrandom'):
1962 random.shuffle(entries)
1963
1964 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
1965 f'{format_field(ie_result, "playlist_count", " of %s")}')
1966
1967 keep_resolved_entries = self.params.get('extract_flat') != 'discard'
1968 if self.params.get('extract_flat') == 'discard_in_playlist':
1969 keep_resolved_entries = ie_result['_type'] != 'playlist'
1970 if keep_resolved_entries:
1971 self.write_debug('The information of all playlist entries will be held in memory')
1972
1973 failures = 0
1974 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1975 for i, (playlist_index, entry) in enumerate(entries):
1976 if lazy:
1977 resolved_entries.append((playlist_index, entry))
1978 if not entry:
1979 continue
1980
1981 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
1982 if not lazy and 'playlist-index' in self.params['compat_opts']:
1983 playlist_index = ie_result['requested_entries'][i]
1984
1985 entry_copy = collections.ChainMap(entry, {
1986 **common_info,
1987 'n_entries': int_or_none(n_entries),
1988 'playlist_index': playlist_index,
1989 'playlist_autonumber': i + 1,
1990 })
1991
1992 if self._match_entry(entry_copy, incomplete=True) is not None:
1993 # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
1994 resolved_entries[i] = (playlist_index, NO_DEFAULT)
1995 continue
1996
1997 self.to_screen('[download] Downloading item %s of %s' % (
1998 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
1999
2000 entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
2001 'playlist_index': playlist_index,
2002 'playlist_autonumber': i + 1,
2003 }, extra))
2004 if not entry_result:
2005 failures += 1
2006 if failures >= max_failures:
2007 self.report_error(
2008 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
2009 break
2010 if keep_resolved_entries:
2011 resolved_entries[i] = (playlist_index, entry_result)
2012
2013 # Update with processed data
2014 ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
2015 ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
2016 if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
2017 # Do not set for full playlist
2018 ie_result.pop('requested_entries')
2019
2020 # Write the updated info to json
2021 if _infojson_written is True and self._write_info_json(
2022 'updated playlist', ie_result,
2023 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
2024 return
2025
2026 ie_result = self.run_all_pps('playlist', ie_result)
2027 self.to_screen(f'[download] Finished downloading playlist: {title}')
2028 return ie_result
2029
2030 @_handle_extraction_exceptions
2031 def __process_iterable_entry(self, entry, download, extra_info):
2032 return self.process_ie_result(
2033 entry, download=download, extra_info=extra_info)
2034
2035 def _build_format_filter(self, filter_spec):
2036 " Returns a function to filter the formats according to the filter_spec "
2037
2038 OPERATORS = {
2039 '<': operator.lt,
2040 '<=': operator.le,
2041 '>': operator.gt,
2042 '>=': operator.ge,
2043 '=': operator.eq,
2044 '!=': operator.ne,
2045 }
2046 operator_rex = re.compile(r'''(?x)\s*
2047 (?P<key>[\w.-]+)\s*
2048 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
2049 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
2050 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
2051 m = operator_rex.fullmatch(filter_spec)
2052 if m:
2053 try:
2054 comparison_value = int(m.group('value'))
2055 except ValueError:
2056 comparison_value = parse_filesize(m.group('value'))
2057 if comparison_value is None:
2058 comparison_value = parse_filesize(m.group('value') + 'B')
2059 if comparison_value is None:
2060 raise ValueError(
2061 'Invalid value %r in format specification %r' % (
2062 m.group('value'), filter_spec))
2063 op = OPERATORS[m.group('op')]
2064
2065 if not m:
2066 STR_OPERATORS = {
2067 '=': operator.eq,
2068 '^=': lambda attr, value: attr.startswith(value),
2069 '$=': lambda attr, value: attr.endswith(value),
2070 '*=': lambda attr, value: value in attr,
2071 '~=': lambda attr, value: value.search(attr) is not None
2072 }
2073 str_operator_rex = re.compile(r'''(?x)\s*
2074 (?P<key>[a-zA-Z0-9._-]+)\s*
2075 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
2076 (?P<quote>["'])?
2077 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
2078 (?(quote)(?P=quote))\s*
2079 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
2080 m = str_operator_rex.fullmatch(filter_spec)
2081 if m:
2082 if m.group('op') == '~=':
2083 comparison_value = re.compile(m.group('value'))
2084 else:
2085 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2086 str_op = STR_OPERATORS[m.group('op')]
2087 if m.group('negation'):
2088 op = lambda attr, value: not str_op(attr, value)
2089 else:
2090 op = str_op
2091
2092 if not m:
2093 raise SyntaxError('Invalid filter specification %r' % filter_spec)
2094
2095 def _filter(f):
2096 actual_value = f.get(m.group('key'))
2097 if actual_value is None:
2098 return m.group('none_inclusive')
2099 return op(actual_value, comparison_value)
2100 return _filter
2101
2102 def _check_formats(self, formats):
2103 for f in formats:
2104 self.to_screen('[info] Testing format %s' % f['format_id'])
2105 path = self.get_output_path('temp')
2106 if not self._ensure_dir_exists(f'{path}/'):
2107 continue
2108 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
2109 temp_file.close()
2110 try:
2111 success, _ = self.dl(temp_file.name, f, test=True)
2112 except (DownloadError, OSError, ValueError) + network_exceptions:
2113 success = False
2114 finally:
2115 if os.path.exists(temp_file.name):
2116 try:
2117 os.remove(temp_file.name)
2118 except OSError:
2119 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
2120 if success:
2121 yield f
2122 else:
2123 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
2124
2125 def _default_format_spec(self, info_dict, download=True):
2126
2127 def can_merge():
2128 merger = FFmpegMergerPP(self)
2129 return merger.available and merger.can_merge()
2130
2131 prefer_best = (
2132 not self.params.get('simulate')
2133 and download
2134 and (
2135 not can_merge()
2136 or info_dict.get('is_live') and not self.params.get('live_from_start')
2137 or self.params['outtmpl']['default'] == '-'))
2138 compat = (
2139 prefer_best
2140 or self.params.get('allow_multiple_audio_streams', False)
2141 or 'format-spec' in self.params['compat_opts'])
2142
2143 return (
2144 'best/bestvideo+bestaudio' if prefer_best
2145 else 'bestvideo*+bestaudio/best' if not compat
2146 else 'bestvideo+bestaudio/best')
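# E.g. (assumed environment): with a working ffmpeg merger, not simulating and
# not writing to stdout, this resolves to 'bestvideo*+bestaudio/best'; with
# 'format-spec' in compat_opts it becomes 'bestvideo+bestaudio/best'; and when
# merging is not possible it falls back to 'best/bestvideo+bestaudio'.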
2147
2148 def build_format_selector(self, format_spec):
2149 def syntax_error(note, start):
2150 message = (
2151 'Invalid format specification: '
2152 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
2153 return SyntaxError(message)
2154
2155 PICKFIRST = 'PICKFIRST'
2156 MERGE = 'MERGE'
2157 SINGLE = 'SINGLE'
2158 GROUP = 'GROUP'
2159 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
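# Rough shape of the parse tree built below (hedged sketch): a spec such as
# 'bv*+ba/b[height<=720]' becomes, approximately,
#   PICKFIRST( MERGE(SINGLE 'bv*', SINGLE 'ba'),
#              SINGLE 'b' with filter 'height<=720' )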
2160
2161 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2162 'video': self.params.get('allow_multiple_video_streams', False)}
2163
2164 def _parse_filter(tokens):
2165 filter_parts = []
2166 for type, string_, start, _, _ in tokens:
2167 if type == tokenize.OP and string_ == ']':
2168 return ''.join(filter_parts)
2169 else:
2170 filter_parts.append(string_)
2171
2172 def _remove_unused_ops(tokens):
2173 # Remove operators that we don't use and join them with the surrounding strings.
2174 # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
2175 ALLOWED_OPS = ('/', '+', ',', '(', ')')
2176 last_string, last_start, last_end, last_line = None, None, None, None
2177 for type, string_, start, end, line in tokens:
2178 if type == tokenize.OP and string_ == '[':
2179 if last_string:
2180 yield tokenize.NAME, last_string, last_start, last_end, last_line
2181 last_string = None
2182 yield type, string_, start, end, line
2183 # everything inside brackets will be handled by _parse_filter
2184 for type, string_, start, end, line in tokens:
2185 yield type, string_, start, end, line
2186 if type == tokenize.OP and string_ == ']':
2187 break
2188 elif type == tokenize.OP and string_ in ALLOWED_OPS:
2189 if last_string:
2190 yield tokenize.NAME, last_string, last_start, last_end, last_line
2191 last_string = None
2192 yield type, string_, start, end, line
2193 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2194 if not last_string:
2195 last_string = string_
2196 last_start = start
2197 last_end = end
2198 else:
2199 last_string += string_
2200 if last_string:
2201 yield tokenize.NAME, last_string, last_start, last_end, last_line
2202
2203 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
2204 selectors = []
2205 current_selector = None
2206 for type, string_, start, _, _ in tokens:
2207 # Skip the ENCODING token emitted first by tokenize.tokenize
2208 if type == getattr(tokenize, 'ENCODING', None):
2209 continue
2210 elif type in [tokenize.NAME, tokenize.NUMBER]:
2211 current_selector = FormatSelector(SINGLE, string_, [])
2212 elif type == tokenize.OP:
2213 if string_ == ')':
2214 if not inside_group:
2215 # ')' will be handled by the parentheses group
2216 tokens.restore_last_token()
2217 break
2218 elif inside_merge and string_ in ['/', ',']:
2219 tokens.restore_last_token()
2220 break
2221 elif inside_choice and string_ == ',':
2222 tokens.restore_last_token()
2223 break
2224 elif string_ == ',':
2225 if not current_selector:
2226 raise syntax_error('"," must follow a format selector', start)
2227 selectors.append(current_selector)
2228 current_selector = None
2229 elif string_ == '/':
2230 if not current_selector:
2231 raise syntax_error('"/" must follow a format selector', start)
2232 first_choice = current_selector
2233 second_choice = _parse_format_selection(tokens, inside_choice=True)
2234 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2235 elif string_ == '[':
2236 if not current_selector:
2237 current_selector = FormatSelector(SINGLE, 'best', [])
2238 format_filter = _parse_filter(tokens)
2239 current_selector.filters.append(format_filter)
2240 elif string_ == '(':
2241 if current_selector:
2242 raise syntax_error('Unexpected "("', start)
2243 group = _parse_format_selection(tokens, inside_group=True)
2244 current_selector = FormatSelector(GROUP, group, [])
2245 elif string_ == '+':
2246 if not current_selector:
2247 raise syntax_error('Unexpected "+"', start)
2248 selector_1 = current_selector
2249 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2250 if not selector_2:
2251 raise syntax_error('Expected a selector', start)
2252 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2253 else:
2254 raise syntax_error(f'Operator not recognized: "{string_}"', start)
2255 elif type == tokenize.ENDMARKER:
2256 break
2257 if current_selector:
2258 selectors.append(current_selector)
2259 return selectors
2260
2261 def _merge(formats_pair):
2262 format_1, format_2 = formats_pair
2263
2264 formats_info = []
2265 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2266 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2267
2268 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2269 get_no_more = {'video': False, 'audio': False}
2270 for (i, fmt_info) in enumerate(formats_info):
2271 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2272 formats_info.pop(i)
2273 continue
2274 for aud_vid in ['audio', 'video']:
2275 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2276 if get_no_more[aud_vid]:
2277 formats_info.pop(i)
2278 break
2279 get_no_more[aud_vid] = True
2280
2281 if len(formats_info) == 1:
2282 return formats_info[0]
2283
2284 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2285 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2286
2287 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2288 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2289
2290 output_ext = get_compatible_ext(
2291 vcodecs=[f.get('vcodec') for f in video_fmts],
2292 acodecs=[f.get('acodec') for f in audio_fmts],
2293 vexts=[f['ext'] for f in video_fmts],
2294 aexts=[f['ext'] for f in audio_fmts],
2295 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2296 or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
2297
2298 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2299
2300 new_dict = {
2301 'requested_formats': formats_info,
2302 'format': '+'.join(filtered('format')),
2303 'format_id': '+'.join(filtered('format_id')),
2304 'ext': output_ext,
2305 'protocol': '+'.join(map(determine_protocol, formats_info)),
2306 'language': '+'.join(orderedSet(filtered('language'))) or None,
2307 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2308 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2309 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2310 }
2311
2312 if the_only_video:
2313 new_dict.update({
2314 'width': the_only_video.get('width'),
2315 'height': the_only_video.get('height'),
2316 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2317 'fps': the_only_video.get('fps'),
2318 'dynamic_range': the_only_video.get('dynamic_range'),
2319 'vcodec': the_only_video.get('vcodec'),
2320 'vbr': the_only_video.get('vbr'),
2321 'stretched_ratio': the_only_video.get('stretched_ratio'),
2322 'aspect_ratio': the_only_video.get('aspect_ratio'),
2323 })
2324
2325 if the_only_audio:
2326 new_dict.update({
2327 'acodec': the_only_audio.get('acodec'),
2328 'abr': the_only_audio.get('abr'),
2329 'asr': the_only_audio.get('asr'),
2330 'audio_channels': the_only_audio.get('audio_channels')
2331 })
2332
2333 return new_dict
2334
2335 def _check_formats(formats):
2336 if self.params.get('check_formats') == 'selected':
2337 yield from self._check_formats(formats)
2338 return
2339 elif (self.params.get('check_formats') is not None
2340 or self.params.get('allow_unplayable_formats')):
2341 yield from formats
2342 return
2343
2344 for f in formats:
2345 if f.get('has_drm') or f.get('__needs_testing'):
2346 yield from self._check_formats([f])
2347 else:
2348 yield f
2349
2350 def _build_selector_function(selector):
2351 if isinstance(selector, list): # ,
2352 fs = [_build_selector_function(s) for s in selector]
2353
2354 def selector_function(ctx):
2355 for f in fs:
2356 yield from f(ctx)
2357 return selector_function
2358
2359 elif selector.type == GROUP: # ()
2360 selector_function = _build_selector_function(selector.selector)
2361
2362 elif selector.type == PICKFIRST: # /
2363 fs = [_build_selector_function(s) for s in selector.selector]
2364
2365 def selector_function(ctx):
2366 for f in fs:
2367 picked_formats = list(f(ctx))
2368 if picked_formats:
2369 return picked_formats
2370 return []
2371
2372 elif selector.type == MERGE: # +
2373 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2374
2375 def selector_function(ctx):
2376 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2377 yield _merge(pair)
2378
2379 elif selector.type == SINGLE: # atom
2380 format_spec = selector.selector or 'best'
2381
2382 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2383 if format_spec == 'all':
2384 def selector_function(ctx):
2385 yield from _check_formats(ctx['formats'][::-1])
2386 elif format_spec == 'mergeall':
2387 def selector_function(ctx):
2388 formats = list(_check_formats(
2389 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2390 if not formats:
2391 return
2392 merged_format = formats[-1]
2393 for f in formats[-2::-1]:
2394 merged_format = _merge((merged_format, f))
2395 yield merged_format
2396
2397 else:
2398 format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
2399 mobj = re.match(
2400 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2401 format_spec)
2402 if mobj is not None:
2403 format_idx = int_or_none(mobj.group('n'), default=1)
2404 format_reverse = mobj.group('bw')[0] == 'b'
2405 format_type = (mobj.group('type') or [None])[0]
2406 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2407 format_modified = mobj.group('mod') is not None
2408
2409 format_fallback = not format_type and not format_modified # for b, w
2410 _filter_f = (
2411 (lambda f: f.get('%scodec' % format_type) != 'none')
2412 if format_type and format_modified # bv*, ba*, wv*, wa*
2413 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2414 if format_type # bv, ba, wv, wa
2415 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2416 if not format_modified # b, w
2417 else lambda f: True) # b*, w*
2418 filter_f = lambda f: _filter_f(f) and (
2419 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2420 else:
2421 if format_spec in self._format_selection_exts['audio']:
2422 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2423 elif format_spec in self._format_selection_exts['video']:
2424 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2425 separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2426 elif format_spec in self._format_selection_exts['storyboards']:
2427 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2428 else:
2429 filter_f = lambda f: f.get('format_id') == format_spec # id
2430
2431 def selector_function(ctx):
2432 formats = list(ctx['formats'])
2433 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2434 if not matches:
2435 if format_fallback and ctx['incomplete_formats']:
2436 # for extractors with incomplete formats (audio-only, e.g. soundcloud,
2437 # or video-only, e.g. imgur), best/worst will fall back to the
2438 # best/worst {video,audio}-only format
2439 matches = formats
2440 elif separate_fallback and not ctx['has_merged_format']:
2441 # for compatibility with youtube-dl when there is no pre-merged format
2442 matches = list(filter(separate_fallback, formats))
2443 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2444 try:
2445 yield matches[format_idx - 1]
2446 except LazyList.IndexError:
2447 return
2448
2449 filters = [self._build_format_filter(f) for f in selector.filters]
2450
2451 def final_selector(ctx):
2452 ctx_copy = dict(ctx)
2453 for _filter in filters:
2454 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2455 return selector_function(ctx_copy)
2456 return final_selector
2457
2458 stream = io.BytesIO(format_spec.encode())
2459 try:
2460 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2461 except tokenize.TokenError:
2462 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2463
2464 class TokenIterator:
2465 def __init__(self, tokens):
2466 self.tokens = tokens
2467 self.counter = 0
2468
2469 def __iter__(self):
2470 return self
2471
2472 def __next__(self):
2473 if self.counter >= len(self.tokens):
2474 raise StopIteration()
2475 value = self.tokens[self.counter]
2476 self.counter += 1
2477 return value
2478
2479 next = __next__
2480
2481 def restore_last_token(self):
2482 self.counter -= 1
2483
2484 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2485 return _build_selector_function(parsed_selector)
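# Usage sketch (the ctx keys mirror those read by selector_function above;
# the formats list is assumed): the returned callable takes a context dict
# and yields the chosen format dicts.
#
#   selector = ydl.build_format_selector('bestvideo+bestaudio/best')
#   chosen = list(selector({'formats': formats,
#                           'incomplete_formats': False,
#                           'has_merged_format': True}))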
2486
2487 def _calc_headers(self, info_dict, load_cookies=False):
2488 res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
2489 clean_headers(res)
2490
2491 if load_cookies: # For --load-info-json
2492 self._load_cookies(res.get('Cookie'), autoscope=info_dict['url']) # compat
2493 self._load_cookies(info_dict.get('cookies'), autoscope=False)
2494 # The `Cookie` header is removed to prevent leaks and unscoped cookies.
2495 # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
2496 res.pop('Cookie', None)
2497 cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
2498 if cookies:
2499 encoder = LenientSimpleCookie()
2500 values = []
2501 for cookie in cookies:
2502 _, value = encoder.value_encode(cookie.value)
2503 values.append(f'{cookie.name}={value}')
2504 if cookie.domain:
2505 values.append(f'Domain={cookie.domain}')
2506 if cookie.path:
2507 values.append(f'Path={cookie.path}')
2508 if cookie.secure:
2509 values.append('Secure')
2510 if cookie.expires:
2511 values.append(f'Expires={cookie.expires}')
2512 if cookie.version:
2513 values.append(f'Version={cookie.version}')
2514 info_dict['cookies'] = '; '.join(values)
2515
2516 if 'X-Forwarded-For' not in res:
2517 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2518 if x_forwarded_for_ip:
2519 res['X-Forwarded-For'] = x_forwarded_for_ip
2520
2521 return res
2522
2523 def _calc_cookies(self, url):
2524 self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
2525 return self.cookiejar.get_cookie_header(url)
2526
2527 def _sort_thumbnails(self, thumbnails):
2528 thumbnails.sort(key=lambda t: (
2529 t.get('preference') if t.get('preference') is not None else -1,
2530 t.get('width') if t.get('width') is not None else -1,
2531 t.get('height') if t.get('height') is not None else -1,
2532 t.get('id') if t.get('id') is not None else '',
2533 t.get('url')))
2534
2535 def _sanitize_thumbnails(self, info_dict):
2536 thumbnails = info_dict.get('thumbnails')
2537 if thumbnails is None:
2538 thumbnail = info_dict.get('thumbnail')
2539 if thumbnail:
2540 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2541 if not thumbnails:
2542 return
2543
2544 def check_thumbnails(thumbnails):
2545 for t in thumbnails:
2546 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2547 try:
2548 self.urlopen(HEADRequest(t['url']))
2549 except network_exceptions as err:
2550 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2551 continue
2552 yield t
2553
2554 self._sort_thumbnails(thumbnails)
2555 for i, t in enumerate(thumbnails):
2556 if t.get('id') is None:
2557 t['id'] = '%d' % i
2558 if t.get('width') and t.get('height'):
2559 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2560 t['url'] = sanitize_url(t['url'])
2561
2562 if self.params.get('check_formats') is True:
2563 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2564 else:
2565 info_dict['thumbnails'] = thumbnails
2566
2567 def _fill_common_fields(self, info_dict, final=True):
2568 # TODO: move sanitization here
2569 if final:
2570 title = info_dict['fulltitle'] = info_dict.get('title')
2571 if not title:
2572 if title == '':
2573 self.write_debug('Extractor gave empty title. Creating a generic title')
2574 else:
2575 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2576 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2577
2578 if info_dict.get('duration') is not None:
2579 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2580
2581 for ts_key, date_key in (
2582 ('timestamp', 'upload_date'),
2583 ('release_timestamp', 'release_date'),
2584 ('modified_timestamp', 'modified_date'),
2585 ):
2586 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2587 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2588 # see http://bugs.python.org/issue1646728)
2589 with contextlib.suppress(ValueError, OverflowError, OSError):
2590 upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
2591 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2592
2593 if not info_dict.get('release_year'):
2594 info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])}))
2595
2596 live_keys = ('is_live', 'was_live')
2597 live_status = info_dict.get('live_status')
2598 if live_status is None:
2599 for key in live_keys:
2600 if info_dict.get(key) is False:
2601 continue
2602 if info_dict.get(key):
2603 live_status = key
2604 break
2605 if all(info_dict.get(key) is False for key in live_keys):
2606 live_status = 'not_live'
2607 if live_status:
2608 info_dict['live_status'] = live_status
2609 for key in live_keys:
2610 if info_dict.get(key) is None:
2611 info_dict[key] = (live_status == key)
2612 if live_status == 'post_live':
2613 info_dict['was_live'] = True
2614
2615 # Auto generate title fields corresponding to the *_number fields when missing
2616 # in order to always have clean titles. This is very common for TV series.
2617 for field in ('chapter', 'season', 'episode'):
2618 if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2619 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2620
2621 def _raise_pending_errors(self, info):
2622 err = info.pop('__pending_error', None)
2623 if err:
2624 self.report_error(err, tb=False)
2625
2626 def sort_formats(self, info_dict):
2627 formats = self._get_formats(info_dict)
2628 formats.sort(key=FormatSorter(
2629 self, info_dict.get('_format_sort_fields') or []).calculate_preference)
2630
2631 def process_video_result(self, info_dict, download=True):
2632 assert info_dict.get('_type', 'video') == 'video'
2633 self._num_videos += 1
2634
2635 if 'id' not in info_dict:
2636 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2637 elif not info_dict.get('id'):
2638 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2639
2640 def report_force_conversion(field, field_not, conversion):
2641 self.report_warning(
2642 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2643 % (field, field_not, conversion))
2644
2645 def sanitize_string_field(info, string_field):
2646 field = info.get(string_field)
2647 if field is None or isinstance(field, str):
2648 return
2649 report_force_conversion(string_field, 'a string', 'string')
2650 info[string_field] = str(field)
2651
2652 def sanitize_numeric_fields(info):
2653 for numeric_field in self._NUMERIC_FIELDS:
2654 field = info.get(numeric_field)
2655 if field is None or isinstance(field, (int, float)):
2656 continue
2657 report_force_conversion(numeric_field, 'numeric', 'int')
2658 info[numeric_field] = int_or_none(field)
2659
2660 sanitize_string_field(info_dict, 'id')
2661 sanitize_numeric_fields(info_dict)
2662 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2663 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
2664 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2665 self.report_warning('"duration" field is negative, there is an error in extractor')
2666
2667 chapters = info_dict.get('chapters') or []
2668 if chapters and chapters[0].get('start_time'):
2669 chapters.insert(0, {'start_time': 0})
2670
2671 dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
2672 for idx, (prev, current, next_) in enumerate(zip(
2673 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
2674 if current.get('start_time') is None:
2675 current['start_time'] = prev.get('end_time')
2676 if not current.get('end_time'):
2677 current['end_time'] = next_.get('start_time')
2678 if not current.get('title'):
2679 current['title'] = f'<Untitled Chapter {idx}>'
2680
2681 if 'playlist' not in info_dict:
2682 # It isn't part of a playlist
2683 info_dict['playlist'] = None
2684 info_dict['playlist_index'] = None
2685
2686 self._sanitize_thumbnails(info_dict)
2687
2688 thumbnail = info_dict.get('thumbnail')
2689 thumbnails = info_dict.get('thumbnails')
2690 if thumbnail:
2691 info_dict['thumbnail'] = sanitize_url(thumbnail)
2692 elif thumbnails:
2693 info_dict['thumbnail'] = thumbnails[-1]['url']
2694
2695 if info_dict.get('display_id') is None and 'id' in info_dict:
2696 info_dict['display_id'] = info_dict['id']
2697
2698 self._fill_common_fields(info_dict)
2699
2700 for cc_kind in ('subtitles', 'automatic_captions'):
2701 cc = info_dict.get(cc_kind)
2702 if cc:
2703 for _, subtitle in cc.items():
2704 for subtitle_format in subtitle:
2705 if subtitle_format.get('url'):
2706 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2707 if subtitle_format.get('ext') is None:
2708 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2709
2710 automatic_captions = info_dict.get('automatic_captions')
2711 subtitles = info_dict.get('subtitles')
2712
2713 info_dict['requested_subtitles'] = self.process_subtitles(
2714 info_dict['id'], subtitles, automatic_captions)
2715
2716 formats = self._get_formats(info_dict)
2717
2718 # Backward compatibility with InfoExtractor._sort_formats
2719 field_preference = (formats or [{}])[0].pop('__sort_fields', None)
2720 if field_preference:
2721 info_dict['_format_sort_fields'] = field_preference
2722
2723 info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it
2724 f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
2725 if not self.params.get('allow_unplayable_formats'):
2726 formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']
2727
2728 if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2729 self.report_warning(
2730 ('This video is DRM protected and only images are available for download. '
2731 if info_dict['_has_drm'] else 'Only images are available for download. ') + 'Use --list-formats to see them')
2732
2733 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2734 if not get_from_start:
2735 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2736 if info_dict.get('is_live') and formats:
2737 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2738 if get_from_start and not formats:
2739 self.raise_no_formats(info_dict, msg=(
2740 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2741 'If you want to download from the current time, use --no-live-from-start'))
2742
2743 def is_wellformed(f):
2744 url = f.get('url')
2745 if not url:
2746 self.report_warning(
2747 '"url" field is missing or empty - skipping format, '
2748 'there is an error in extractor')
2749 return False
2750 if isinstance(url, bytes):
2751 sanitize_string_field(f, 'url')
2752 return True
2753
2754 # Filter out malformed formats for better extraction robustness
2755 formats = list(filter(is_wellformed, formats or []))
2756
2757 if not formats:
2758 self.raise_no_formats(info_dict)
2759
2760 for format in formats:
2761 sanitize_string_field(format, 'format_id')
2762 sanitize_numeric_fields(format)
2763 format['url'] = sanitize_url(format['url'])
2764 if format.get('ext') is None:
2765 format['ext'] = determine_ext(format['url']).lower()
2766 if format.get('protocol') is None:
2767 format['protocol'] = determine_protocol(format)
2768 if format.get('resolution') is None:
2769 format['resolution'] = self.format_resolution(format, default=None)
2770 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2771 format['dynamic_range'] = 'SDR'
2772 if format.get('aspect_ratio') is None:
2773 format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
2774 # For fragmented formats, "tbr" is often max bitrate and not average
2775 if (('manifest-filesize-approx' in self.params['compat_opts'] or not format.get('manifest_url'))
2776 and info_dict.get('duration') and format.get('tbr')
2777 and not format.get('filesize') and not format.get('filesize_approx')):
2778 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
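# The approximation above treats "tbr" as KiB-scaled kilobits per second:
# e.g. a hypothetical 60 s stream at tbr=1000 gives
#   int(60 * 1000 * (1024 / 8)) == 7_680_000 bytes (~7.7 MB)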
2779 format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)
2780
2781 # Safeguard against old/insecure infojson when using --load-info-json
2782 if info_dict.get('http_headers'):
2783 info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers'])
2784 info_dict['http_headers'].pop('Cookie', None)
2785
2786 # This is copied to http_headers by the above _calc_headers and can now be removed
2787 if '__x_forwarded_for_ip' in info_dict:
2788 del info_dict['__x_forwarded_for_ip']
2789
2790 self.sort_formats({
2791 'formats': formats,
2792 '_format_sort_fields': info_dict.get('_format_sort_fields')
2793 })
2794
2795 # Sanitize and group by format_id
2796 formats_dict = {}
2797 for i, format in enumerate(formats):
2798 if not format.get('format_id'):
2799 format['format_id'] = str(i)
2800 else:
2801 # Sanitize format_id by replacing characters used in format selector expressions
2802 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2803 formats_dict.setdefault(format['format_id'], []).append(format)
2804
2805 # Make sure all formats have unique format_id
2806 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2807 for format_id, ambiguous_formats in formats_dict.items():
2808 ambiguous_id = len(ambiguous_formats) > 1
2809 for i, format in enumerate(ambiguous_formats):
2810 if ambiguous_id:
2811 format['format_id'] = '%s-%d' % (format_id, i)
2812 # Ensure there is no conflict between id and ext in format selection
2813 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2814 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2815 format['format_id'] = 'f%s' % format['format_id']
2816
2817 if format.get('format') is None:
2818 format['format'] = '{id} - {res}{note}'.format(
2819 id=format['format_id'],
2820 res=self.format_resolution(format),
2821 note=format_field(format, 'format_note', ' (%s)'),
2822 )
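# A minimal sketch of the de-duplication above (hypothetical values):
#   two formats sharing format_id 'hls-720' become 'hls-720-0' and 'hls-720-1',
#   and an id like 'mp4 high' is first sanitized to 'mp4_high'.
#   An id that equals a common extension (e.g. 'mp4' with ext 'm4a')
#   is prefixed to 'fmp4' so it cannot be mistaken for an extension filter.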
2823
2824 if self.params.get('check_formats') is True:
2825 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2826
2827 if not formats or formats[0] is not info_dict:
2828 # Only set the 'formats' field if the original info_dict lists it;
2829 # otherwise we end up with a circular reference: the first (and only)
2830 # element of info_dict['formats'] would be info_dict itself,
2831 # which can't be exported to JSON
2832 info_dict['formats'] = formats
2833
2834 info_dict, _ = self.pre_process(info_dict)
2835
2836 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2837 return info_dict
2838
2839 self.post_extract(info_dict)
2840 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2841
2842 # The pre-processors may have modified the formats
2843 formats = self._get_formats(info_dict)
2844
2845 list_only = self.params.get('simulate') == 'list_only'
2846 interactive_format_selection = not list_only and self.format_selector == '-'
2847 if self.params.get('list_thumbnails'):
2848 self.list_thumbnails(info_dict)
2849 if self.params.get('listsubtitles'):
2850 if 'automatic_captions' in info_dict:
2851 self.list_subtitles(
2852 info_dict['id'], automatic_captions, 'automatic captions')
2853 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2854 if self.params.get('listformats') or interactive_format_selection:
2855 self.list_formats(info_dict)
2856 if list_only:
2857 # Without printing here, -F --print-json will not work
2858 self.__forced_printings(info_dict)
2859 return info_dict
2860
2861 format_selector = self.format_selector
2862 while True:
2863 if interactive_format_selection:
2864 req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
2865 + '(Press ENTER for default, or Ctrl+C to quit)'
2866 + self._format_screen(': ', self.Styles.EMPHASIS))
2867 try:
2868 format_selector = self.build_format_selector(req_format) if req_format else None
2869 except SyntaxError as err:
2870 self.report_error(err, tb=False, is_error=False)
2871 continue
2872
2873 if format_selector is None:
2874 req_format = self._default_format_spec(info_dict, download=download)
2875 self.write_debug(f'Default format spec: {req_format}')
2876 format_selector = self.build_format_selector(req_format)
2877
2878 formats_to_download = list(format_selector({
2879 'formats': formats,
2880 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2881 'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
2882 or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
2883 }))
2884 if interactive_format_selection and not formats_to_download:
2885 self.report_error('Requested format is not available', tb=False, is_error=False)
2886 continue
2887 break
2888
2889 if not formats_to_download:
2890 if not self.params.get('ignore_no_formats_error'):
2891 raise ExtractorError(
2892 'Requested format is not available. Use --list-formats for a list of available formats',
2893 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2894 self.report_warning('Requested format is not available')
2895 # Process what we can, even without any available formats.
2896 formats_to_download = [{}]
2897
2898 requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
2899 best_format, downloaded_formats = formats_to_download[-1], []
2900 if download:
2901 if best_format and requested_ranges:
2902 def to_screen(*msg):
2903 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2904
2905 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2906 (f['format_id'] for f in formats_to_download))
2907 if requested_ranges != ({}, ):
2908 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2909 (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
2910 max_downloads_reached = False
2911
2912 for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
2913 new_info = self._copy_infodict(info_dict)
2914 new_info.update(fmt)
2915 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
2916 end_time = offset + min(chapter.get('end_time', duration), duration)
2917 # The duration may not be accurate, so allow deviations of <1 sec
2918 if end_time == float('inf') or end_time > offset + duration + 1:
2919 end_time = None
2920 if chapter or offset:
2921 new_info.update({
2922 'section_start': offset + chapter.get('start_time', 0),
2923 'section_end': end_time,
2924 'section_title': chapter.get('title'),
2925 'section_number': chapter.get('index'),
2926 })
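# Sketch of the section fields, assuming offset=0 and a hypothetical
# chapter {'start_time': 60.0, 'end_time': 90.0, 'title': 'Intro'}:
#   section_start=60.0, section_end=90.0, section_title='Intro'
# end_time is dropped (None) when it exceeds the known duration by >1 s.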
2927 downloaded_formats.append(new_info)
2928 try:
2929 self.process_info(new_info)
2930 except MaxDownloadsReached:
2931 max_downloads_reached = True
2932 self._raise_pending_errors(new_info)
2933 # Remove copied info
2934 for key, val in tuple(new_info.items()):
2935 if info_dict.get(key) == val:
2936 new_info.pop(key)
2937 if max_downloads_reached:
2938 break
2939
2940 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
2941 assert write_archive.issubset({True, False, 'ignore'})
2942 if True in write_archive and False not in write_archive:
2943 self.record_download_archive(info_dict)
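# The archive is only written when at least one format succeeded (True)
# and none failed (False); 'ignore' entries (e.g. filtered-out videos)
# neither trigger nor block the write.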
2944
2945 info_dict['requested_downloads'] = downloaded_formats
2946 info_dict = self.run_all_pps('after_video', info_dict)
2947 if max_downloads_reached:
2948 raise MaxDownloadsReached()
2949
2950 # We update the info dict with the selected best quality format (backwards compatibility)
2951 info_dict.update(best_format)
2952 return info_dict
2953
2954 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2955 """Select the requested subtitles and their format"""
2956 available_subs, normal_sub_langs = {}, []
2957 if normal_subtitles and self.params.get('writesubtitles'):
2958 available_subs.update(normal_subtitles)
2959 normal_sub_langs = tuple(normal_subtitles.keys())
2960 if automatic_captions and self.params.get('writeautomaticsub'):
2961 for lang, cap_info in automatic_captions.items():
2962 if lang not in available_subs:
2963 available_subs[lang] = cap_info
2964
2965 if not available_subs or (
2966 not self.params.get('writesubtitles')
2967 and not self.params.get('writeautomaticsub')):
2968 return None
2969
2970 all_sub_langs = tuple(available_subs.keys())
2971 if self.params.get('allsubtitles', False):
2972 requested_langs = all_sub_langs
2973 elif self.params.get('subtitleslangs', False):
2974 try:
2975 requested_langs = orderedSet_from_options(
2976 self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
2977 except re.error as e:
2978 raise ValueError(f'Wrong regex for subtitleslangs: {e.pattern}')
2979 else:
2980 requested_langs = LazyList(itertools.chain(
2981 ['en'] if 'en' in normal_sub_langs else [],
2982 filter(lambda f: f.startswith('en'), normal_sub_langs),
2983 ['en'] if 'en' in all_sub_langs else [],
2984 filter(lambda f: f.startswith('en'), all_sub_langs),
2985 normal_sub_langs, all_sub_langs,
2986 ))[:1]
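# Default language selection, illustrated with hypothetical inputs:
#   normal subs ('fr',) plus automatic captions adding 'en' =>
#   'en' is not in normal_sub_langs but is in all_sub_langs,
#   so requested_langs becomes ['en']; only the first match is kept ([:1]).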
2987 if requested_langs:
2988 self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
2989
2990 formats_query = self.params.get('subtitlesformat', 'best')
2991 formats_preference = formats_query.split('/') if formats_query else []
2992 subs = {}
2993 for lang in requested_langs:
2994 formats = available_subs.get(lang)
2995 if formats is None:
2996 self.report_warning(f'{lang} subtitles not available for {video_id}')
2997 continue
2998 for ext in formats_preference:
2999 if ext == 'best':
3000 f = formats[-1]
3001 break
3002 matches = list(filter(lambda f: f['ext'] == ext, formats))
3003 if matches:
3004 f = matches[-1]
3005 break
3006 else:
3007 f = formats[-1]
3008 self.report_warning(
3009 'No subtitle format found matching "%s" for language %s, '
3010 'using %s' % (formats_query, lang, f['ext']))
3011 subs[lang] = f
3012 return subs
3013
3014 def _forceprint(self, key, info_dict):
3015 if info_dict is None:
3016 return
3017 info_copy = info_dict.copy()
3018 info_copy.setdefault('filename', self.prepare_filename(info_dict))
3019 if info_dict.get('requested_formats') is not None:
3020 # For RTMP URLs, also include the playpath
3021 info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
3022 elif info_dict.get('url'):
3023 info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
3024 info_copy['formats_table'] = self.render_formats_table(info_dict)
3025 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
3026 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
3027 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
3028
3029 def format_tmpl(tmpl):
3030 mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
3031 if not mobj:
3032 return tmpl
3033
3034 fmt = '%({})s'
3035 if tmpl.startswith('{'):
3036 tmpl, fmt = f'.{tmpl}', '%({})j'
3037 if tmpl.endswith('='):
3038 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
3039 return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
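# How format_tmpl expands --print arguments (illustrative):
#   'title,id' -> '%(title)s\n%(id)s'
#   'id='      -> 'id = %(id)#j'  (prints the field name with its JSON value)
# Anything not matching the shorthand regex is passed through unchanged.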
3040
3041 for tmpl in self.params['forceprint'].get(key, []):
3042 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
3043
3044 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
3045 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
3046 tmpl = format_tmpl(tmpl)
3047 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
3048 if self._ensure_dir_exists(filename):
3049 with open(filename, 'a', encoding='utf-8', newline='') as f:
3050 f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
3051
3052 return info_copy
3053
3054 def __forced_printings(self, info_dict, filename=None, incomplete=True):
3055 if (self.params.get('forcejson')
3056 or self.params['forceprint'].get('video')
3057 or self.params['print_to_file'].get('video')):
3058 self.post_extract(info_dict)
3059 if filename:
3060 info_dict['filename'] = filename
3061 info_copy = self._forceprint('video', info_dict)
3062
3063 def print_field(field, actual_field=None, optional=False):
3064 if actual_field is None:
3065 actual_field = field
3066 if self.params.get(f'force{field}') and (
3067 info_copy.get(field) is not None or (not optional and not incomplete)):
3068 self.to_stdout(info_copy[actual_field])
3069
3070 print_field('title')
3071 print_field('id')
3072 print_field('url', 'urls')
3073 print_field('thumbnail', optional=True)
3074 print_field('description', optional=True)
3075 print_field('filename')
3076 if self.params.get('forceduration') and info_copy.get('duration') is not None:
3077 self.to_stdout(formatSeconds(info_copy['duration']))
3078 print_field('format')
3079
3080 if self.params.get('forcejson'):
3081 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
3082
3083 def dl(self, name, info, subtitle=False, test=False):
3084 if not info.get('url'):
3085 self.raise_no_formats(info, True)
3086
3087 if test:
3088 verbose = self.params.get('verbose')
3089 params = {
3090 'test': True,
3091 'quiet': self.params.get('quiet') or not verbose,
3092 'verbose': verbose,
3093 'noprogress': not verbose,
3094 'nopart': True,
3095 'skip_unavailable_fragments': False,
3096 'keep_fragments': False,
3097 'overwrites': True,
3098 '_no_ytdl_file': True,
3099 }
3100 else:
3101 params = self.params
3102 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
3103 if not test:
3104 for ph in self._progress_hooks:
3105 fd.add_progress_hook(ph)
3106 urls = '", "'.join(
3107 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
3108 for f in info.get('requested_formats', []) or [info])
3109 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
3110
3111 # Note: Ideally, info should be deep-copied so that hooks cannot modify it,
3112 # but it may contain objects that are not deep-copyable
3113 new_info = self._copy_infodict(info)
3114 if new_info.get('http_headers') is None:
3115 new_info['http_headers'] = self._calc_headers(new_info)
3116 return fd.download(name, new_info, subtitle)
3117
3118 def existing_file(self, filepaths, *, default_overwrite=True):
3119 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
3120 if existing_files and not self.params.get('overwrites', default_overwrite):
3121 return existing_files[0]
3122
3123 for file in existing_files:
3124 self.report_file_delete(file)
3125 os.remove(file)
3126 return None
3127
3128 def process_info(self, info_dict):
3129 """Process a single resolved IE result. (Modifies it in-place)"""
3130
3131 assert info_dict.get('_type', 'video') == 'video'
3132 original_infodict = info_dict
3133
3134 if 'format' not in info_dict and 'ext' in info_dict:
3135 info_dict['format'] = info_dict['ext']
3136
3137 if self._match_entry(info_dict) is not None:
3138 info_dict['__write_download_archive'] = 'ignore'
3139 return
3140
3141 # Does nothing under normal operation - for backward compatibility of process_info
3142 self.post_extract(info_dict)
3143
3144 def replace_info_dict(new_info):
3145 nonlocal info_dict
3146 if new_info == info_dict:
3147 return
3148 info_dict.clear()
3149 info_dict.update(new_info)
3150
3151 new_info, _ = self.pre_process(info_dict, 'video')
3152 replace_info_dict(new_info)
3153 self._num_downloads += 1
3154
3155 # info_dict['_filename'] needs to be set for backward compatibility
3156 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
3157 temp_filename = self.prepare_filename(info_dict, 'temp')
3158 files_to_move = {}
3159
3160 # Forced printings
3161 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
3162
3163 def check_max_downloads():
3164 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
3165 raise MaxDownloadsReached()
3166
3167 if self.params.get('simulate'):
3168 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3169 check_max_downloads()
3170 return
3171
3172 if full_filename is None:
3173 return
3174 if not self._ensure_dir_exists(encodeFilename(full_filename)):
3175 return
3176 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
3177 return
3178
3179 if self._write_description('video', info_dict,
3180 self.prepare_filename(info_dict, 'description')) is None:
3181 return
3182
3183 sub_files = self._write_subtitles(info_dict, temp_filename)
3184 if sub_files is None:
3185 return
3186 files_to_move.update(dict(sub_files))
3187
3188 thumb_files = self._write_thumbnails(
3189 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3190 if thumb_files is None:
3191 return
3192 files_to_move.update(dict(thumb_files))
3193
3194 infofn = self.prepare_filename(info_dict, 'infojson')
3195 _infojson_written = self._write_info_json('video', info_dict, infofn)
3196 if _infojson_written:
3197 info_dict['infojson_filename'] = infofn
3198 # For backward compatibility, even though it was a private field
3199 info_dict['__infojson_filename'] = infofn
3200 elif _infojson_written is None:
3201 return
3202
3203 # Note: Annotations are deprecated
3204 annofn = None
3205 if self.params.get('writeannotations', False):
3206 annofn = self.prepare_filename(info_dict, 'annotation')
3207 if annofn:
3208 if not self._ensure_dir_exists(encodeFilename(annofn)):
3209 return
3210 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
3211 self.to_screen('[info] Video annotations are already present')
3212 elif not info_dict.get('annotations'):
3213 self.report_warning('There are no annotations to write.')
3214 else:
3215 try:
3216 self.to_screen('[info] Writing video annotations to: ' + annofn)
3217 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
3218 annofile.write(info_dict['annotations'])
3219 except (KeyError, TypeError):
3220 self.report_warning('There are no annotations to write.')
3221 except OSError:
3222 self.report_error('Cannot write annotations file: ' + annofn)
3223 return
3224
3225 # Write internet shortcut files
3226 def _write_link_file(link_type):
3227 url = try_get(info_dict['webpage_url'], iri_to_uri)
3228 if not url:
3229 self.report_warning(
3230 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3231 return True
3232 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
3233 if not self._ensure_dir_exists(encodeFilename(linkfn)):
3234 return False
3235 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
3236 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3237 return True
3238 try:
3239 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3240 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3241 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
3242 template_vars = {'url': url}
3243 if link_type == 'desktop':
3244 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3245 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
3246 except OSError:
3247 self.report_error(f'Cannot write internet shortcut {linkfn}')
3248 return False
3249 return True
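# A .url shortcut written here is a plain INI-style file; roughly
# (the exact template comes from LINK_TEMPLATES):
#   [InternetShortcut]
#   URL=https://example.com/watch?v=...
# The 'desktop' type additionally receives the shortcut's own filename.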
3250
3251 write_links = {
3252 'url': self.params.get('writeurllink'),
3253 'webloc': self.params.get('writewebloclink'),
3254 'desktop': self.params.get('writedesktoplink'),
3255 }
3256 if self.params.get('writelink'):
3257 link_type = ('webloc' if sys.platform == 'darwin'
3258 else 'desktop' if sys.platform.startswith('linux')
3259 else 'url')
3260 write_links[link_type] = True
3261
3262 if any(should_write and not _write_link_file(link_type)
3263 for link_type, should_write in write_links.items()):
3264 return
3265
3266 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3267 replace_info_dict(new_info)
3268
3269 if self.params.get('skip_download'):
3270 info_dict['filepath'] = temp_filename
3271 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3272 info_dict['__files_to_move'] = files_to_move
3273 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
3274 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3275 else:
3276 # Download
3277 info_dict.setdefault('__postprocessors', [])
3278 try:
3279
3280 def existing_video_file(*filepaths):
3281 ext = info_dict.get('ext')
3282 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3283 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3284 default_overwrite=False)
3285 if file:
3286 info_dict['ext'] = os.path.splitext(file)[1][1:]
3287 return file
3288
3289 fd, success = None, True
3290 if info_dict.get('protocol') or info_dict.get('url'):
3291 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3292 if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
3293 info_dict.get('section_start') or info_dict.get('section_end')):
3294 msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
3295 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3296 self.report_error(f'{msg}. Aborting')
3297 return
3298
3299 if info_dict.get('requested_formats') is not None:
3300 old_ext = info_dict['ext']
3301 if self.params.get('merge_output_format') is None:
3302 if (info_dict['ext'] == 'webm'
3303 and info_dict.get('thumbnails')
3304 # check with type instead of pp_key, __name__, or isinstance,
3305 # since we don't want any custom PPs to trigger this
3306 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
3307 info_dict['ext'] = 'mkv'
3308 self.report_warning(
3309 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3310 new_ext = info_dict['ext']
3311
3312 def correct_ext(filename, ext=new_ext):
3313 if filename == '-':
3314 return filename
3315 filename_real_ext = os.path.splitext(filename)[1][1:]
3316 filename_wo_ext = (
3317 os.path.splitext(filename)[0]
3318 if filename_real_ext in (old_ext, new_ext)
3319 else filename)
3320 return f'{filename_wo_ext}.{ext}'
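# Example behaviour (hypothetical names), with old_ext='webm', new_ext='mkv':
#   correct_ext('video.webm') -> 'video.mkv'
#   correct_ext('video.part') -> 'video.part.mkv' (unknown ext is kept)
#   correct_ext('-')          -> '-'              (stdout is left untouched)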
3321
3322 # Ensure filename always has a correct extension for successful merge
3323 full_filename = correct_ext(full_filename)
3324 temp_filename = correct_ext(temp_filename)
3325 dl_filename = existing_video_file(full_filename, temp_filename)
3326
3327 info_dict['__real_download'] = False
3328 # NOTE: Copy so that original format dicts are not modified
3329 info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))
3330
3331 merger = FFmpegMergerPP(self)
3332 downloaded = []
3333 if dl_filename is not None:
3334 self.report_file_already_downloaded(dl_filename)
3335 elif fd:
3336 for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
3337 f['filepath'] = fname = prepend_extension(
3338 correct_ext(temp_filename, info_dict['ext']),
3339 'f%s' % f['format_id'], info_dict['ext'])
3340 downloaded.append(fname)
3341 info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
3342 success, real_download = self.dl(temp_filename, info_dict)
3343 info_dict['__real_download'] = real_download
3344 else:
3345 if self.params.get('allow_unplayable_formats'):
3346 self.report_warning(
3347 'You have requested merging of multiple formats '
3348 'while also allowing unplayable formats to be downloaded. '
3349 'The formats won\'t be merged to prevent data corruption.')
3350 elif not merger.available:
3351 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3352 if not self.params.get('ignoreerrors'):
3353 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3354 return
3355 self.report_warning(f'{msg}. The formats won\'t be merged')
3356
3357 if temp_filename == '-':
3358 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3359 else 'but the formats are incompatible for simultaneous download' if merger.available
3360 else 'but ffmpeg is not installed')
3361 self.report_warning(
3362 f'You have requested downloading multiple formats to stdout {reason}. '
3363 'The formats will be streamed one after the other')
3364 fname = temp_filename
3365 for f in info_dict['requested_formats']:
3366 new_info = dict(info_dict)
3367 del new_info['requested_formats']
3368 new_info.update(f)
3369 if temp_filename != '-':
3370 fname = prepend_extension(
3371 correct_ext(temp_filename, new_info['ext']),
3372 'f%s' % f['format_id'], new_info['ext'])
3373 if not self._ensure_dir_exists(fname):
3374 return
3375 f['filepath'] = fname
3376 downloaded.append(fname)
3377 partial_success, real_download = self.dl(fname, new_info)
3378 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3379 success = success and partial_success
3380
3381 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3382 info_dict['__postprocessors'].append(merger)
3383 info_dict['__files_to_merge'] = downloaded
3384 # Even if nothing new was downloaded, the merge itself is only happening now
3385 info_dict['__real_download'] = True
3386 else:
3387 for file in downloaded:
3388 files_to_move[file] = None
3389 else:
3390 # Just a single file
3391 dl_filename = existing_video_file(full_filename, temp_filename)
3392 if dl_filename is None or dl_filename == temp_filename:
3393 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3394 # So we should try to resume the download
3395 success, real_download = self.dl(temp_filename, info_dict)
3396 info_dict['__real_download'] = real_download
3397 else:
3398 self.report_file_already_downloaded(dl_filename)
3399
3400 dl_filename = dl_filename or temp_filename
3401 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3402
3403 except network_exceptions as err:
3404 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3405 return
3406 except OSError as err:
3407 raise UnavailableVideoError(err)
3408 except (ContentTooShortError, ) as err:
3409 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3410 return
3411
3412 self._raise_pending_errors(info_dict)
3413 if success and full_filename != '-':
3414
3415 def fixup():
3416 do_fixup = True
3417 fixup_policy = self.params.get('fixup')
3418 vid = info_dict['id']
3419
3420 if fixup_policy in ('ignore', 'never'):
3421 return
3422 elif fixup_policy == 'warn':
3423 do_fixup = 'warn'
3424 elif fixup_policy != 'force':
3425 assert fixup_policy in ('detect_or_warn', None)
3426 if not info_dict.get('__real_download'):
3427 do_fixup = False
3428
3429 def ffmpeg_fixup(cndn, msg, cls):
3430 if not (do_fixup and cndn):
3431 return
3432 elif do_fixup == 'warn':
3433 self.report_warning(f'{vid}: {msg}')
3434 return
3435 pp = cls(self)
3436 if pp.available:
3437 info_dict['__postprocessors'].append(pp)
3438 else:
3439 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3440
3441 stretched_ratio = info_dict.get('stretched_ratio')
3442 ffmpeg_fixup(stretched_ratio not in (1, None),
3443 f'Non-uniform pixel ratio {stretched_ratio}',
3444 FFmpegFixupStretchedPP)
3445
3446 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3447 downloader = downloader.FD_NAME if downloader else None
3448
3449 ext = info_dict.get('ext')
3450 postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3451 isinstance(pp, FFmpegVideoConvertorPP)
3452 and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3453 ) for pp in self._pps['post_process'])
3454
3455 if not postprocessed_by_ffmpeg:
3456 ffmpeg_fixup(fd != FFmpegFD and ext == 'm4a'
3457 and info_dict.get('container') == 'm4a_dash',
3458 'writing DASH m4a. Only some players support this container',
3459 FFmpegFixupM4aPP)
3460 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3461 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3462 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3463 FFmpegFixupM3u8PP)
3464 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
3465 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3466
3467 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3468 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3469
3470 fixup()
3471 try:
3472 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3473 except PostProcessingError as err:
3474 self.report_error('Postprocessing: %s' % str(err))
3475 return
3476 try:
3477 for ph in self._post_hooks:
3478 ph(info_dict['filepath'])
3479 except Exception as err:
3480 self.report_error('post hooks: %s' % str(err))
3481 return
3482 info_dict['__write_download_archive'] = True
3483
3484 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
3485 if self.params.get('force_write_download_archive'):
3486 info_dict['__write_download_archive'] = True
3487 check_max_downloads()
3488
3489 def __download_wrapper(self, func):
3490 @functools.wraps(func)
3491 def wrapper(*args, **kwargs):
3492 try:
3493 res = func(*args, **kwargs)
3494 except UnavailableVideoError as e:
3495 self.report_error(e)
3496 except DownloadCancelled as e:
3497 self.to_screen(f'[info] {e}')
3498 if not self.params.get('break_per_url'):
3499 raise
3500 self._num_downloads = 0
3501 else:
3502 if self.params.get('dump_single_json', False):
3503 self.post_extract(res)
3504 self.to_stdout(json.dumps(self.sanitize_info(res)))
3505 return wrapper
3506
3507 def download(self, url_list):
3508 """Download a given list of URLs."""
3509 url_list = variadic(url_list) # Passing a single URL is a common mistake
3510 outtmpl = self.params['outtmpl']['default']
3511 if (len(url_list) > 1
3512 and outtmpl != '-'
3513 and '%' not in outtmpl
3514 and self.params.get('max_downloads') != 1):
3515 raise SameFileError(outtmpl)
3516
3517 for url in url_list:
3518 self.__download_wrapper(self.extract_info)(
3519 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3520
3521 return self._download_retcode
3522
3523 def download_with_info_file(self, info_filename):
3524 with contextlib.closing(fileinput.FileInput(
3525 [info_filename], mode='r',
3526 openhook=fileinput.hook_encoded('utf-8'))) as f:
3527 # FileInput doesn't have a read method, so we can't call json.load
3528 infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
3529 for info in variadic(json.loads('\n'.join(f)))]
3530 for info in infos:
3531 try:
3532 self.__download_wrapper(self.process_ie_result)(info, download=True)
3533 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3534 if not isinstance(e, EntryNotInPlaylist):
3535 self.to_stderr('\r')
3536 webpage_url = info.get('webpage_url')
3537 if webpage_url is None:
3538 raise
3539 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3540 self.download([webpage_url])
3541 return self._download_retcode
3542
3543 @staticmethod
3544 def sanitize_info(info_dict, remove_private_keys=False):
3545 ''' Sanitize the infodict for converting to json '''
3546 if info_dict is None:
3547 return info_dict
3548 info_dict.setdefault('epoch', int(time.time()))
3549 info_dict.setdefault('_type', 'video')
3550 info_dict.setdefault('_version', {
3551 'version': __version__,
3552 'current_git_head': current_git_head(),
3553 'release_git_head': RELEASE_GIT_HEAD,
3554 'repository': ORIGIN,
3555 })
3556
3557 if remove_private_keys:
3558 reject = lambda k, v: v is None or k.startswith('__') or k in {
3559 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3560 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
3561 'playlist_autonumber',
3562 }
3563 else:
3564 reject = lambda k, v: False
3565
3566 def filter_fn(obj):
3567 if isinstance(obj, dict):
3568 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3569 elif isinstance(obj, (list, tuple, set, LazyList)):
3570 return list(map(filter_fn, obj))
3571 elif obj is None or isinstance(obj, (str, int, float, bool)):
3572 return obj
3573 else:
3574 return repr(obj)
3575
3576 return filter_fn(info_dict)
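# Example (hypothetical): a datetime object in the dict survives as its
# repr() string, and with remove_private_keys=True, keys such as 'filepath',
# 'entries' and anything starting with '__' are dropped entirely.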
3577
3578 @staticmethod
3579 def filter_requested_info(info_dict, actually_filter=True):
3580 ''' Alias of sanitize_info for backward compatibility '''
3581 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3582
3583 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3584 for filename in set(filter(None, files_to_delete)):
3585 if msg:
3586 self.to_screen(msg % filename)
3587 try:
3588 os.remove(filename)
3589 except OSError:
3590 self.report_warning(f'Unable to delete file {filename}')
3591 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3592 del info['__files_to_move'][filename]
3593
3594 @staticmethod
3595 def post_extract(info_dict):
3596 def actual_post_extract(info_dict):
3597 if info_dict.get('_type') in ('playlist', 'multi_video'):
3598 for video_dict in info_dict.get('entries', {}):
3599 actual_post_extract(video_dict or {})
3600 return
3601
3602 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3603 info_dict.update(post_extractor())
3604
3605 actual_post_extract(info_dict or {})
3606
3607 def run_pp(self, pp, infodict):
3608 files_to_delete = []
3609 if '__files_to_move' not in infodict:
3610 infodict['__files_to_move'] = {}
3611 try:
3612 files_to_delete, infodict = pp.run(infodict)
3613 except PostProcessingError as e:
3614 # Must be True and not 'only_download'
3615 if self.params.get('ignoreerrors') is True:
3616 self.report_error(e)
3617 return infodict
3618 raise
3619
3620 if not files_to_delete:
3621 return infodict
3622 if self.params.get('keepvideo', False):
3623 for f in files_to_delete:
3624 infodict['__files_to_move'].setdefault(f, '')
3625 else:
3626 self._delete_downloaded_files(
3627 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3628 return infodict
3629
3630 def run_all_pps(self, key, info, *, additional_pps=None):
3631 if key != 'video':
3632 self._forceprint(key, info)
3633 for pp in (additional_pps or []) + self._pps[key]:
3634 info = self.run_pp(pp, info)
3635 return info
3636
3637 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3638 info = dict(ie_info)
3639 info['__files_to_move'] = files_to_move or {}
3640 try:
3641 info = self.run_all_pps(key, info)
3642 except PostProcessingError as err:
3643 msg = f'Preprocessing: {err}'
3644 info.setdefault('__pending_error', msg)
3645 self.report_error(msg, is_error=False)
3646 return info, info.pop('__files_to_move', None)
3647
3648 def post_process(self, filename, info, files_to_move=None):
3649 """Run all the postprocessors on the given file."""
3650 info['filepath'] = filename
3651 info['__files_to_move'] = files_to_move or {}
3652 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3653 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3654 del info['__files_to_move']
3655 return self.run_all_pps('after_move', info)
3656
3657 def _make_archive_id(self, info_dict):
3658 video_id = info_dict.get('id')
3659 if not video_id:
3660 return
3661 # Future-proof against any change in case,
3662 # and for backwards compatibility with prior versions
3663 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3664 if extractor is None:
3665 url = str_or_none(info_dict.get('url'))
3666 if not url:
3667 return
3668 # Try to find matching extractor for the URL and take its ie_key
3669 for ie_key, ie in self._ies.items():
3670 if ie.suitable(url):
3671 extractor = ie_key
3672 break
3673 else:
3674 return
3675 return make_archive_id(extractor, video_id)
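# The resulting archive id is '<extractor-key-lowercased> <video-id>';
# e.g. a hypothetical YouTube entry would be recorded as
#   'youtube dQw4w9WgXcQ'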
3676
3677 def in_download_archive(self, info_dict):
3678 if not self.archive:
3679 return False
3680
3681 vid_ids = [self._make_archive_id(info_dict)]
3682 vid_ids.extend(info_dict.get('_old_archive_ids') or [])
3683 return any(id_ in self.archive for id_ in vid_ids)
3684
3685 def record_download_archive(self, info_dict):
3686 fn = self.params.get('download_archive')
3687 if fn is None:
3688 return
3689 vid_id = self._make_archive_id(info_dict)
3690 assert vid_id
3691
3692 self.write_debug(f'Adding to archive: {vid_id}')
3693 if is_path_like(fn):
3694 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3695 archive_file.write(vid_id + '\n')
3696 self.archive.add(vid_id)
3697
3698 @staticmethod
3699 def format_resolution(format, default='unknown'):
3700 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3701 return 'audio only'
3702 if format.get('resolution') is not None:
3703 return format['resolution']
3704 if format.get('width') and format.get('height'):
3705 return '%dx%d' % (format['width'], format['height'])
3706 elif format.get('height'):
3707 return '%sp' % format['height']
3708 elif format.get('width'):
3709 return '%dx?' % format['width']
3710 return default
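# Illustrative outputs (hypothetical format dicts):
#   {'vcodec': 'none', 'acodec': 'mp4a'} -> 'audio only'
#   {'width': 1920, 'height': 1080}      -> '1920x1080'
#   {'height': 720}                      -> '720p'
#   {'width': 1920}                      -> '1920x?'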
3711
3712 def _list_format_headers(self, *headers):
3713 if self.params.get('listformats_table', True) is not False:
3714 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3715 return headers
3716
3717 def _format_note(self, fdict):
3718 res = ''
3719 if fdict.get('ext') in ['f4f', 'f4m']:
3720 res += '(unsupported)'
3721 if fdict.get('language'):
3722 if res:
3723 res += ' '
3724 res += '[%s]' % fdict['language']
3725 if fdict.get('format_note') is not None:
3726 if res:
3727 res += ' '
3728 res += fdict['format_note']
3729 if fdict.get('tbr') is not None:
3730 if res:
3731 res += ', '
3732 res += '%4dk' % fdict['tbr']
3733 if fdict.get('container') is not None:
3734 if res:
3735 res += ', '
3736 res += '%s container' % fdict['container']
3737 if (fdict.get('vcodec') is not None
3738 and fdict.get('vcodec') != 'none'):
3739 if res:
3740 res += ', '
3741 res += fdict['vcodec']
3742 if fdict.get('vbr') is not None:
3743 res += '@'
3744 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3745 res += 'video@'
3746 if fdict.get('vbr') is not None:
3747 res += '%4dk' % fdict['vbr']
3748 if fdict.get('fps') is not None:
3749 if res:
3750 res += ', '
3751 res += '%sfps' % fdict['fps']
3752 if fdict.get('acodec') is not None:
3753 if res:
3754 res += ', '
3755 if fdict['acodec'] == 'none':
3756 res += 'video only'
3757 else:
3758 res += '%-5s' % fdict['acodec']
3759 elif fdict.get('abr') is not None:
3760 if res:
3761 res += ', '
3762 res += 'audio'
3763 if fdict.get('abr') is not None:
3764 res += '@%3dk' % fdict['abr']
3765 if fdict.get('asr') is not None:
3766 res += ' (%5dHz)' % fdict['asr']
3767 if fdict.get('filesize') is not None:
3768 if res:
3769 res += ', '
3770 res += format_bytes(fdict['filesize'])
3771 elif fdict.get('filesize_approx') is not None:
3772 if res:
3773 res += ', '
3774 res += '~' + format_bytes(fdict['filesize_approx'])
3775 return res
3776
3777 def _get_formats(self, info_dict):
3778 if info_dict.get('formats') is None:
3779 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3780 return [info_dict]
3781 return []
3782 return info_dict['formats']
3783
3784 def render_formats_table(self, info_dict):
3785 formats = self._get_formats(info_dict)
3786 if not formats:
3787 return
3788 if self.params.get('listformats_table', True) is False:
3789 table = [
3790 [
3791 format_field(f, 'format_id'),
3792 format_field(f, 'ext'),
3793 self.format_resolution(f),
3794 self._format_note(f)
3795 ] for f in formats if (f.get('preference') or 0) >= -1000]
3796 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3797
3798 def simplified_codec(f, field):
3799 assert field in ('acodec', 'vcodec')
3800 codec = f.get(field)
3801 if not codec:
3802 return 'unknown'
3803 elif codec != 'none':
3804 return '.'.join(codec.split('.')[:4])
3805
3806 if field == 'vcodec' and f.get('acodec') == 'none':
3807 return 'images'
3808 elif field == 'acodec' and f.get('vcodec') == 'none':
3809 return ''
3810 return self._format_out('audio only' if field == 'vcodec' else 'video only',
3811 self.Styles.SUPPRESS)
3812
3813 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3814 table = [
3815 [
3816 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3817 format_field(f, 'ext'),
3818 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3819 format_field(f, 'fps', '\t%d', func=round),
3820 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3821 format_field(f, 'audio_channels', '\t%s'),
3822 delim, (
3823 format_field(f, 'filesize', ' \t%s', func=format_bytes)
3824 or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
3825 or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
3826 None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
3827 format_field(f, 'tbr', '\t%dk', func=round),
3828 shorten_protocol_name(f.get('protocol', '')),
3829 delim,
3830 simplified_codec(f, 'vcodec'),
3831 format_field(f, 'vbr', '\t%dk', func=round),
3832 simplified_codec(f, 'acodec'),
3833 format_field(f, 'abr', '\t%dk', func=round),
3834 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
3835 join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
3836 self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
3837 (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
3838 else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
3839 format_field(f, 'format_note'),
3840 format_field(f, 'container', ignore=(None, f.get('ext'))),
3841 delim=', '), delim=' '),
3842 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3843 header_line = self._list_format_headers(
3844 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3845 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3846
3847 return render_table(
3848 header_line, table, hide_empty=True,
3849 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3850
3851 def render_thumbnails_table(self, info_dict):
3852 thumbnails = list(info_dict.get('thumbnails') or [])
3853 if not thumbnails:
3854 return None
3855 return render_table(
3856 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3857 [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
3858
3859 def render_subtitles_table(self, video_id, subtitles):
3860 def _row(lang, formats):
3861 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3862 if len(set(names)) == 1:
3863 names = [] if names[0] == 'unknown' else names[:1]
3864 return [lang, ', '.join(names), ', '.join(exts)]
3865
3866 if not subtitles:
3867 return None
3868 return render_table(
3869 self._list_format_headers('Language', 'Name', 'Formats'),
3870 [_row(lang, formats) for lang, formats in subtitles.items()],
3871 hide_empty=True)
3872
3873 def __list_table(self, video_id, name, func, *args):
3874 table = func(*args)
3875 if not table:
3876 self.to_screen(f'{video_id} has no {name}')
3877 return
3878 self.to_screen(f'[info] Available {name} for {video_id}:')
3879 self.to_stdout(table)
3880
3881 def list_formats(self, info_dict):
3882 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3883
3884 def list_thumbnails(self, info_dict):
3885 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3886
3887 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3888 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3889
3890 def print_debug_header(self):
3891 if not self.params.get('verbose'):
3892 return
3893
3894 from . import _IN_CLI # Must be delayed import
3895
3896 # These imports can be slow. So import them only as needed
3897 from .extractor.extractors import _LAZY_LOADER
3898 from .extractor.extractors import (
3899 _PLUGIN_CLASSES as plugin_ies,
3900 _PLUGIN_OVERRIDES as plugin_ie_overrides
3901 )
3902
3903 def get_encoding(stream):
3904 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3905 additional_info = []
3906 if os.environ.get('TERM', '').lower() == 'dumb':
3907 additional_info.append('dumb')
3908 if not supports_terminal_sequences(stream):
3909 from .utils import WINDOWS_VT_MODE # Must be imported locally
3910 additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
3911 if additional_info:
3912 ret = f'{ret} ({",".join(additional_info)})'
3913 return ret
3914
3915 encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
3916 locale.getpreferredencoding(),
3917 sys.getfilesystemencoding(),
3918 self.get_encoding(),
3919 ', '.join(
3920 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
3921 if stream is not None and key != 'console')
3922 )
3923
3924 logger = self.params.get('logger')
3925 if logger:
3926 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3927 write_debug(encoding_str)
3928 else:
3929 write_string(f'[debug] {encoding_str}\n', encoding=None)
3930 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3931
3932 source = detect_variant()
3933 if VARIANT not in (None, 'pip'):
3934 source += '*'
3935 klass = type(self)
3936 write_debug(join_nonempty(
3937 f'{REPOSITORY.rpartition("/")[2]} version',
3938 _make_label(ORIGIN, CHANNEL.partition('@')[2] or __version__, __version__),
3939 f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
3940 '' if source == 'unknown' else f'({source})',
3941 '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
3942 delim=' '))
3943
3944 if not _IN_CLI:
3945 write_debug(f'params: {self.params}')
3946
3947 if not _LAZY_LOADER:
3948 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3949 write_debug('Lazy loading extractors is forcibly disabled')
3950 else:
3951 write_debug('Lazy loading extractors is disabled')
3952 if self.params['compat_opts']:
3953 write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
3954
3955 if current_git_head():
3956 write_debug(f'Git HEAD: {current_git_head()}')
3957 write_debug(system_identifier())
3958
3959 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3960 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3961 if ffmpeg_features:
3962 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
3963
3964 exe_versions['rtmpdump'] = rtmpdump_version()
3965 exe_versions['phantomjs'] = PhantomJSwrapper._version()
3966 exe_str = ', '.join(
3967 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3968 ) or 'none'
3969 write_debug('exe versions: %s' % exe_str)
3970
3971 from .compat.compat_utils import get_package_info
3972 from .dependencies import available_dependencies
3973
3974 write_debug('Optional libraries: %s' % (', '.join(sorted({
3975 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
3976 })) or 'none'))
3977
3978 write_debug(f'Proxy map: {self.proxies}')
3979 write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
3980 for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
3981 display_list = ['%s%s' % (
3982 klass.__name__, '' if klass.__name__ == name else f' as {name}')
3983 for name, klass in plugins.items()]
3984 if plugin_type == 'Extractor':
3985 display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
3986 for parent, plugins in plugin_ie_overrides.items())
3987 if not display_list:
3988 continue
3989 write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')
3990
3991 plugin_dirs = plugin_directories()
3992 if plugin_dirs:
3993 write_debug(f'Plugin directories: {plugin_dirs}')
3994
3995 # Not implemented
3996 if False and self.params.get('call_home'):
3997 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
3998 write_debug('Public IP address: %s' % ipaddr)
3999 latest_version = self.urlopen(
4000 'https://yt-dl.org/latest/version').read().decode()
4001 if version_tuple(latest_version) > version_tuple(__version__):
4002 self.report_warning(
4003 'You are using an outdated version (newest version: %s)! '
4004 'See https://yt-dl.org/update if you need help updating.' %
4005 latest_version)
4006
4007 @functools.cached_property
4008 def proxies(self):
4009 """Global proxy configuration"""
4010 opts_proxy = self.params.get('proxy')
4011 if opts_proxy is not None:
4012 if opts_proxy == '':
4013 opts_proxy = '__noproxy__'
4014 proxies = {'all': opts_proxy}
4015 else:
4016 proxies = urllib.request.getproxies()
4017 # compat: use the HTTP proxy for HTTPS as well. Set HTTPS_PROXY to __noproxy__ to revert
4018 if 'http' in proxies and 'https' not in proxies:
4019 proxies['https'] = proxies['http']
4020
4021 return proxies
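# Resulting mapping, assuming a hypothetical --proxy socks5://127.0.0.1:1080:
#   {'all': 'socks5://127.0.0.1:1080'}
# With no --proxy, the environment (HTTP_PROXY etc.) is consulted instead,
# and the http proxy is reused for https unless HTTPS_PROXY overrides it.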
4022
4023 @functools.cached_property
4024 def cookiejar(self):
4025 """Global cookiejar instance"""
4026 return load_cookies(
4027 self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
4028
4029 @property
4030 def _opener(self):
4031 """
4032 Get a urllib OpenerDirector from the Urllib handler (deprecated).
4033 """
4034 self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
4035 handler = self._request_director.handlers['Urllib']
4036 return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
4037
4038 def urlopen(self, req):
4039 """ Start an HTTP download """
4040 if isinstance(req, str):
4041 req = Request(req)
4042 elif isinstance(req, urllib.request.Request):
4043 self.deprecation_warning(
4044 'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
4045 'Use yt_dlp.networking.common.Request instead.')
4046 req = urllib_req_to_req(req)
4047 assert isinstance(req, Request)
4048
4049 # compat: assume user:pass in the URL is basic auth
4050 url, basic_auth_header = extract_basic_auth(req.url)
4051 if basic_auth_header:
4052 req.headers['Authorization'] = basic_auth_header
4053 req.url = sanitize_url(url)
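# e.g. a hypothetical 'https://user:pass@example.com/x' is rewritten to
# 'https://example.com/x' with the header
#   Authorization: Basic dXNlcjpwYXNz   (base64 of 'user:pass')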
4054
4055 clean_proxies(proxies=req.proxies, headers=req.headers)
4056 clean_headers(req.headers)
4057
4058 try:
4059 return self._request_director.send(req)
4060 except NoSupportingHandlers as e:
4061 for ue in e.unsupported_errors:
4062 # FIXME: This depends on the order of errors.
4063 if not (ue.handler and ue.msg):
4064 continue
4065 if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
4066 raise RequestError(
4067 'file:// URLs are disabled by default in yt-dlp for security reasons. '
4068 'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
4069 if 'unsupported proxy type: "https"' in ue.msg.lower():
4070 raise RequestError(
4071 'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests')
4072
4073 elif (
4074 re.match(r'unsupported url scheme: "wss?"', ue.msg.lower())
4075 and 'websockets' not in self._request_director.handlers
4076 ):
4077 raise RequestError(
4078 'This request requires WebSocket support. '
4079 'Ensure one of the following dependencies is installed: websockets',
4080 cause=ue) from ue
4081 raise
4082 except SSLError as e:
4083 if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
4084 raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
4085 elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
4086 raise RequestError(
4087 'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
4088 'Try using --legacy-server-connect', cause=e) from e
4089 raise
4090 except HTTPError as e: # TODO: Remove in a future release
4091 raise _CompatHTTPError(e) from e
4092
4093 def build_request_director(self, handlers, preferences=None):
4094 logger = _YDLLogger(self)
4095 headers = self.params['http_headers'].copy()
4096 proxies = self.proxies.copy()
4097 clean_headers(headers)
4098 clean_proxies(proxies, headers)
4099
4100 director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
4101 for handler in handlers:
4102 director.add_handler(handler(
4103 logger=logger,
4104 headers=headers,
4105 cookiejar=self.cookiejar,
4106 proxies=proxies,
4107 prefer_system_certs='no-certifi' in self.params['compat_opts'],
4108 verify=not self.params.get('nocheckcertificate'),
4109 **traverse_obj(self.params, {
4110 'verbose': 'debug_printtraffic',
4111 'source_address': 'source_address',
4112 'timeout': 'socket_timeout',
4113 'legacy_ssl_support': 'legacyserverconnect',
4114 'enable_file_urls': 'enable_file_urls',
4115 'client_cert': {
4116 'client_certificate': 'client_certificate',
4117 'client_certificate_key': 'client_certificate_key',
4118 'client_certificate_password': 'client_certificate_password',
4119 },
4120 }),
4121 ))
4122 director.preferences.update(preferences or [])
4123 if 'prefer-legacy-http-handler' in self.params['compat_opts']:
4124 director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0)
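# Preferences are callables (rh, request) -> int; handlers with the highest
# total score are tried first, so the +500 above is enough to make the
# legacy Urllib handler outrank the others for every request.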
4125 return director
4126
4127 def encode(self, s):
4128 if isinstance(s, bytes):
4129 return s # Already encoded
4130
4131 try:
4132 return s.encode(self.get_encoding())
4133 except UnicodeEncodeError as err:
4134 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
4135 raise
4136
4137 def get_encoding(self):
4138 encoding = self.params.get('encoding')
4139 if encoding is None:
4140 encoding = preferredencoding()
4141 return encoding
4142
4143 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
4144 ''' Write infojson and return True = written, 'exists' = already exists, False = skip, None = error '''
4145 if overwrite is None:
4146 overwrite = self.params.get('overwrites', True)
4147 if not self.params.get('writeinfojson'):
4148 return False
4149 elif not infofn:
4150 self.write_debug(f'Skipping writing {label} infojson')
4151 return False
4152 elif not self._ensure_dir_exists(infofn):
4153 return None
4154 elif not overwrite and os.path.exists(infofn):
4155 self.to_screen(f'[info] {label.title()} metadata is already present')
4156 return 'exists'
4157
4158 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
4159 try:
4160 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
4161 return True
4162 except OSError:
4163 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
4164 return None
4165
4166 def _write_description(self, label, ie_result, descfn):
4167 ''' Write description and return True = written, False = skip, None = error '''
4168 if not self.params.get('writedescription'):
4169 return False
4170 elif not descfn:
4171 self.write_debug(f'Skipping writing {label} description')
4172 return False
4173 elif not self._ensure_dir_exists(descfn):
4174 return None
4175 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
4176 self.to_screen(f'[info] {label.title()} description is already present')
4177 elif ie_result.get('description') is None:
4178 self.to_screen(f'[info] There\'s no {label} description to write')
4179 return False
4180 else:
4181 try:
4182 self.to_screen(f'[info] Writing {label} description to: {descfn}')
4183 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
4184 descfile.write(ie_result['description'])
4185 except OSError:
4186 self.report_error(f'Cannot write {label} description file {descfn}')
4187 return None
4188 return True

    def _write_subtitles(self, info_dict, filename):
        ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None on error '''
        ret = []
        subtitles = info_dict.get('requested_subtitles')
        if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
            # Subtitle download errors are already handled in the relevant IE,
            # so processing continues silently when an IE lacks subtitle support
            return ret
        elif not subtitles:
            self.to_screen('[info] There are no subtitles for the requested languages')
            return ret
        sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
        if not sub_filename_base:
            self.to_screen('[info] Skipping writing video subtitles')
            return ret

        for sub_lang, sub_info in subtitles.items():
            sub_format = sub_info['ext']
            sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
            sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
            existing_sub = self.existing_file((sub_filename_final, sub_filename))
            if existing_sub:
                self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
                sub_info['filepath'] = existing_sub
                ret.append((existing_sub, sub_filename_final))
                continue

            self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
            if sub_info.get('data') is not None:
                try:
                    # Use newline='' to prevent conversion of newline characters
                    # See https://github.com/ytdl-org/youtube-dl/issues/10268
                    with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                        subfile.write(sub_info['data'])
                    sub_info['filepath'] = sub_filename
                    ret.append((sub_filename, sub_filename_final))
                    continue
                except OSError:
                    self.report_error(f'Cannot write video subtitles file {sub_filename}')
                    return None

            try:
                sub_copy = sub_info.copy()
                sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
                self.dl(sub_filename, sub_copy, subtitle=True)
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
            except (DownloadError, ExtractorError, OSError, ValueError) + network_exceptions as err:
                msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
                if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
                    if not self.params.get('ignoreerrors'):
                        self.report_error(msg)
                    raise DownloadError(msg)
                self.report_warning(msg)
        return ret
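    # Shape of the result (illustrative values): (current_path, final_path)
    # pairs such as [('video.en.vtt', 'video.en.vtt')], which the caller can use
    # to move subtitles to their final location once the download finishes.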

    def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
        ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None on error '''
        write_all = self.params.get('write_all_thumbnails', False)
        thumbnails, ret = [], []
        if write_all or self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails') or []
            if not thumbnails:
                self.to_screen(f'[info] There are no {label} thumbnails to download')
                return ret
        multiple = write_all and len(thumbnails) > 1

        if thumb_filename_base is None:
            thumb_filename_base = filename
        if thumbnails and not thumb_filename_base:
            self.write_debug(f'Skipping writing {label} thumbnail')
            return ret

        if thumbnails and not self._ensure_dir_exists(filename):
            return None

        # Iterate in reverse so that the last (most preferred) thumbnail is tried
        # first, and so that pop(idx) below cannot shift the indices of entries
        # still to be visited
        for idx, t in list(enumerate(thumbnails))[::-1]:
            thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
            thumb_display_id = f'{label} thumbnail {t["id"]}'
            thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
            thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

            existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
            if existing_thumb:
                self.to_screen('[info] %s is already present' % (
                    thumb_display_id if multiple else f'{label} thumbnail').capitalize())
                t['filepath'] = existing_thumb
                ret.append((existing_thumb, thumb_filename_final))
            else:
                self.to_screen(f'[info] Downloading {thumb_display_id} ...')
                try:
                    uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    ret.append((thumb_filename, thumb_filename_final))
                    t['filepath'] = thumb_filename
                except network_exceptions as err:
                    if isinstance(err, HTTPError) and err.status == 404:
                        self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
                    else:
                        self.report_warning(f'Unable to download {thumb_display_id}: {err}')
                    thumbnails.pop(idx)
            if ret and not write_all:
                break
        return ret
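    # As with _write_subtitles, the returned (thumb_filename, thumb_filename_final)
    # pairs let the caller relocate files once the download completes (e.g. via
    # MoveFilesAfterDownloadPP); an empty list means nothing needed writing, while
    # None means the target directory could not be created.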