]> jfr.im git - yt-dlp.git/blob - yt_dlp/YoutubeDL.py
[cleanup] Misc (#8182)
[yt-dlp.git] / yt_dlp / YoutubeDL.py
1 import collections
2 import contextlib
3 import copy
4 import datetime
5 import errno
6 import fileinput
7 import http.cookiejar
8 import io
9 import itertools
10 import json
11 import locale
12 import operator
13 import os
14 import random
15 import re
16 import shutil
17 import string
18 import subprocess
19 import sys
20 import tempfile
21 import time
22 import tokenize
23 import traceback
24 import unicodedata
25
26 from .cache import Cache
27 from .compat import functools, urllib # isort: split
28 from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
29 from .cookies import LenientSimpleCookie, load_cookies
30 from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
31 from .downloader.rtmp import rtmpdump_version
32 from .extractor import gen_extractor_classes, get_info_extractor
33 from .extractor.common import UnsupportedURLIE
34 from .extractor.openload import PhantomJSwrapper
35 from .minicurses import format_text
36 from .networking import HEADRequest, Request, RequestDirector
37 from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES
38 from .networking.exceptions import (
39 HTTPError,
40 NoSupportingHandlers,
41 RequestError,
42 SSLError,
43 _CompatHTTPError,
44 network_exceptions,
45 )
46 from .plugins import directories as plugin_directories
47 from .postprocessor import _PLUGIN_CLASSES as plugin_pps
48 from .postprocessor import (
49 EmbedThumbnailPP,
50 FFmpegFixupDuplicateMoovPP,
51 FFmpegFixupDurationPP,
52 FFmpegFixupM3u8PP,
53 FFmpegFixupM4aPP,
54 FFmpegFixupStretchedPP,
55 FFmpegFixupTimestampPP,
56 FFmpegMergerPP,
57 FFmpegPostProcessor,
58 FFmpegVideoConvertorPP,
59 MoveFilesAfterDownloadPP,
60 get_postprocessor,
61 )
62 from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
63 from .update import REPOSITORY, current_git_head, detect_variant
64 from .utils import (
65 DEFAULT_OUTTMPL,
66 IDENTITY,
67 LINK_TEMPLATES,
68 MEDIA_EXTENSIONS,
69 NO_DEFAULT,
70 NUMBER_RE,
71 OUTTMPL_TYPES,
72 POSTPROCESS_WHEN,
73 STR_FORMAT_RE_TMPL,
74 STR_FORMAT_TYPES,
75 ContentTooShortError,
76 DateRange,
77 DownloadCancelled,
78 DownloadError,
79 EntryNotInPlaylist,
80 ExistingVideoReached,
81 ExtractorError,
82 FormatSorter,
83 GeoRestrictedError,
84 ISO3166Utils,
85 LazyList,
86 MaxDownloadsReached,
87 Namespace,
88 PagedList,
89 PlaylistEntries,
90 Popen,
91 PostProcessingError,
92 ReExtractInfo,
93 RejectedVideoReached,
94 SameFileError,
95 UnavailableVideoError,
96 UserNotLive,
97 age_restricted,
98 args_to_str,
99 bug_reports_message,
100 date_from_str,
101 deprecation_warning,
102 determine_ext,
103 determine_protocol,
104 encode_compat_str,
105 encodeFilename,
106 error_to_compat_str,
107 escapeHTML,
108 expand_path,
109 extract_basic_auth,
110 filter_dict,
111 float_or_none,
112 format_bytes,
113 format_decimal_suffix,
114 format_field,
115 formatSeconds,
116 get_compatible_ext,
117 get_domain,
118 int_or_none,
119 iri_to_uri,
120 is_path_like,
121 join_nonempty,
122 locked_file,
123 make_archive_id,
124 make_dir,
125 number_of_digits,
126 orderedSet,
127 orderedSet_from_options,
128 parse_filesize,
129 preferredencoding,
130 prepend_extension,
131 remove_terminal_sequences,
132 render_table,
133 replace_extension,
134 sanitize_filename,
135 sanitize_path,
136 sanitize_url,
137 str_or_none,
138 strftime_or_none,
139 subtitles_filename,
140 supports_terminal_sequences,
141 system_identifier,
142 timetuple_from_msec,
143 to_high_limit_path,
144 traverse_obj,
145 try_call,
146 try_get,
147 url_basename,
148 variadic,
149 version_tuple,
150 windows_enable_vt_mode,
151 write_json_file,
152 write_string,
153 )
154 from .utils._utils import _YDLLogger
155 from .utils.networking import (
156 HTTPHeaderDict,
157 clean_headers,
158 clean_proxies,
159 std_headers,
160 )
161 from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__
162
163 if compat_os_name == 'nt':
164 import ctypes
165
166
167 class YoutubeDL:
168 """YoutubeDL class.
169
170 YoutubeDL objects are the ones responsible for downloading the
171 actual video file and writing it to disk if the user has requested
172 it, among some other tasks. In most cases there should be one per
173 program. As, given a video URL, the downloader doesn't know how to
174 extract all the needed information, task that InfoExtractors do, it
175 has to pass the URL to one of them.
176
177 For this, YoutubeDL objects have a method that allows
178 InfoExtractors to be registered in a given order. When it is passed
179 a URL, the YoutubeDL object hands it to the first InfoExtractor it
180 finds that reports being able to handle it. The InfoExtractor extracts
181 all the information about the video or videos the URL refers to, and
182 YoutubeDL processes the extracted information, possibly using a File
183 Downloader to download the video.
184
185 YoutubeDL objects accept a lot of parameters. In order not to saturate
186 the object constructor with arguments, it receives a dictionary of
187 options instead. These options are available through the params
188 attribute for the InfoExtractors to use. The YoutubeDL also
189 registers itself as the downloader in charge for the InfoExtractors
190 that are added to it, so this is a "mutual registration".
191
192 Available options:
193
194 username: Username for authentication purposes.
195 password: Password for authentication purposes.
196 videopassword: Password for accessing a video.
197 ap_mso: Adobe Pass multiple-system operator identifier.
198 ap_username: Multiple-system operator account username.
199 ap_password: Multiple-system operator account password.
200 usenetrc: Use netrc for authentication instead.
201 netrc_location: Location of the netrc file. Defaults to ~/.netrc.
202 netrc_cmd: Use a shell command to get credentials
203 verbose: Print additional info to stdout.
204 quiet: Do not print messages to stdout.
205 no_warnings: Do not print out anything for warnings.
206 forceprint: A dict with keys WHEN mapped to a list of templates to
207 print to stdout. The allowed keys are video or any of the
208 items in utils.POSTPROCESS_WHEN.
209 For compatibility, a single list is also accepted
210 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
211 a list of tuples with (template, filename)
212 forcejson: Force printing info_dict as JSON.
213 dump_single_json: Force printing the info_dict of the whole playlist
214 (or video) as a single JSON line.
215 force_write_download_archive: Force writing download archive regardless
216 of 'skip_download' or 'simulate'.
217 simulate: Do not download the video files. If unset (or None),
218 simulate only if listsubtitles, listformats or list_thumbnails is used
219 format: Video format code. see "FORMAT SELECTION" for more details.
220 You can also pass a function. The function takes 'ctx' as
221 argument and returns the formats to download.
222 See "build_format_selector" for an implementation
223 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
224 ignore_no_formats_error: Ignore "No video formats" error. Useful for
225 extracting metadata even if the video is not actually
226 available for download (experimental)
227 format_sort: A list of fields by which to sort the video formats.
228 See "Sorting Formats" for more details.
229 format_sort_force: Force the given format_sort. see "Sorting Formats"
230 for more details.
231 prefer_free_formats: Whether to prefer video formats with free containers
232 over non-free ones of same quality.
233 allow_multiple_video_streams: Allow multiple video streams to be merged
234 into a single file
235 allow_multiple_audio_streams: Allow multiple audio streams to be merged
236 into a single file
237 check_formats: Whether to test if the formats are downloadable.
238 Can be True (check all), False (check none),
239 'selected' (check selected formats),
240 or None (check only if requested by extractor)
241 paths: Dictionary of output paths. The allowed keys are 'home'
242 'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py)
243 outtmpl: Dictionary of templates for output names. Allowed keys
244 are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py).
245 For compatibility with youtube-dl, a single string can also be used
246 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
247 restrictfilenames: Do not allow "&" and spaces in file names
248 trim_file_name: Limit length of filename (extension excluded)
249 windowsfilenames: Force the filenames to be windows compatible
250 ignoreerrors: Do not stop on download/postprocessing errors.
251 Can be 'only_download' to ignore only download errors.
252 Default is 'only_download' for CLI, but False for API
253 skip_playlist_after_errors: Number of allowed failures until the rest of
254 the playlist is skipped
255 allowed_extractors: List of regexes to match against extractor names that are allowed
256 overwrites: Overwrite all video and metadata files if True,
257 overwrite only non-video files if None
258 and don't overwrite any file if False
259 playlist_items: Specific indices of playlist to download.
260 playlistrandom: Download playlist items in random order.
261 lazy_playlist: Process playlist entries as they are received.
262 matchtitle: Download only matching titles.
263 rejecttitle: Reject downloads for matching titles.
264 logger: Log messages to a logging.Logger instance.
265 logtostderr: Print everything to stderr instead of stdout.
266 consoletitle: Display progress in console window's titlebar.
267 writedescription: Write the video description to a .description file
268 writeinfojson: Write the video metadata to a .info.json file
269 clean_infojson: Remove internal metadata from the infojson
270 getcomments: Extract video comments. This will not be written to disk
271 unless writeinfojson is also given
272 writeannotations: Write the video annotations to a .annotations.xml file
273 writethumbnail: Write the thumbnail image to a file
274 allow_playlist_files: Whether to write playlists' description, infojson etc
275 also to disk when using the 'write*' options
276 write_all_thumbnails: Write all thumbnail formats to files
277 writelink: Write an internet shortcut file, depending on the
278 current platform (.url/.webloc/.desktop)
279 writeurllink: Write a Windows internet shortcut file (.url)
280 writewebloclink: Write a macOS internet shortcut file (.webloc)
281 writedesktoplink: Write a Linux internet shortcut file (.desktop)
282 writesubtitles: Write the video subtitles to a file
283 writeautomaticsub: Write the automatically generated subtitles to a file
284 listsubtitles: Lists all available subtitles for the video
285 subtitlesformat: The format code for subtitles
286 subtitleslangs: List of languages of the subtitles to download (can be regex).
287 The list may contain "all" to refer to all the available
288 subtitles. The language can be prefixed with a "-" to
289 exclude it from the requested languages, e.g. ['all', '-live_chat']
290 keepvideo: Keep the video file after post-processing
291 daterange: A utils.DateRange object, download only if the upload_date is in the range.
292 skip_download: Skip the actual download of the video file
293 cachedir: Location of the cache files in the filesystem.
294 False to disable filesystem cache.
295 noplaylist: Download single video instead of a playlist if in doubt.
296 age_limit: An integer representing the user's age in years.
297 Unsuitable videos for the given age are skipped.
298 min_views: An integer representing the minimum view count the video
299 must have in order to not be skipped.
300 Videos without view count information are always
301 downloaded. None for no limit.
302 max_views: An integer representing the maximum view count.
303 Videos that are more popular than that are not
304 downloaded.
305 Videos without view count information are always
306 downloaded. None for no limit.
307 download_archive: A set, or the name of a file where all downloads are recorded.
308 Videos already present in the file are not downloaded again.
309 break_on_existing: Stop the download process after attempting to download a
310 file that is in the archive.
311 break_per_url: Whether break_on_reject and break_on_existing
312 should act on each input URL as opposed to for the entire queue
313 cookiefile: File name or text stream from where cookies should be read and dumped to
314 cookiesfrombrowser: A tuple containing the name of the browser, the profile
315 name/path from where cookies are loaded, the name of the keyring,
316 and the container name, e.g. ('chrome', ) or
317 ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
318 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
319 support RFC 5746 secure renegotiation
320 nocheckcertificate: Do not verify SSL certificates
321 client_certificate: Path to client certificate file in PEM format. May include the private key
322 client_certificate_key: Path to private key file for client certificate
323 client_certificate_password: Password for client certificate private key, if encrypted.
324 If not provided and the key is encrypted, yt-dlp will ask interactively
325 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
326 (Only supported by some extractors)
327 enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons.
328 http_headers: A dictionary of custom headers to be used for all requests
329 proxy: URL of the proxy server to use
330 geo_verification_proxy: URL of the proxy to use for IP address verification
331 on geo-restricted sites.
332 socket_timeout: Time to wait for unresponsive hosts, in seconds
333 bidi_workaround: Work around buggy terminals without bidirectional text
334 support, using fribidi
335 debug_printtraffic:Print out sent and received HTTP traffic
336 default_search: Prepend this string if an input url is not valid.
337 'auto' for elaborate guessing
338 encoding: Use this encoding instead of the system-specified.
339 extract_flat: Whether to resolve and process url_results further
340 * False: Always process. Default for API
341 * True: Never process
342 * 'in_playlist': Do not process inside playlist/multi_video
343 * 'discard': Always process, but don't return the result
344 from inside playlist/multi_video
345 * 'discard_in_playlist': Same as "discard", but only for
346 playlists (not multi_video). Default for CLI
347 wait_for_video: If given, wait for scheduled streams to become available.
348 The value should be a tuple containing the range
349 (min_secs, max_secs) to wait between retries
350 postprocessors: A list of dictionaries, each with an entry
351 * key: The name of the postprocessor. See
352 yt_dlp/postprocessor/__init__.py for a list.
353 * when: When to run the postprocessor. Allowed values are
354 the entries of utils.POSTPROCESS_WHEN
355 Assumed to be 'post_process' if not given
356 progress_hooks: A list of functions that get called on download
357 progress, with a dictionary with the entries
358 * status: One of "downloading", "error", or "finished".
359 Check this first and ignore unknown values.
360 * info_dict: The extracted info_dict
361
362 If status is one of "downloading", or "finished", the
363 following properties may also be present:
364 * filename: The final filename (always present)
365 * tmpfilename: The filename we're currently writing to
366 * downloaded_bytes: Bytes on disk
367 * total_bytes: Size of the whole file, None if unknown
368 * total_bytes_estimate: Guess of the eventual file size,
369 None if unavailable.
370 * elapsed: The number of seconds since download started.
371 * eta: The estimated time in seconds, None if unknown
372 * speed: The download speed in bytes/second, None if
373 unknown
374 * fragment_index: The counter of the currently
375 downloaded video fragment.
376 * fragment_count: The number of fragments (= individual
377 files that will be merged)
378
379 Progress hooks are guaranteed to be called at least once
380 (with status "finished") if the download is successful.
381 postprocessor_hooks: A list of functions that get called on postprocessing
382 progress, with a dictionary with the entries
383 * status: One of "started", "processing", or "finished".
384 Check this first and ignore unknown values.
385 * postprocessor: Name of the postprocessor
386 * info_dict: The extracted info_dict
387
388 Postprocessor hooks are guaranteed to be called at least twice
389 (with status "started" and "finished") if the processing is successful.
390 merge_output_format: "/" separated list of extensions to use when merging formats.
391 final_ext: Expected final extension; used to detect when the file was
392 already downloaded and converted
393 fixup: Automatically correct known faults of the file.
394 One of:
395 - "never": do nothing
396 - "warn": only emit a warning
397 - "detect_or_warn": check whether we can do anything
398 about it, warn otherwise (default)
399 source_address: Client-side IP address to bind to.
400 sleep_interval_requests: Number of seconds to sleep between requests
401 during extraction
402 sleep_interval: Number of seconds to sleep before each download when
403 used alone or a lower bound of a range for randomized
404 sleep before each download (minimum possible number
405 of seconds to sleep) when used along with
406 max_sleep_interval.
407 max_sleep_interval:Upper bound of a range for randomized sleep before each
408 download (maximum possible number of seconds to sleep).
409 Must only be used along with sleep_interval.
410 Actual sleep time will be a random float from range
411 [sleep_interval; max_sleep_interval].
412 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
413 listformats: Print an overview of available video formats and exit.
414 list_thumbnails: Print a table of all thumbnails and exit.
415 match_filter: A function that gets called for every video with the signature
416 (info_dict, *, incomplete: bool) -> Optional[str]
417 For backward compatibility with youtube-dl, the signature
418 (info_dict) -> Optional[str] is also allowed.
419 - If it returns a message, the video is ignored.
420 - If it returns None, the video is downloaded.
421 - If it returns utils.NO_DEFAULT, the user is interactively
422 asked whether to download the video.
423 - Raise utils.DownloadCancelled(msg) to abort remaining
424 downloads when a video is rejected.
425 match_filter_func in utils/_utils.py is one example for this.
426 color: A Dictionary with output stream names as keys
427 and their respective color policy as values.
428 Can also just be a single color policy,
429 in which case it applies to all outputs.
430 Valid stream names are 'stdout' and 'stderr'.
431 Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
432 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
433 HTTP header
434 geo_bypass_country:
435 Two-letter ISO 3166-1 alpha-2 country code that will be used for
436 explicit geographic restriction bypassing via faking
437 X-Forwarded-For HTTP header
438 geo_bypass_ip_block:
439 IP range in CIDR notation that will be used similarly to
440 geo_bypass_country
441 external_downloader: A dictionary of protocol keys and the executable of the
442 external downloader to use for it. The allowed protocols
443 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
444 Set the value to 'native' to use the native downloader
445 compat_opts: Compatibility options. See "Differences in default behavior".
446 The following options do not work when used through the API:
447 filename, abort-on-error, multistreams, no-live-chat, format-sort
448 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
449 Refer to __init__.py for their implementation
450 progress_template: Dictionary of templates for progress outputs.
451 Allowed keys are 'download', 'postprocess',
452 'download-title' (console title) and 'postprocess-title'.
453 The template is mapped on a dictionary with keys 'progress' and 'info'
454 retry_sleep_functions: Dictionary of functions that takes the number of attempts
455 as argument and returns the time to sleep in seconds.
456 Allowed keys are 'http', 'fragment', 'file_access'
457 download_ranges: A callback function that gets called for every video with
458 the signature (info_dict, ydl) -> Iterable[Section].
459 Only the returned sections will be downloaded.
460 Each Section is a dict with the following keys:
461 * start_time: Start time of the section in seconds
462 * end_time: End time of the section in seconds
463 * title: Section title (Optional)
464 * index: Section number (Optional)
465 force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
466 noprogress: Do not print the progress bar
467 live_from_start: Whether to download livestreams videos from the start
468
469 The following parameters are not used by YoutubeDL itself, they are used by
470 the downloader (see yt_dlp/downloader/common.py):
471 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
472 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
473 continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
474 external_downloader_args, concurrent_fragment_downloads.
475
476 The following options are used by the post processors:
477 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
478 to the binary or its containing directory.
479 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
480 and a list of additional command-line arguments for the
481 postprocessor/executable. The dict can also have "PP+EXE" keys
482 which are used when the given exe is used by the given PP.
483 Use 'default' as the name for arguments to be passed to all PPs.
484 For compatibility with youtube-dl, a single list of args
485 can also be used
486
487 The following options are used by the extractors:
488 extractor_retries: Number of times to retry for known errors (default: 3)
489 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
490 hls_split_discontinuity: Split HLS playlists to different formats at
491 discontinuities such as ad breaks (default: False)
492 extractor_args: A dictionary of arguments to be passed to the extractors.
493 See "EXTRACTOR ARGUMENTS" for details.
494 E.g. {'youtube': {'skip': ['dash', 'hls']}}
495 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
496
497 The following options are deprecated and may be removed in the future:
498
499 break_on_reject: Stop the download process when encountering a video that
500 has been filtered out.
501 - `raise DownloadCancelled(msg)` in match_filter instead
502 force_generic_extractor: Force downloader to use the generic extractor
503 - Use allowed_extractors = ['generic', 'default']
504 playliststart: - Use playlist_items
505 Playlist item to start at.
506 playlistend: - Use playlist_items
507 Playlist item to end at.
508 playlistreverse: - Use playlist_items
509 Download playlist items in reverse order.
510 forceurl: - Use forceprint
511 Force printing final URL.
512 forcetitle: - Use forceprint
513 Force printing title.
514 forceid: - Use forceprint
515 Force printing ID.
516 forcethumbnail: - Use forceprint
517 Force printing thumbnail URL.
518 forcedescription: - Use forceprint
519 Force printing description.
520 forcefilename: - Use forceprint
521 Force printing final filename.
522 forceduration: - Use forceprint
523 Force printing duration.
524 allsubtitles: - Use subtitleslangs = ['all']
525 Downloads all the subtitles of the video
526 (requires writesubtitles or writeautomaticsub)
527 include_ads: - Doesn't work
528 Download ads as well
529 call_home: - Not implemented
530 Boolean, true iff we are allowed to contact the
531 yt-dlp servers for debugging.
532 post_hooks: - Register a custom postprocessor
533 A list of functions that get called as the final step
534 for each video file, after all postprocessors have been
535 called. The filename will be passed as the only argument.
536 hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
537 Use the native HLS downloader instead of ffmpeg/avconv
538 if True, otherwise use ffmpeg/avconv if False, otherwise
539 use downloader suggested by extractor if None.
540 prefer_ffmpeg: - avconv support is deprecated
541 If False, use avconv instead of ffmpeg if both are available,
542 otherwise prefer ffmpeg.
543 youtube_include_dash_manifest: - Use extractor_args
544 If True (default), DASH manifests and related
545 data will be downloaded and processed by extractor.
546 You can reduce network I/O by disabling it if you don't
547 care about DASH. (only for youtube)
548 youtube_include_hls_manifest: - Use extractor_args
549 If True (default), HLS manifests and related
550 data will be downloaded and processed by extractor.
551 You can reduce network I/O by disabling it if you don't
552 care about HLS. (only for youtube)
553 no_color: Same as `color='no_color'`
554 no_overwrites: Same as `overwrites=False`
555 """
556
# Names of fields treated as numeric.
# NOTE(review): how this set is consumed is outside the visible chunk — confirm usage.
_NUMERIC_FIELDS = {
    # Stream properties
    'width', 'height', 'fps', 'asr', 'audio_channels',
    'tbr', 'abr', 'vbr',
    'filesize', 'filesize_approx',
    # Dates and times
    'timestamp', 'release_timestamp', 'release_year',
    'duration', 'start_time', 'end_time',
    # Counters and ratings
    'view_count', 'like_count', 'dislike_count', 'repost_count', 'comment_count',
    'average_rating', 'age_limit',
    # Item numbering
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number',
}

# Names of the per-format fields.
_format_fields = {
    # NB: Keep in sync with the docstring of extractor/common.py
    'url', 'manifest_url', 'manifest_stream_number',
    'ext', 'format', 'format_id', 'format_note',
    'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range',
    'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
    'vbr', 'fps', 'vcodec', 'container',
    'filesize', 'filesize_approx', 'rows', 'columns',
    'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
    'preference', 'language', 'language_preference',
    'quality', 'source_preference', 'cookies',
    'http_headers', 'stretched_ratio', 'no_resume', 'has_drm',
    'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
    'page_url', 'app', 'play_path', 'tc_url', 'flash_version',
    'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
}

# File extensions per media category, taken from MEDIA_EXTENSIONS;
# '3gp' is added on top of the common video extensions.
_format_selection_exts = dict(
    audio=set(MEDIA_EXTENSIONS.common_audio),
    video=set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
    storyboards=set(MEDIA_EXTENSIONS.storyboards),
)
583
584 def __init__(self, params=None, auto_init=True):
585 """Create a YoutubeDL object with the given options.
586 @param auto_init Whether to load the default extractors and print header (if verbose).
587 Set to 'no_verbose_header' to not print the header
588 """
589 if params is None:
590 params = {}
591 self.params = params
592 self._ies = {}
593 self._ies_instances = {}
594 self._pps = {k: [] for k in POSTPROCESS_WHEN}
595 self._printed_messages = set()
596 self._first_webpage_request = True
597 self._post_hooks = []
598 self._progress_hooks = []
599 self._postprocessor_hooks = []
600 self._download_retcode = 0
601 self._num_downloads = 0
602 self._num_videos = 0
603 self._playlist_level = 0
604 self._playlist_urls = set()
605 self.cache = Cache(self)
606 self.__header_cookies = []
607
608 stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
609 self._out_files = Namespace(
610 out=stdout,
611 error=sys.stderr,
612 screen=sys.stderr if self.params.get('quiet') else stdout,
613 console=None if compat_os_name == 'nt' else next(
614 filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
615 )
616
617 try:
618 windows_enable_vt_mode()
619 except Exception as e:
620 self.write_debug(f'Failed to enable VT mode: {e}')
621
622 if self.params.get('no_color'):
623 if self.params.get('color') is not None:
624 self.params.setdefault('_warnings', []).append(
625 'Overwriting params from "color" with "no_color"')
626 self.params['color'] = 'no_color'
627
628 term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'
629
def process_color_policy(stream):
    """Resolve the color policy that applies to ``stream``.

    ``stream`` must be ``sys.stdout`` or ``sys.stderr`` (anything else raises
    KeyError). The policy is read from ``self.params['color']``, which may be a
    mapping keyed by stream name or a single policy string.
    NOTE(review): the lookup order (per-stream entry first, then the bare value)
    relies on traverse_obj branching semantics — confirm against its implementation.

    Returns:
        - for 'auto' or no policy: whether the terminal allows color
          (``term_allow_color``) and the stream supports terminal sequences
        - for 'always': True
        - for 'never': False
        - for 'no_color': the string 'no_color' itself
    """
    names_by_stream = {sys.stdout: 'stdout', sys.stderr: 'stderr'}
    stream_name = names_by_stream[stream]
    policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)

    if policy is None or policy == 'auto':
        return term_allow_color and supports_terminal_sequences(stream)

    assert policy in ('always', 'never', 'no_color'), policy
    if policy == 'always':
        return True
    if policy == 'never':
        return False
    # Any other accepted policy is passed through unchanged
    return policy
637
638 self._allow_colors = Namespace(**{
639 name: process_color_policy(stream)
640 for name, stream in self._out_files.items_ if name != 'console'
641 })
642
643 # The code is left like this to be reused for future deprecations
644 MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
645 current_version = sys.version_info[:2]
646 if current_version < MIN_RECOMMENDED:
647 msg = ('Support for Python version %d.%d has been deprecated. '
648 'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
649 '\n You will no longer receive updates on this version')
650 if current_version < MIN_SUPPORTED:
651 msg = 'Python version %d.%d is no longer supported'
652 self.deprecated_feature(
653 f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))
654
655 if self.params.get('allow_unplayable_formats'):
656 self.report_warning(
657 f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
658 'This is a developer option intended for debugging. \n'
659 ' If you experience any issues while using this option, '
660 f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')
661
662 if self.params.get('bidi_workaround', False):
663 try:
664 import pty
665 master, slave = pty.openpty()
666 width = shutil.get_terminal_size().columns
667 width_args = [] if width is None else ['-w', str(width)]
668 sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
669 try:
670 self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
671 except OSError:
672 self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
673 self._output_channel = os.fdopen(master, 'rb')
674 except OSError as ose:
675 if ose.errno == errno.ENOENT:
676 self.report_warning(
677 'Could not find fribidi executable, ignoring --bidi-workaround. '
678 'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
679 else:
680 raise
681
682 self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
683 self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
684 self._load_cookies(self.params['http_headers'].get('Cookie')) # compat
685 self.params['http_headers'].pop('Cookie', None)
686 self._request_director = self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES)
687
688 if auto_init and auto_init != 'no_verbose_header':
689 self.print_debug_header()
690
def check_deprecated(param, option, suggestion):
    """Warn when the deprecated parameter ``param`` has been set.

    @param param       Key looked up in self.params
    @param option      Name of the deprecated option, as shown in the warning
    @param suggestion  Replacement to recommend in the warning
    @returns           True if the parameter is set (not None) and a warning
                       was reported, False otherwise
    """
    if self.params.get(param) is None:
        return False
    self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
    return True
696
697 if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
698 if self.params.get('geo_verification_proxy') is None:
699 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
700
701 check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
702 check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
703 check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
704
705 for msg in self.params.get('_warnings', []):
706 self.report_warning(msg)
707 for msg in self.params.get('_deprecation_warnings', []):
708 self.deprecated_feature(msg)
709
710 if 'list-formats' in self.params['compat_opts']:
711 self.params['listformats_table'] = False
712
713 if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
714 # nooverwrites was unnecessarily changed to overwrites
715 # in 0c3d0f51778b153f65c21906031c2e091fcfb641
716 # This ensures compatibility with both keys
717 self.params['overwrites'] = not self.params['nooverwrites']
718 elif self.params.get('overwrites') is None:
719 self.params.pop('overwrites', None)
720 else:
721 self.params['nooverwrites'] = not self.params['overwrites']
722
723 if self.params.get('simulate') is None and any((
724 self.params.get('list_thumbnails'),
725 self.params.get('listformats'),
726 self.params.get('listsubtitles'),
727 )):
728 self.params['simulate'] = 'list_only'
729
730 self.params.setdefault('forceprint', {})
731 self.params.setdefault('print_to_file', {})
732
733 # Compatibility with older syntax
734 if not isinstance(params['forceprint'], dict):
735 self.params['forceprint'] = {'video': params['forceprint']}
736
737 if auto_init:
738 self.add_default_info_extractors()
739
740 if (sys.platform != 'win32'
741 and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
742 and not self.params.get('restrictfilenames', False)):
743 # Unicode filesystem API will throw errors (#1474, #13027)
744 self.report_warning(
745 'Assuming --restrict-filenames since file system encoding '
746 'cannot encode all characters. '
747 'Set the LC_ALL environment variable to fix this.')
748 self.params['restrictfilenames'] = True
749
750 self._parse_outtmpl()
751
752 # Creating format selector here allows us to catch syntax errors before the extraction
753 self.format_selector = (
754 self.params.get('format') if self.params.get('format') in (None, '-')
755 else self.params['format'] if callable(self.params['format'])
756 else self.build_format_selector(self.params['format']))
757
758 hooks = {
759 'post_hooks': self.add_post_hook,
760 'progress_hooks': self.add_progress_hook,
761 'postprocessor_hooks': self.add_postprocessor_hook,
762 }
763 for opt, fn in hooks.items():
764 for ph in self.params.get(opt, []):
765 fn(ph)
766
767 for pp_def_raw in self.params.get('postprocessors', []):
768 pp_def = dict(pp_def_raw)
769 when = pp_def.pop('when', 'post_process')
770 self.add_post_processor(
771 get_postprocessor(pp_def.pop('key'))(self, **pp_def),
772 when=when)
773
774 def preload_download_archive(fn):
775 """Preload the archive, if any is specified"""
776 archive = set()
777 if fn is None:
778 return archive
779 elif not is_path_like(fn):
780 return fn
781
782 self.write_debug(f'Loading archive file {fn!r}')
783 try:
784 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
785 for line in archive_file:
786 archive.add(line.strip())
787 except OSError as ioe:
788 if ioe.errno != errno.ENOENT:
789 raise
790 return archive
791
792 self.archive = preload_download_archive(self.params.get('download_archive'))
793
def warn_if_short_id(self, argv):
    """Warn about arguments that look like a video ID starting with a dash.

    An argument made of a dash followed by exactly ten ID characters would be
    read as an option. When any is found, a warning shows the command line
    rewritten with those arguments moved behind `--`.
    """
    # short YouTube ID starting with dash?
    short_id_re = re.compile(r'^-[0-9A-Za-z_-]{10}$')
    dashed_ids, other_args = [], []
    for arg in argv:
        (dashed_ids if short_id_re.match(arg) else other_args).append(arg)
    if not dashed_ids:
        return

    correct_argv = ['yt-dlp', *other_args, '--', *dashed_ids]
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s' %
        args_to_str(correct_argv))
809
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list."""
    key = ie.ie_key()
    self._ies[key] = ie
    if isinstance(ie, type):
        # A class is only registered by key; nothing more to do for it
        return
    # An instance is additionally cached and bound to this downloader
    self._ies_instances[key] = ie
    ie.set_downloader(self)
817
def get_info_extractor(self, ie_key):
    """
    Return an instance of the IE named ie_key.

    An instance already held in the instance cache is reused; otherwise the
    extractor class is looked up, instantiated and added to the extractor list.
    """
    cached = self._ies_instances.get(ie_key)
    if cached is not None:
        return cached
    ie = get_info_extractor(ie_key)()
    self.add_info_extractor(ie)
    return ie
829
def add_default_info_extractors(self):
    """
    Add the InfoExtractors returned by gen_extractors to the end of the list
    """
    all_ies = {}
    for ie_class in gen_extractor_classes():
        all_ies[ie_class.IE_NAME.lower()] = ie_class
    all_ies['end'] = UnsupportedURLIE()

    # Named groups that the 'allowed_extractors' option may refer to
    selectable = {
        'all': list(all_ies),
        'default': [name for name, ie in all_ies.items() if ie._ENABLED],
    }
    requested = self.params.get('allowed_extractors', ['default'])
    try:
        ie_names = orderedSet_from_options(requested, selectable, use_regex=True)
    except re.error as e:
        raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')

    for name in ie_names:
        self.add_info_extractor(all_ies[name])
    self.write_debug(f'Loaded {len(ie_names)} extractors')
847
def add_post_processor(self, pp, when='post_process'):
    """Add a PostProcessor object to the end of the chain."""
    assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
    chain = self._pps[when]
    chain.append(pp)
    pp.set_downloader(self)
853
def add_post_hook(self, ph):
    """Add the post hook

    @param ph   The hook; it is appended to the list of post hooks
    """
    self._post_hooks.append(ph)
857
def add_progress_hook(self, ph):
    """Add the download progress hook

    @param ph   The hook; it is appended to the list of progress hooks
    """
    self._progress_hooks.append(ph)
861
def add_postprocessor_hook(self, ph):
    """Add the postprocessing progress hook"""
    self._postprocessor_hooks.append(ph)
    # Also attach the hook to every postprocessor that is already registered
    for pp in itertools.chain.from_iterable(self._pps.values()):
        pp.add_progress_hook(ph)
868
def _bidi_workaround(self, message):
    """Pass message through the bidi helper process, if one has been set up.

    Without an output channel the message is returned unchanged.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, str)
    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + '\n').encode())
    helper_stdin.flush()
    # Read back as many lines as were written
    converted = []
    for _ in range(message.count('\n') + 1):
        converted.append(self._output_channel.readline().decode())
    # Drop the trailing newline that was appended before writing
    return ''.join(converted)[:-len('\n')]
881
def _write_string(self, message, out=None, only_once=False):
    """Write message to out using the 'encoding' param.

    With only_once, a message that was already written this way is skipped.
    """
    if only_once and message in self._printed_messages:
        return
    if only_once:
        self._printed_messages.add(message)
    write_string(message, out=out, encoding=self.params.get('encoding'))
888
def to_stdout(self, message, skip_eol=False, quiet=None):
    """Print message to stdout"""
    # Both extra arguments are only accepted in order to warn about them
    removed_args = (
        (quiet is not None,
         '"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
         'Use "YoutubeDL.to_screen" instead'),
        (skip_eol is not False,
         '"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
         'Use "YoutubeDL.to_screen" instead'),
    )
    for was_passed, warning in removed_args:
        if was_passed:
            self.deprecation_warning(warning)

    line = self._bidi_workaround(message)
    self._write_string(f'{line}\n', self._out_files.out)
898
def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
    """Print message to screen if not in quiet mode"""
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return

    # An explicit `quiet` argument takes precedence over the 'quiet' param
    be_quiet = self.params.get('quiet') if quiet is None else quiet
    if be_quiet and not self.params.get('verbose'):
        return

    eol = '' if skip_eol else '\n'
    self._write_string(
        '%s%s' % (self._bidi_workaround(message), eol),
        self._out_files.screen, only_once=only_once)
909
def to_stderr(self, message, only_once=False):
    """Print message to stderr"""
    assert isinstance(message, str)
    logger = self.params.get('logger')
    if logger:
        # A configured logger receives the message instead of the error stream
        logger.error(message)
        return
    self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)
917
def _send_console_code(self, code):
    """Write a terminal control code to the console handle.

    Nothing is written on 'nt' or when there is no console handle.
    """
    console = None if compat_os_name == 'nt' else self._out_files.console
    if console:
        self._write_string(code, console)
922
def to_console_title(self, message):
    """Set the console title to message when the 'consoletitle' param is enabled."""
    if not self.params.get('consoletitle', False):
        return
    message = remove_terminal_sequences(message)
    if compat_os_name != 'nt':
        self._send_console_code(f'\033]0;{message}\007')
        return
    kernel32 = ctypes.windll.kernel32
    if kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
934
def save_console_title(self):
    """Ask the terminal to save its title; skipped without 'consoletitle' or when simulating."""
    if self.params.get('consoletitle') and not self.params.get('simulate'):
        self._send_console_code('\033[22;0t')  # Save the title on stack
939
def restore_console_title(self):
    """Ask the terminal to restore its title; skipped without 'consoletitle' or when simulating."""
    if self.params.get('consoletitle') and not self.params.get('simulate'):
        self._send_console_code('\033[23;0t')  # Restore the title from stack
944
def __enter__(self):
    """Enter the context manager: save the console title and return this object."""
    self.save_console_title()
    return self
948
def save_cookies(self):
    """Save the cookie jar, but only when the 'cookiefile' param is set."""
    if self.params.get('cookiefile') is not None:
        self.cookiejar.save()
952
def __exit__(self, *args):
    """Leave the context manager: restore the console title, then close.

    close() runs even when restoring the title raises, so cleanup is never
    skipped; the original exception still propagates afterwards.
    """
    try:
        self.restore_console_title()
    finally:
        self.close()
956
def close(self):
    """Save the cookies and close the request director.

    The request director is closed even when saving the cookies raises;
    the exception still propagates afterwards.
    """
    try:
        self.save_cookies()
    finally:
        self._request_director.close()
960
def trouble(self, message=None, tb=None, is_error=True):
    """Determine action to take when a download problem appears.

    The message (if any) is printed first. Then, depending on whether the
    downloader has been configured to ignore download errors, this method
    either raises DownloadError or records a failing return code.

    @param tb          If given, is additional traceback information
    @param is_error    Whether to raise error according to ignorerrors
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            exc_type, exc_value = sys.exc_info()[:2]
            if exc_type:  # if .trouble has been called from an except block
                pieces = []
                # The active exception may carry the exc_info of the error it wraps
                if hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
                    pieces.append(''.join(traceback.format_exception(*exc_value.exc_info)))
                pieces.append(encode_compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if not is_error:
        return
    if self.params.get('ignoreerrors'):
        self._download_retcode = 1
        return

    exc_type, exc_value = sys.exc_info()[:2]
    if exc_type and hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
        exc_info = exc_value.exc_info
    else:
        exc_info = sys.exc_info()
    raise DownloadError(message, exc_info)
994
# Named text styles; each value is a style string that the reporting
# helpers pass on to the _format_* methods (e.g. Styles.WARNING for "WARNING:")
Styles = Namespace(
    HEADERS='yellow',
    EMPHASIS='light blue',
    FILENAME='green',
    ID='green',
    DELIM='blue',
    ERROR='red',
    BAD_FORMAT='light red',
    WARNING='yellow',
    SUPPRESS='light black',
)
1006
def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
    """Return text styled with f when colors are allowed, else a plain variant.

    @param handle          Stream whose `encoding` is consulted when test_encoding is set
    @param allow_colors    Styling is applied only when this is exactly True
    @param fallback        Returned instead of the text when colors are not allowed,
                           or (with test_encoding) when the text cannot be fully encoded
    @param test_encoding   Whether to drop characters the output encoding cannot represent
    """
    text = str(text)
    if test_encoding:
        # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
        encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
        encodable = text.encode(encoding, 'ignore').decode(encoding)
        if fallback is not None and encodable != text:
            encodable = fallback
        text = encodable

    if allow_colors is True:
        return format_text(text, f)
    if fallback is None:
        return text
    return fallback
1017
def _format_out(self, *args, **kwargs):
    """Call _format_text for the `out` stream; all arguments are passed through."""
    return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)
1020
def _format_screen(self, *args, **kwargs):
    """Call _format_text for the `screen` stream; all arguments are passed through."""
    return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)
1023
def _format_err(self, *args, **kwargs):
    """Call _format_text for the `error` stream; all arguments are passed through."""
    return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)
1026
def report_warning(self, message, only_once=False):
    '''
    Report a warning, prefixed with 'WARNING:' when printed to stderr.

    A configured logger receives the bare message instead. Without a logger
    the 'no_warnings' param suppresses the output. The prefix is colored if
    colors are allowed for stderr.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    prefix = self._format_err('WARNING:', self.Styles.WARNING)
    self.to_stderr(f'{prefix} {message}', only_once)
1038
def deprecation_warning(self, message, *, stacklevel=0):
    """Forward message to the module-level deprecation_warning helper.

    report_error is passed as the printer together with is_error=False.
    stacklevel is increased by one, presumably to skip this wrapper's frame.
    """
    deprecation_warning(
        message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)
1042
def deprecated_feature(self, message):
    """Report use of a deprecated feature; each message is printed to stderr only once."""
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(f'Deprecated Feature: {message}')
    # NOTE: this runs with a logger too, and to_stderr then reports to the logger again
    label = self._format_err('Deprecated Feature:', self.Styles.ERROR)
    self.to_stderr(f'{label} {message}', True)
1047
def report_error(self, message, *args, **kwargs):
    '''
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file. Extra arguments are passed on to trouble.
    '''
    prefix = self._format_err('ERROR:', self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', *args, **kwargs)
1054
def write_debug(self, message, only_once=False):
    '''Log debug message or Print message to stderr (only in verbose mode)'''
    if not self.params.get('verbose', False):
        return
    line = f'[debug] {message}'
    logger = self.params.get('logger')
    if logger:
        logger.debug(line)
        return
    self.to_stderr(line, only_once)
1064
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded."""
    try:
        report = '[download] %s has already been downloaded' % file_name
        self.to_screen(report)
    except UnicodeEncodeError:
        # Fall back to a message that does not contain the file name
        self.to_screen('[download] The file has already been downloaded')
1071
def report_file_delete(self, file_name):
    """Report that existing file will be deleted."""
    try:
        report = 'Deleting existing file %s' % file_name
        self.to_screen(report)
    except UnicodeEncodeError:
        # Fall back to a message that does not contain the file name
        self.to_screen('Deleting existing file')
1078
def raise_no_formats(self, info, forced=False, *, msg=None):
    """Raise ExtractorError for a video without formats, or only warn about it.

    A warning is issued instead of the error when the
    'ignore_no_formats_error' param is set and forced is false.

    @param info     Info dict; '_has_drm', 'id' and 'extractor' are read from it
    @param forced   Raise even when the error is configured to be ignored
    @param msg      Custom message; when given, the error is marked as expected
    """
    has_drm = info.get('_has_drm')
    ignored = self.params.get('ignore_no_formats_error')
    expected = bool(msg)
    if not msg:
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'

    if ignored and not forced:
        self.report_warning(msg)
        return
    raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                         expected=has_drm or ignored or expected)
1088
def parse_outtmpl(self):
    """Deprecated public wrapper around _parse_outtmpl.

    Emits a deprecation warning, runs _parse_outtmpl and returns params['outtmpl'].
    """
    self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
    self._parse_outtmpl()
    return self.params['outtmpl']
1093
def _parse_outtmpl(self):
    """Make params['outtmpl'] a dict and fill in the default for every missing template."""
    if self.params.get('restrictfilenames'):  # Remove spaces in the default template
        def sanitize(tmpl):
            return tmpl.replace(' - ', ' ').replace(' ', '-')
    else:
        sanitize = IDENTITY

    outtmpl = self.params.setdefault('outtmpl', {})
    if not isinstance(outtmpl, dict):
        # A bare template is treated as the 'default' one
        outtmpl = {'default': outtmpl}
        self.params['outtmpl'] = outtmpl

    for tmpl_key, default_tmpl in DEFAULT_OUTTMPL.items():
        if outtmpl.get(tmpl_key) is None:
            outtmpl[tmpl_key] = sanitize(default_tmpl)
1103
def get_output_path(self, dir_type='', filename=None):
    """Join the 'home' path, the path configured for dir_type and filename.

    @param dir_type   Key into the 'paths' param; no type-specific directory is used when empty
    @param filename   File name to append, if any
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    joined = os.path.join(home_dir, type_dir, filename or '')
    return sanitize_path(joined, force=self.params.get('windowsfilenames'))
1112
@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Apply expand_path to an output template while keeping '%%' and '$$' intact."""
    # expand_path translates '%%' into '%' and '$$' into '$'
    # correspondingly that is not what we want since we need to keep
    # '%%' intact for template dict substitution step. Working around
    # with boundary-alike separator hack.
    sep = ''.join(random.choices(string.ascii_letters, k=32))
    protected = outtmpl
    for char in '%$':
        protected = protected.replace(char * 2, f'{char}{sep}{char}')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    expanded = expand_path(protected)
    return expanded.replace(sep, '')
1127
@staticmethod
def escape_outtmpl(outtmpl):
    ''' Escape any remaining strings like %s, %abc% etc. '''
    def _escape(mobj):
        # Matches that carry a key are left alone; the others get an extra '%'
        prefix = '' if mobj.group('has_key') else '%'
        return prefix + mobj.group(0)

    pattern = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')
    return re.sub(pattern, _escape, outtmpl)
1135
@classmethod
def validate_outtmpl(cls, outtmpl):
    ''' @return None or Exception object '''
    custom_type_re = STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]')
    expanded = cls._outtmpl_expandpath(outtmpl)
    # Swap the custom conversion types for 's' so that plain %-formatting accepts them
    outtmpl = re.sub(custom_type_re, lambda mobj: f'{mobj.group(0)[:-1]}s', expanded)
    try:
        cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None
1148
1149 @staticmethod
1150 def _copy_infodict(info_dict):
1151 info_dict = dict(info_dict)
1152 info_dict.pop('__postprocessors', None)
1153 info_dict.pop('__pending_error', None)
1154 return info_dict
1155
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
    @param sanitize    Whether to sanitize the output as a filename.
                       For backward compatibility, a function can also be passed
    @return            Tuple (outtmpl, TMPL_DICT): the template with each matched field
                       rewritten to use a generated key, and the dict that maps those
                       keys to the values computed for them
    """

    # NB: this is set on the dict passed in by the caller, before it is copied below
    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    # Everything below works on a copy, so the derived fields do not leak to the caller
    info_dict = self._copy_infodict(info_dict)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
    info_dict['video_autonumber'] = self._num_videos
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
        'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    # Filled by create_key() while the template is being rewritten
    TMPL_DICT = {}
    # Matches a %(...)X placeholder, accepting the custom conversion types as well
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
    # Arithmetic supported inside a field expression; operands are floats
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int, slice or "{field, ...}"
    FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
    FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
        'inner': FIELD_INNER_RE,
        'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
    }
    MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    # Grammar of what is written between the parentheses of a placeholder:
    # [-]fields[maths][>strf_format][,alternate][&replacement][|default]
    INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
        (?P<negate>-)?
        (?P<fields>{FIELD_RE})
        (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
        (?:>(?P<strf_format>.+?))?
        (?P<remaining>
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
        )$''')

    def _traverse_infodict(fields):
        # Split the dotted path into its parts, keeping each "{...}" group in one piece
        fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                  for f in ([x] if x.startswith('{') else x.split('.'))]
        # Drop an empty first and/or last part
        for i in (0, -1):
            if fields and not fields[i]:
                fields.pop(i)

        # Turn every "{a,b.c}" part into a dict that maps each name to its own path
        for i, f in enumerate(fields):
            if not f.startswith('{'):
                continue
            assert f.endswith('}'), f'No closing brace for {f} in {fields}'
            fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}

        return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # mdict is the groupdict() of an INTERNAL_FORMAT_RE match
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            # The remaining text is consumed alternately as an operator and an operand;
            # `operator` is None whenever an operator is expected next
            operator = None
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if operator is None:
                    operator = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                # The operand is either a number or the name of another field
                offset = float_or_none(item)
                if offset is None:
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = operator(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    # E.g. one side of the operation is None
                    return None
                operator = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
        if sanitize and value == '':
            value = None
        return value

    # Text used for fields that have neither a value nor a default of their own
    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
        # Only with the 'filename-sanitization' compat option is is_id derived from the key
        return sanitize_filename(str(value), restricted=restricted, is_id=(
            bool(re.search(r'(^|[_.])id(\.|$)', key))
            if 'filename-sanitization' in self.params['compat_opts']
            else NO_DEFAULT))

    # `sanitize` may be a function (backward compatibility); from here on it is a plain flag
    sanitizer = sanitize if callable(sanitize) else filename_sanitizer
    sanitize = bool(sanitize)

    def _dumpjson_default(obj):
        # json.dumps fallback: sets and LazyLists become lists, anything else its repr
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        return repr(obj)

    class _ReplacementFormatter(string.Formatter):
        # Only numeric field names are accepted, and all of them refer to the first argument
        def get_field(self, field_name, args, kwargs):
            if field_name.isdigit():
                return args[0], -1
            raise ValueError('Unsupported field')

    replacement_formatter = _ReplacementFormatter()

    def create_key(outer_mobj):
        # re.sub callback: computes the value of one placeholder, stores it in TMPL_DICT
        # and returns the placeholder rewritten to refer to the stored value
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        value, replacement, default, last_field = None, None, na, ''
        # Try the field and then each ",alternate" in turn until one yields a value
        while mobj:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else default
            value = get_value(mobj)
            last_field, replacement = mobj['fields'], mobj['replacement']
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
            else:
                break

        if None not in (value, replacement):
            try:
                value = replacement_formatter.format(replacement, value)
            except ValueError:
                # A replacement that cannot be formatted makes the field count as missing
                value, default = None, na

        fmt = outer_mobj.group('format')
        if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int):
            fmt = f'0{field_size_compat_map[last_field]:d}d'

        flags = outer_mobj.group('conversion') or ''
        # Same format spec, but with 's' as its conversion type
        str_fmt = f'{fmt[:-1]}s'
        if value is None:
            value, fmt = default, 's'
        elif fmt[-1] == 'l':  # list
            delim = '\n' if '#' in flags else ', '
            value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
        elif fmt[-1] == 'j':  # json
            value, fmt = json.dumps(
                value, default=_dumpjson_default,
                indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
        elif fmt[-1] == 'h':  # html
            value, fmt = escapeHTML(str(value)), str_fmt
        elif fmt[-1] == 'q':  # quoted
            value = map(str, variadic(value) if '#' in flags else [value])
            value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
        elif fmt[-1] == 'B':  # bytes
            value = f'%{str_fmt}'.encode() % str(value).encode()
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif fmt[-1] == 'U':  # unicode normalized
            value, fmt = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                value), str_fmt
        elif fmt[-1] == 'D':  # decimal suffix
            num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
            value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                          factor=1024 if '#' in flags else 1000)
        elif fmt[-1] == 'S':  # filename sanitization
            value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
        elif fmt[-1] == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif fmt[-1] not in 'rsa':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            # If value is an object, sanitize might convert it to a string
            # So we convert it to repr first
            if fmt[-1] == 'r':
                value, fmt = repr(value), str_fmt
            elif fmt[-1] == 'a':
                value, fmt = ascii(value), str_fmt
            if fmt[-1] in 'csra':
                value = sanitizer(last_field, value)

        # The generated key combines the original key text and the original format, so the
        # same field used with different formats gets separate entries. A NUL byte is put
        # after every '%' - presumably so that escape_outtmpl leaves the key alone (TODO confirm)
        key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1367
def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Evaluate outtmpl against info_dict and return the resulting string.

    Extra arguments are passed on to prepare_outtmpl.
    """
    template, values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    escaped = self.escape_outtmpl(template)
    return escaped % values
1371
def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
    """Evaluate an output template into a file name, without any directory handling.

    @param outtmpl     Template to use; mutually exclusive with tmpl_type
    @param tmpl_type   Key of the template in params['outtmpl'] ('default' when not given)
    @return            The file name, or None when it is empty or the template is invalid
    """
    assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
    if outtmpl is None:
        templates = self.params['outtmpl']
        outtmpl = templates.get(tmpl_type or 'default', templates['default'])
    try:
        expanded = self._outtmpl_expandpath(outtmpl)
        filename = self.evaluate_outtmpl(expanded, info_dict, True)
        if not filename:
            return None

        if tmpl_type in ('', 'temp'):
            final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
            if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
                filename = replace_extension(filename, ext, final_ext)
        elif tmpl_type:
            force_ext = OUTTMPL_TYPES[tmpl_type]
            if force_ext:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        max_length = self.params.get('trim_file_name', False)
        if max_length:
            # Only the part before the last (up to two) extensions is trimmed
            stem, *extensions = filename.rsplit('.', 2)
            filename = join_nonempty(stem[:max_length], *extensions, delim='.')

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
1401
def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
    """Generate the output filename"""
    if outtmpl:
        assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
        dir_type = None
    filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
    if not filename and dir_type not in ('', 'temp'):
        return ''

    # The warnings only matter when --paths is actually in use
    if warn and self.params.get('paths'):
        if filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        elif os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)

    if filename == '-' or not filename:
        return filename
    return self.get_output_path(dir_type, filename)
1422
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """Returns None if the file should be downloaded

    Otherwise the reason for skipping is returned (and printed unless silent).
    When the param named by the kind of rejection ('break_on_existing',
    'break_on_reject' or 'match_filter') is set, the matching exception is
    raised instead of returning.

    @param incomplete   Passed on to the match filter; must be true for
                        anything that is not a video result
    @param silent       Do not print the reason to the screen
    """
    # With the 'playlist-match-filter' compat option every entry is treated as a video
    _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
    assert incomplete or _type == 'video', 'Only video result can be considered complete'

    # Name used in the messages: the title, else the id, else the word 'entry'
    video_title = info_dict.get('title', info_dict.get('id', 'entry'))

    def check_filter():
        # Returns the reason for rejecting the entry, or None when it passes every filter
        if _type in ('playlist', 'multi_video'):
            return
        elif _type in ('url', 'url_transparent') and not try_call(
                lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
            return

        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        match_filter = self.params.get('match_filter')
        if match_filter is None:
            return None

        # Set when the filter raised DownloadCancelled with NO_DEFAULT as its message
        cancelled = None
        try:
            try:
                ret = match_filter(info_dict, incomplete=incomplete)
            except TypeError:
                # For backward compatibility
                ret = None if incomplete else match_filter(info_dict)
        except DownloadCancelled as err:
            if err.msg is not NO_DEFAULT:
                raise
            ret, cancelled = err.msg, err

        # NO_DEFAULT as the result means: ask the user, repeating until the answer is y/n/empty
        if ret is NO_DEFAULT:
            while True:
                filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
                reply = input(self._format_screen(
                    f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
                if reply in {'y', ''}:
                    return None
                elif reply == 'n':
                    if cancelled:
                        # Re-raise the same kind of cancellation, now with a skip message
                        raise type(cancelled)(f'Skipping {video_title}')
                    return f'Skipping {video_title}'
        return ret

    # break_opt names the param that turns this kind of rejection into the exception break_err
    if self.in_download_archive(info_dict):
        reason = ''.join((
            format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
            format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
            'has already been recorded in the archive'))
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        try:
            reason = check_filter()
        except DownloadCancelled as e:
            reason, break_opt, break_err = e.msg, 'match_filter', type(e)
        else:
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
    if reason is not None:
        if not silent:
            self.to_screen('[download] ' + reason)
        if self.params.get(break_opt, False):
            raise break_err()
    return reason
1513
1514 @staticmethod
1515 def add_extra_info(info_dict, extra_info):
1516 '''Set the keys from extra_info in info dict if they are missing'''
1517 for key, value in extra_info.items():
1518 info_dict.setdefault(key, value)
1519
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract and return the information dictionary of the URL

    Arguments:
    @param url          URL to extract

    Keyword arguments:
    @param download     Whether to download videos
    @param process      Whether to resolve all unresolved references (URLs, playlist items).
                        Must be True for download to work
    @param ie_key       Use only the extractor with this key

    @param extra_info   Dictionary containing the extra values to add to the info (For internal use only)
    @force_generic_extractor  Force using the generic extractor (Deprecated; use ie_key='Generic')
    """

    if extra_info is None:
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    # Candidates: only the requested extractor (if it is registered), or all of them
    if not ie_key:
        ies = self._ies
    elif ie_key in self._ies:
        ies = {ie_key: self._ies[ie_key]}
    else:
        ies = {}

    for key, ie in ies.items():
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = ie.get_temp_id(url)
        if temp_id is None or not self.in_download_archive({'id': temp_id, 'ie_key': key}):
            return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)

        # Already in the archive: report it and stop without extracting anything
        self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
                       'has already been recorded in the archive')
        if self.params.get('break_on_existing', False):
            raise ExistingVideoReached()
        break
    else:
        # Reached only when no extractor claimed the URL
        extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
        self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
                          tb=False if extractors_restricted else None)
1569
1570 def _handle_extraction_exceptions(func):
1571 @functools.wraps(func)
1572 def wrapper(self, *args, **kwargs):
1573 while True:
1574 try:
1575 return func(self, *args, **kwargs)
1576 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1577 raise
1578 except ReExtractInfo as e:
1579 if e.expected:
1580 self.to_screen(f'{e}; Re-extracting data')
1581 else:
1582 self.to_stderr('\r')
1583 self.report_warning(f'{e}; Re-extracting data')
1584 continue
1585 except GeoRestrictedError as e:
1586 msg = e.msg
1587 if e.countries:
1588 msg += '\nThis video is available in %s.' % ', '.join(
1589 map(ISO3166Utils.short2full, e.countries))
1590 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1591 self.report_error(msg)
1592 except ExtractorError as e: # An error we somewhat expected
1593 self.report_error(str(e), e.format_traceback())
1594 except Exception as e:
1595 if self.params.get('ignoreerrors'):
1596 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1597 else:
1598 raise
1599 break
1600 return wrapper
1601
1602 def _wait_for_video(self, ie_result={}):
1603 if (not self.params.get('wait_for_video')
1604 or ie_result.get('_type', 'video') != 'video'
1605 or ie_result.get('formats') or ie_result.get('url')):
1606 return
1607
1608 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1609 last_msg = ''
1610
1611 def progress(msg):
1612 nonlocal last_msg
1613 full_msg = f'{msg}\n'
1614 if not self.params.get('noprogress'):
1615 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1616 elif last_msg:
1617 return
1618 self.to_screen(full_msg, skip_eol=True)
1619 last_msg = msg
1620
1621 min_wait, max_wait = self.params.get('wait_for_video')
1622 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1623 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1624 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1625 self.report_warning('Release time of video is not known')
1626 elif ie_result and (diff or 0) <= 0:
1627 self.report_warning('Video should already be available according to extracted info')
1628 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1629 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1630
1631 wait_till = time.time() + diff
1632 try:
1633 while True:
1634 diff = wait_till - time.time()
1635 if diff <= 0:
1636 progress('')
1637 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1638 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1639 time.sleep(1)
1640 except KeyboardInterrupt:
1641 progress('')
1642 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1643 except BaseException as e:
1644 if not isinstance(e, ReExtractInfo):
1645 self.to_screen('')
1646 raise
1647
def _load_cookies(self, data, *, autoscope=True):
    """Loads cookies from a `Cookie` header

    This tries to work around the security vulnerability of passing cookies to every domain.
    See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj

    @param data         The Cookie header as string to load the cookies from
    @param autoscope    If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains
                        If `True`, save cookies for later to be stored in the jar with a limited scope
                        If a URL, save cookies in the jar with the domain of the URL

    Raises ValueError if autoscope is truthy and a cookie carries any attribute.
    """
    for morsel in LenientSimpleCookie(data).values():
        # A plain Cookie header has no attributes (domain, path, ...)
        if autoscope and any(morsel.values()):
            raise ValueError('Invalid syntax in Cookie Header')

        domain = morsel.get('domain') or ''
        path = morsel.get('path')
        expires = morsel.get('expires')
        if expires == '':  # 0 is valid
            expires = None
        prepared_cookie = http.cookiejar.Cookie(
            version=morsel.get('version') or 0, name=morsel.key, value=morsel.value,
            port=None, port_specified=False,
            domain=domain, domain_specified=True, domain_initial_dot=True,
            path=path or '', path_specified=bool(path),
            secure=morsel.get('secure') or False, expires=expires,
            discard=False, comment=None, comment_url=None, rest={})

        if domain:
            # Already scoped: can go straight into the jar
            self.cookiejar.set_cookie(prepared_cookie)
            continue

        if autoscope is True:
            # Keep it aside; it gets a domain later in _apply_header_cookies
            self.deprecated_feature(
                'Passing cookies as a header is a potential security risk; '
                'they will be scoped to the domain of the downloaded urls. '
                'Please consider loading cookies from a file or browser instead.')
            self.__header_cookies.append(prepared_cookie)
            continue

        if not autoscope:
            self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
                              tb=False, is_error=False)
            continue

        # autoscope is a URL: scope the cookie to that URL's domain right away
        self.report_warning(
            'The extractor result contains an unscoped cookie as an HTTP header. '
            f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
            only_once=True)
        self._apply_header_cookies(autoscope, [prepared_cookie])
1689
1690 def _apply_header_cookies(self, url, cookies=None):
1691 """Applies stray header cookies to the provided url
1692
1693 This loads header cookies and scopes them to the domain provided in `url`.
1694 While this is not ideal, it helps reduce the risk of them being sent
1695 to an unintended destination while mostly maintaining compatibility.
1696 """
1697 parsed = urllib.parse.urlparse(url)
1698 if not parsed.hostname:
1699 return
1700
1701 for cookie in map(copy.copy, cookies or self.__header_cookies):
1702 cookie.domain = f'.{parsed.hostname}'
1703 self.cookiejar.set_cookie(cookie)
1704
@_handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run extractor `ie` on `url` and optionally process the result

    Returns the processed result when `process` is true, the raw extractor
    result otherwise, and None if the extractor returned nothing.
    Errors are subject to the _handle_extraction_exceptions decorator.
    """
    self._apply_header_cookies(url)

    try:
        ie_result = ie.extract(url)
    except UserNotLive as e:
        if process and self.params.get('wait_for_video'):
            self.report_warning(e)
            self._wait_for_video()
        raise

    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
        return None

    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}

    original_url = extra_info.get('original_url')
    if original_url:
        ie_result.setdefault('original_url', original_url)
    self.add_default_extra_info(ie_result, ie, url)

    if not process:
        return ie_result
    self._wait_for_video(ie_result)
    return self.process_ie_result(ie_result, download, extra_info)
1734
def add_default_extra_info(self, ie_result, ie, url):
    """Add the default URL and extractor fields to ie_result where missing

    All fields are added through add_extra_info. The basename/domain fields
    are derived from whatever 'webpage_url' ie_result ends up with, which
    need not be `url`. `url` and `ie` may each be None, in which case the
    corresponding fields are not added.
    """
    if url is not None:
        url_defaults = {'webpage_url': url, 'original_url': url}
        self.add_extra_info(ie_result, url_defaults)

    webpage_url = ie_result.get('webpage_url')
    if webpage_url:
        derived_defaults = {
            'webpage_url_basename': url_basename(webpage_url),
            'webpage_url_domain': get_domain(webpage_url),
        }
        self.add_extra_info(ie_result, derived_defaults)

    if ie is not None:
        extractor_defaults = {'extractor': ie.IE_NAME, 'extractor_key': ie.ie_key()}
        self.add_extra_info(ie_result, extractor_defaults)
1752
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    @param ie_result    Info dict returned by an extractor; dispatched on its
                        '_type' ('video' when absent)
    @param download     Whether to download videos
    @param extra_info   Extra values to add to the info (For internal use only)

    Returns None for a playlist that is skipped as already seen.
    Raises Exception for an unknown '_type'.
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(
            ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
        if ie_result.get('original_url') and not extra_info.get('original_url'):
            # Build a new dict so that the caller's extra_info is not modified
            extra_info = {'original_url': ie_result['original_url'], **extra_info}

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: do not follow the URL; run the printing/archive
            # steps on a copy and hand back the unresolved result
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            if ie and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            info_copy, _ = self.pre_process(info_copy)
            self._fill_common_fields(info_copy, False)
            self.__forced_printings(info_copy)
            self._raise_pending_errors(info_copy)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        self._raise_pending_errors(ie_result)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        exempted_fields = {'_type', 'url', 'ie_key'}
        if not ie_result.get('section_end') and ie_result.get('section_start') is None:
            # For video clips, the id etc of the clip extractor should be used
            exempted_fields |= {'id', 'extractor', 'extractor_key'}

        # Outer (embedding page) metadata overrides the inner result,
        # except for None values and the exempted fields
        new_result = info.copy()
        new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result.get('webpage_url')  # Playlists may not have webpage_url
        if webpage_url and webpage_url in self._playlist_urls:
            # The parentheses matter: `%` binds tighter than `or`, so without
            # them the id fallback would never be used
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._fill_common_fields(ie_result, False)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Forget the seen URLs once the outermost playlist is done
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Give each entry the extractor/URL fields of the enclosing result
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1886
def _ensure_dir_exists(self, path):
    """Call make_dir for `path`, passing self.report_error as its second argument

    Returns whatever make_dir returns; callers in this file treat a falsy
    result as failure.
    """
    error_reporter = self.report_error
    return make_dir(path, error_reporter)
1889
1890 @staticmethod
1891 def _playlist_infodict(ie_result, strict=False, **kwargs):
1892 info = {
1893 'playlist_count': ie_result.get('playlist_count'),
1894 'playlist': ie_result.get('title') or ie_result.get('id'),
1895 'playlist_id': ie_result.get('id'),
1896 'playlist_title': ie_result.get('title'),
1897 'playlist_uploader': ie_result.get('uploader'),
1898 'playlist_uploader_id': ie_result.get('uploader_id'),
1899 **kwargs,
1900 }
1901 if strict:
1902 return info
1903 if ie_result.get('webpage_url'):
1904 info.update({
1905 'webpage_url': ie_result['webpage_url'],
1906 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1907 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1908 })
1909 return {
1910 **info,
1911 'playlist_index': 0,
1912 '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
1913 'extractor': ie_result['extractor'],
1914 'extractor_key': ie_result['extractor_key'],
1915 }
1916
def __process_playlist(self, ie_result, download):
    """Process each entry in the playlist

    @param ie_result    A 'playlist' or 'multi_video' info dict; it is updated
                        in place ('entries', 'requested_entries', 'playlist_count')
    @param download     Passed on to the processing of each entry

    Returns the playlist info dict after the 'playlist' postprocessors have
    run, or None if the playlist is rejected by _match_entry or writing one
    of the playlist files returns None.
    """
    assert ie_result['_type'] in ('playlist', 'multi_video')

    common_info = self._playlist_infodict(ie_result, strict=True)
    title = common_info.get('playlist') or '<Untitled>'
    # A non-None result from _match_entry means the playlist itself is to be skipped
    if self._match_entry(common_info, incomplete=True) is not None:
        return
    self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')

    all_entries = PlaylistEntries(self, ie_result)
    entries = orderedSet(all_entries.get_requested_items(), lazy=True)

    lazy = self.params.get('lazy_playlist')
    if lazy:
        # Entries are consumed one by one in the loop below; the count is unknown
        resolved_entries, n_entries = [], 'N/A'
        ie_result['requested_entries'], ie_result['entries'] = None, None
    else:
        # `entries` and `resolved_entries` are the same list object from here on
        entries = resolved_entries = list(entries)
        n_entries = len(resolved_entries)
        ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
    if not ie_result.get('playlist_count'):
        # Better to do this after potentially exhausting entries
        ie_result['playlist_count'] = all_entries.get_full_count()

    extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
    # Lookups fall back to the playlist-level fields in `extra`
    ie_copy = collections.ChainMap(ie_result, extra)

    _infojson_written = False
    write_playlist_files = self.params.get('allow_playlist_files', True)
    if write_playlist_files and self.params.get('list_thumbnails'):
        self.list_thumbnails(ie_result)
    if write_playlist_files and not self.params.get('simulate'):
        _infojson_written = self._write_info_json(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
        if _infojson_written is None:
            return
        if self._write_description('playlist', ie_result,
                                   self.prepare_filename(ie_copy, 'pl_description')) is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if lazy:
        if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
            self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
    elif self.params.get('playlistreverse'):
        entries.reverse()
    elif self.params.get('playlistrandom'):
        random.shuffle(entries)

    self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
                   f'{format_field(ie_result, "playlist_count", " of %s")}')

    keep_resolved_entries = self.params.get('extract_flat') != 'discard'
    if self.params.get('extract_flat') == 'discard_in_playlist':
        keep_resolved_entries = ie_result['_type'] != 'playlist'
    if keep_resolved_entries:
        self.write_debug('The information of all playlist entries will be held in memory')

    failures = 0
    # No limit unless 'skip_playlist_after_errors' is set
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, (playlist_index, entry) in enumerate(entries):
        if lazy:
            # Keeps resolved_entries[i] valid for the assignments below
            resolved_entries.append((playlist_index, entry))
        if not entry:
            continue

        entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
        if not lazy and 'playlist-index' in self.params['compat_opts']:
            playlist_index = ie_result['requested_entries'][i]

        entry_copy = collections.ChainMap(entry, {
            **common_info,
            'n_entries': int_or_none(n_entries),
            'playlist_index': playlist_index,
            'playlist_autonumber': i + 1,
        })

        if self._match_entry(entry_copy, incomplete=True) is not None:
            # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
            # NO_DEFAULT marks the entry for removal from the final lists below
            resolved_entries[i] = (playlist_index, NO_DEFAULT)
            continue

        self.to_screen('[download] Downloading item %s of %s' % (
            self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))

        entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
            'playlist_index': playlist_index,
            'playlist_autonumber': i + 1,
        }, extra))
        # A falsy result is counted as a failed entry
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
            break
        if keep_resolved_entries:
            resolved_entries[i] = (playlist_index, entry_result)

    # Update with processed data
    ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
    ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
    if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
        # Do not set for full playlist
        ie_result.pop('requested_entries')

    # Write the updated info to json
    if _infojson_written is True and self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
        return

    ie_result = self.run_all_pps('playlist', ie_result)
    self.to_screen(f'[download] Finished downloading playlist: {title}')
    return ie_result
2033
@_handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Process a single playlist entry via process_ie_result

    Exists so that the _handle_extraction_exceptions policy is applied
    to each entry individually.
    """
    return self.process_ie_result(entry, download, extra_info)
2038
2039 def _build_format_filter(self, filter_spec):
2040 " Returns a function to filter the formats according to the filter_spec "
2041
2042 OPERATORS = {
2043 '<': operator.lt,
2044 '<=': operator.le,
2045 '>': operator.gt,
2046 '>=': operator.ge,
2047 '=': operator.eq,
2048 '!=': operator.ne,
2049 }
2050 operator_rex = re.compile(r'''(?x)\s*
2051 (?P<key>[\w.-]+)\s*
2052 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
2053 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
2054 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
2055 m = operator_rex.fullmatch(filter_spec)
2056 if m:
2057 try:
2058 comparison_value = int(m.group('value'))
2059 except ValueError:
2060 comparison_value = parse_filesize(m.group('value'))
2061 if comparison_value is None:
2062 comparison_value = parse_filesize(m.group('value') + 'B')
2063 if comparison_value is None:
2064 raise ValueError(
2065 'Invalid value %r in format specification %r' % (
2066 m.group('value'), filter_spec))
2067 op = OPERATORS[m.group('op')]
2068
2069 if not m:
2070 STR_OPERATORS = {
2071 '=': operator.eq,
2072 '^=': lambda attr, value: attr.startswith(value),
2073 '$=': lambda attr, value: attr.endswith(value),
2074 '*=': lambda attr, value: value in attr,
2075 '~=': lambda attr, value: value.search(attr) is not None
2076 }
2077 str_operator_rex = re.compile(r'''(?x)\s*
2078 (?P<key>[a-zA-Z0-9._-]+)\s*
2079 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
2080 (?P<quote>["'])?
2081 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
2082 (?(quote)(?P=quote))\s*
2083 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
2084 m = str_operator_rex.fullmatch(filter_spec)
2085 if m:
2086 if m.group('op') == '~=':
2087 comparison_value = re.compile(m.group('value'))
2088 else:
2089 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2090 str_op = STR_OPERATORS[m.group('op')]
2091 if m.group('negation'):
2092 op = lambda attr, value: not str_op(attr, value)
2093 else:
2094 op = str_op
2095
2096 if not m:
2097 raise SyntaxError('Invalid filter specification %r' % filter_spec)
2098
2099 def _filter(f):
2100 actual_value = f.get(m.group('key'))
2101 if actual_value is None:
2102 return m.group('none_inclusive')
2103 return op(actual_value, comparison_value)
2104 return _filter
2105
2106 def _check_formats(self, formats):
2107 for f in formats:
2108 self.to_screen('[info] Testing format %s' % f['format_id'])
2109 path = self.get_output_path('temp')
2110 if not self._ensure_dir_exists(f'{path}/'):
2111 continue
2112 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
2113 temp_file.close()
2114 try:
2115 success, _ = self.dl(temp_file.name, f, test=True)
2116 except (DownloadError, OSError, ValueError) + network_exceptions:
2117 success = False
2118 finally:
2119 if os.path.exists(temp_file.name):
2120 try:
2121 os.remove(temp_file.name)
2122 except OSError:
2123 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
2124 if success:
2125 yield f
2126 else:
2127 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
2128
2129 def _default_format_spec(self, info_dict, download=True):
2130
2131 def can_merge():
2132 merger = FFmpegMergerPP(self)
2133 return merger.available and merger.can_merge()
2134
2135 prefer_best = (
2136 not self.params.get('simulate')
2137 and download
2138 and (
2139 not can_merge()
2140 or info_dict.get('is_live') and not self.params.get('live_from_start')
2141 or self.params['outtmpl']['default'] == '-'))
2142 compat = (
2143 prefer_best
2144 or self.params.get('allow_multiple_audio_streams', False)
2145 or 'format-spec' in self.params['compat_opts'])
2146
2147 return (
2148 'best/bestvideo+bestaudio' if prefer_best
2149 else 'bestvideo*+bestaudio/best' if not compat
2150 else 'bestvideo+bestaudio/best')
2151
2152 def build_format_selector(self, format_spec):
2153 def syntax_error(note, start):
2154 message = (
2155 'Invalid format specification: '
2156 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
2157 return SyntaxError(message)
2158
2159 PICKFIRST = 'PICKFIRST'
2160 MERGE = 'MERGE'
2161 SINGLE = 'SINGLE'
2162 GROUP = 'GROUP'
2163 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2164
2165 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2166 'video': self.params.get('allow_multiple_video_streams', False)}
2167
2168 def _parse_filter(tokens):
2169 filter_parts = []
2170 for type, string_, start, _, _ in tokens:
2171 if type == tokenize.OP and string_ == ']':
2172 return ''.join(filter_parts)
2173 else:
2174 filter_parts.append(string_)
2175
2176 def _remove_unused_ops(tokens):
2177 # Remove operators that we don't use and join them with the surrounding strings.
2178 # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
2179 ALLOWED_OPS = ('/', '+', ',', '(', ')')
2180 last_string, last_start, last_end, last_line = None, None, None, None
2181 for type, string_, start, end, line in tokens:
2182 if type == tokenize.OP and string_ == '[':
2183 if last_string:
2184 yield tokenize.NAME, last_string, last_start, last_end, last_line
2185 last_string = None
2186 yield type, string_, start, end, line
2187 # everything inside brackets will be handled by _parse_filter
2188 for type, string_, start, end, line in tokens:
2189 yield type, string_, start, end, line
2190 if type == tokenize.OP and string_ == ']':
2191 break
2192 elif type == tokenize.OP and string_ in ALLOWED_OPS:
2193 if last_string:
2194 yield tokenize.NAME, last_string, last_start, last_end, last_line
2195 last_string = None
2196 yield type, string_, start, end, line
2197 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2198 if not last_string:
2199 last_string = string_
2200 last_start = start
2201 last_end = end
2202 else:
2203 last_string += string_
2204 if last_string:
2205 yield tokenize.NAME, last_string, last_start, last_end, last_line
2206
2207 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
2208 selectors = []
2209 current_selector = None
2210 for type, string_, start, _, _ in tokens:
2211 # ENCODING is only defined in python 3.x
2212 if type == getattr(tokenize, 'ENCODING', None):
2213 continue
2214 elif type in [tokenize.NAME, tokenize.NUMBER]:
2215 current_selector = FormatSelector(SINGLE, string_, [])
2216 elif type == tokenize.OP:
2217 if string_ == ')':
2218 if not inside_group:
2219 # ')' will be handled by the parentheses group
2220 tokens.restore_last_token()
2221 break
2222 elif inside_merge and string_ in ['/', ',']:
2223 tokens.restore_last_token()
2224 break
2225 elif inside_choice and string_ == ',':
2226 tokens.restore_last_token()
2227 break
2228 elif string_ == ',':
2229 if not current_selector:
2230 raise syntax_error('"," must follow a format selector', start)
2231 selectors.append(current_selector)
2232 current_selector = None
2233 elif string_ == '/':
2234 if not current_selector:
2235 raise syntax_error('"/" must follow a format selector', start)
2236 first_choice = current_selector
2237 second_choice = _parse_format_selection(tokens, inside_choice=True)
2238 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2239 elif string_ == '[':
2240 if not current_selector:
2241 current_selector = FormatSelector(SINGLE, 'best', [])
2242 format_filter = _parse_filter(tokens)
2243 current_selector.filters.append(format_filter)
2244 elif string_ == '(':
2245 if current_selector:
2246 raise syntax_error('Unexpected "("', start)
2247 group = _parse_format_selection(tokens, inside_group=True)
2248 current_selector = FormatSelector(GROUP, group, [])
2249 elif string_ == '+':
2250 if not current_selector:
2251 raise syntax_error('Unexpected "+"', start)
2252 selector_1 = current_selector
2253 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2254 if not selector_2:
2255 raise syntax_error('Expected a selector', start)
2256 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2257 else:
2258 raise syntax_error(f'Operator not recognized: "{string_}"', start)
2259 elif type == tokenize.ENDMARKER:
2260 break
2261 if current_selector:
2262 selectors.append(current_selector)
2263 return selectors
2264
2265 def _merge(formats_pair):
2266 format_1, format_2 = formats_pair
2267
2268 formats_info = []
2269 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2270 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2271
2272 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2273 get_no_more = {'video': False, 'audio': False}
2274 for (i, fmt_info) in enumerate(formats_info):
2275 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2276 formats_info.pop(i)
2277 continue
2278 for aud_vid in ['audio', 'video']:
2279 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2280 if get_no_more[aud_vid]:
2281 formats_info.pop(i)
2282 break
2283 get_no_more[aud_vid] = True
2284
2285 if len(formats_info) == 1:
2286 return formats_info[0]
2287
2288 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2289 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2290
2291 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2292 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2293
2294 output_ext = get_compatible_ext(
2295 vcodecs=[f.get('vcodec') for f in video_fmts],
2296 acodecs=[f.get('acodec') for f in audio_fmts],
2297 vexts=[f['ext'] for f in video_fmts],
2298 aexts=[f['ext'] for f in audio_fmts],
2299 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2300 or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
2301
2302 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2303
2304 new_dict = {
2305 'requested_formats': formats_info,
2306 'format': '+'.join(filtered('format')),
2307 'format_id': '+'.join(filtered('format_id')),
2308 'ext': output_ext,
2309 'protocol': '+'.join(map(determine_protocol, formats_info)),
2310 'language': '+'.join(orderedSet(filtered('language'))) or None,
2311 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2312 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2313 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2314 }
2315
2316 if the_only_video:
2317 new_dict.update({
2318 'width': the_only_video.get('width'),
2319 'height': the_only_video.get('height'),
2320 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2321 'fps': the_only_video.get('fps'),
2322 'dynamic_range': the_only_video.get('dynamic_range'),
2323 'vcodec': the_only_video.get('vcodec'),
2324 'vbr': the_only_video.get('vbr'),
2325 'stretched_ratio': the_only_video.get('stretched_ratio'),
2326 'aspect_ratio': the_only_video.get('aspect_ratio'),
2327 })
2328
2329 if the_only_audio:
2330 new_dict.update({
2331 'acodec': the_only_audio.get('acodec'),
2332 'abr': the_only_audio.get('abr'),
2333 'asr': the_only_audio.get('asr'),
2334 'audio_channels': the_only_audio.get('audio_channels')
2335 })
2336
2337 return new_dict
2338
2339 def _check_formats(formats):
2340 if self.params.get('check_formats') == 'selected':
2341 yield from self._check_formats(formats)
2342 return
2343 elif (self.params.get('check_formats') is not None
2344 or self.params.get('allow_unplayable_formats')):
2345 yield from formats
2346 return
2347
2348 for f in formats:
2349 if f.get('has_drm'):
2350 yield from self._check_formats([f])
2351 else:
2352 yield f
2353
2354 def _build_selector_function(selector):
2355 if isinstance(selector, list): # ,
2356 fs = [_build_selector_function(s) for s in selector]
2357
2358 def selector_function(ctx):
2359 for f in fs:
2360 yield from f(ctx)
2361 return selector_function
2362
2363 elif selector.type == GROUP: # ()
2364 selector_function = _build_selector_function(selector.selector)
2365
2366 elif selector.type == PICKFIRST: # /
2367 fs = [_build_selector_function(s) for s in selector.selector]
2368
2369 def selector_function(ctx):
2370 for f in fs:
2371 picked_formats = list(f(ctx))
2372 if picked_formats:
2373 return picked_formats
2374 return []
2375
2376 elif selector.type == MERGE: # +
2377 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2378
2379 def selector_function(ctx):
2380 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2381 yield _merge(pair)
2382
2383 elif selector.type == SINGLE: # atom
2384 format_spec = selector.selector or 'best'
2385
2386 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2387 if format_spec == 'all':
2388 def selector_function(ctx):
2389 yield from _check_formats(ctx['formats'][::-1])
2390 elif format_spec == 'mergeall':
2391 def selector_function(ctx):
2392 formats = list(_check_formats(
2393 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2394 if not formats:
2395 return
2396 merged_format = formats[-1]
2397 for f in formats[-2::-1]:
2398 merged_format = _merge((merged_format, f))
2399 yield merged_format
2400
2401 else:
2402 format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1
2403 mobj = re.match(
2404 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2405 format_spec)
2406 if mobj is not None:
2407 format_idx = int_or_none(mobj.group('n'), default=1)
2408 format_reverse = mobj.group('bw')[0] == 'b'
2409 format_type = (mobj.group('type') or [None])[0]
2410 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2411 format_modified = mobj.group('mod') is not None
2412
2413 format_fallback = not format_type and not format_modified # for b, w
2414 _filter_f = (
2415 (lambda f: f.get('%scodec' % format_type) != 'none')
2416 if format_type and format_modified # bv*, ba*, wv*, wa*
2417 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2418 if format_type # bv, ba, wv, wa
2419 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2420 if not format_modified # b, w
2421 else lambda f: True) # b*, w*
2422 filter_f = lambda f: _filter_f(f) and (
2423 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2424 else:
2425 if format_spec in self._format_selection_exts['audio']:
2426 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2427 elif format_spec in self._format_selection_exts['video']:
2428 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2429 seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2430 elif format_spec in self._format_selection_exts['storyboards']:
2431 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2432 else:
2433 filter_f = lambda f: f.get('format_id') == format_spec # id
2434
2435 def selector_function(ctx):
2436 formats = list(ctx['formats'])
2437 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2438 if not matches:
2439 if format_fallback and ctx['incomplete_formats']:
2440 # for extractors with incomplete formats (audio only (soundcloud)
2441 # or video only (imgur)) best/worst will fallback to
2442 # best/worst {video,audio}-only format
2443 matches = formats
2444 elif seperate_fallback and not ctx['has_merged_format']:
2445 # for compatibility with youtube-dl when there is no pre-merged format
2446 matches = list(filter(seperate_fallback, formats))
2447 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2448 try:
2449 yield matches[format_idx - 1]
2450 except LazyList.IndexError:
2451 return
2452
2453 filters = [self._build_format_filter(f) for f in selector.filters]
2454
2455 def final_selector(ctx):
2456 ctx_copy = dict(ctx)
2457 for _filter in filters:
2458 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2459 return selector_function(ctx_copy)
2460 return final_selector
2461
2462 stream = io.BytesIO(format_spec.encode())
2463 try:
2464 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2465 except tokenize.TokenError:
2466 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2467
2468 class TokenIterator:
2469 def __init__(self, tokens):
2470 self.tokens = tokens
2471 self.counter = 0
2472
2473 def __iter__(self):
2474 return self
2475
2476 def __next__(self):
2477 if self.counter >= len(self.tokens):
2478 raise StopIteration()
2479 value = self.tokens[self.counter]
2480 self.counter += 1
2481 return value
2482
2483 next = __next__
2484
2485 def restore_last_token(self):
2486 self.counter -= 1
2487
2488 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2489 return _build_selector_function(parsed_selector)
2490
def _calc_headers(self, info_dict, load_cookies=False):
    """Build the HTTP headers to use for `info_dict`

    Combines self.params['http_headers'] with info_dict's 'http_headers'
    into an HTTPHeaderDict and passes it through clean_headers().
    Any 'Cookie' header is dropped from the result; instead, the cookies
    that the cookiejar holds for info_dict['url'] are serialized into
    info_dict['cookies'].

    @param load_cookies  When true, the 'Cookie' header and info_dict['cookies']
                         are first loaded into the cookiejar (for --load-info-json)
    @returns             The resulting header dict
    """
    headers = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
    clean_headers(headers)

    if load_cookies:  # For --load-info-json
        self._load_cookies(headers.get('Cookie'), autoscope=info_dict['url'])  # compat
        self._load_cookies(info_dict.get('cookies'), autoscope=False)

    # The `Cookie` header is removed to prevent leaks and unscoped cookies.
    # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
    headers.pop('Cookie', None)

    jar_cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
    if jar_cookies:
        encoder = LenientSimpleCookie()
        parts = []
        for cookie in jar_cookies:
            _, encoded = encoder.value_encode(cookie.value)
            parts.append(f'{cookie.name}={encoded}')
            # Attributes are only emitted when set, in a fixed order
            for label, attr in (('Domain', cookie.domain), ('Path', cookie.path)):
                if attr:
                    parts.append(f'{label}={attr}')
            if cookie.secure:
                parts.append('Secure')
            for label, attr in (('Expires', cookie.expires), ('Version', cookie.version)):
                if attr:
                    parts.append(f'{label}={attr}')
        info_dict['cookies'] = '; '.join(parts)

    # An explicitly given X-Forwarded-For header takes precedence
    if 'X-Forwarded-For' not in headers:
        forwarded_ip = info_dict.get('__x_forwarded_for_ip')
        if forwarded_ip:
            headers['X-Forwarded-For'] = forwarded_ip

    return headers
2526
2527 def _calc_cookies(self, url):
2528 self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
2529 return self.cookiejar.get_cookie_header(url)
2530
2531 def _sort_thumbnails(self, thumbnails):
2532 thumbnails.sort(key=lambda t: (
2533 t.get('preference') if t.get('preference') is not None else -1,
2534 t.get('width') if t.get('width') is not None else -1,
2535 t.get('height') if t.get('height') is not None else -1,
2536 t.get('id') if t.get('id') is not None else '',
2537 t.get('url')))
2538
def _sanitize_thumbnails(self, info_dict):
    """Normalize info_dict['thumbnails'] in place

    If there is no 'thumbnails' list, one is created from 'thumbnail' (when set).
    The list is sorted with _sort_thumbnails(), every entry gets an 'id'
    (its index after sorting, if it had none), a 'resolution' (when both
    width and height are truthy) and a sanitized 'url'.
    When the 'check_formats' param is exactly True, the list is replaced by a
    LazyList over a generator that HEAD-requests each thumbnail URL and
    skips those that raise a network exception.
    """
    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        single = info_dict.get('thumbnail')
        if single:
            thumbnails = [{'url': single}]
            info_dict['thumbnails'] = thumbnails
    if not thumbnails:
        return

    def reachable_thumbnails(candidates):
        for thumb in candidates:
            self.to_screen(f'[info] Testing thumbnail {thumb["id"]}')
            try:
                self.urlopen(HEADRequest(thumb['url']))
            except network_exceptions as err:
                self.to_screen(f'[info] Unable to connect to thumbnail {thumb["id"]} URL {thumb["url"]!r} - {err}. Skipping...')
            else:
                yield thumb

    self._sort_thumbnails(thumbnails)
    for index, thumb in enumerate(thumbnails):
        if thumb.get('id') is None:
            thumb['id'] = '%d' % index
        width, height = thumb.get('width'), thumb.get('height')
        if width and height:
            thumb['resolution'] = '%dx%d' % (width, height)
        thumb['url'] = sanitize_url(thumb['url'])

    verify = self.params.get('check_formats') is True
    # The generator is fed the reversed list and the LazyList reverses it back
    info_dict['thumbnails'] = (
        LazyList(reachable_thumbnails(thumbnails[::-1]), reverse=True) if verify else thumbnails)
2570
2571 def _fill_common_fields(self, info_dict, final=True):
2572 # TODO: move sanitization here
2573 if final:
2574 title = info_dict['fulltitle'] = info_dict.get('title')
2575 if not title:
2576 if title == '':
2577 self.write_debug('Extractor gave empty title. Creating a generic title')
2578 else:
2579 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2580 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2581
2582 if info_dict.get('duration') is not None:
2583 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2584
2585 for ts_key, date_key in (
2586 ('timestamp', 'upload_date'),
2587 ('release_timestamp', 'release_date'),
2588 ('modified_timestamp', 'modified_date'),
2589 ):
2590 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2591 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2592 # see http://bugs.python.org/issue1646728)
2593 with contextlib.suppress(ValueError, OverflowError, OSError):
2594 upload_date = datetime.datetime.fromtimestamp(info_dict[ts_key], datetime.timezone.utc)
2595 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2596
2597 live_keys = ('is_live', 'was_live')
2598 live_status = info_dict.get('live_status')
2599 if live_status is None:
2600 for key in live_keys:
2601 if info_dict.get(key) is False:
2602 continue
2603 if info_dict.get(key):
2604 live_status = key
2605 break
2606 if all(info_dict.get(key) is False for key in live_keys):
2607 live_status = 'not_live'
2608 if live_status:
2609 info_dict['live_status'] = live_status
2610 for key in live_keys:
2611 if info_dict.get(key) is None:
2612 info_dict[key] = (live_status == key)
2613 if live_status == 'post_live':
2614 info_dict['was_live'] = True
2615
2616 # Auto generate title fields corresponding to the *_number fields when missing
2617 # in order to always have clean titles. This is very common for TV series.
2618 for field in ('chapter', 'season', 'episode'):
2619 if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2620 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2621
2622 def _raise_pending_errors(self, info):
2623 err = info.pop('__pending_error', None)
2624 if err:
2625 self.report_error(err, tb=False)
2626
def sort_formats(self, info_dict):
    """Sort the formats of `info_dict` in place

    The ordering key is FormatSorter.calculate_preference, built from
    info_dict['_format_sort_fields'] (or no extra fields when unset).
    """
    formats = self._get_formats(info_dict)
    sort_fields = info_dict.get('_format_sort_fields') or []
    sorter = FormatSorter(self, sort_fields)
    formats.sort(key=sorter.calculate_preference)
2631
def process_video_result(self, info_dict, download=True):
    """Sanitize a single-video extractor result, select formats and process them

    The steps are, in order: validate/sanitize basic fields, chapters,
    thumbnails and subtitles; sanitize, sort and de-duplicate the formats;
    run the pre-processors and the match filter; handle the listing options;
    select the formats to download; and, when `download` is true, call
    process_info() for every selected format and requested time range.

    @param info_dict  Result dict whose '_type' is 'video' (or unset). Modified in place
    @param download   Passed on to _default_format_spec(); when false,
                      process_info() is not called
    @returns          The info dict, updated with the last selected format

    Raises ExtractorError when 'id' is missing/empty, or when no format matches
    the selector and the 'ignore_no_formats_error' param is not set.
    MaxDownloadsReached raised by process_info() is re-raised once the
    'after_video' post-processors have run.
    """
    assert info_dict.get('_type', 'video') == 'video'
    self._num_videos += 1

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
    elif not info_dict.get('id'):
        raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

    def report_force_conversion(field, field_not, conversion):
        # Warn that an extractor returned a field with the wrong type
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        # Coerce a non-None, non-str field to str (with a warning)
        field = info.get(string_field)
        if field is None or isinstance(field, str):
            return
        report_force_conversion(string_field, 'a string', 'string')
        info[string_field] = str(field)

    def sanitize_numeric_fields(info):
        # Coerce every non-numeric field listed in _NUMERIC_FIELDS via int_or_none (with a warning)
        for numeric_field in self._NUMERIC_FIELDS:
            field = info.get(numeric_field)
            if field is None or isinstance(field, (int, float)):
                continue
            report_force_conversion(numeric_field, 'numeric', 'int')
            info[numeric_field] = int_or_none(field)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)
    # For a section, the duration is the length of the section
    if info_dict.get('section_end') and info_dict.get('section_start') is not None:
        info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
    # A duration <= 0 (or None) is removed; the warning is only shown if a truthy value was removed
    if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
        self.report_warning('"duration" field is negative, there is an error in extractor')

    chapters = info_dict.get('chapters') or []
    # If the first chapter does not start at 0, prepend a chapter that does
    if chapters and chapters[0].get('start_time'):
        chapters.insert(0, {'start_time': 0})

    # dummy_chapter stands in for both the neighbour before the first chapter
    # (end_time 0) and the one after the last chapter (start_time = duration)
    dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
    for idx, (prev, current, next_) in enumerate(zip(
            (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
        # Missing boundaries are taken from the neighbouring chapters
        if current.get('start_time') is None:
            current['start_time'] = prev.get('end_time')
        if not current.get('end_time'):
            current['end_time'] = next_.get('start_time')
        if not current.get('title'):
            current['title'] = f'<Untitled Chapter {idx}>'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    self._sanitize_thumbnails(info_dict)

    # 'thumbnail' defaults to the URL of the last entry of the (sorted) thumbnails
    thumbnail = info_dict.get('thumbnail')
    thumbnails = info_dict.get('thumbnails')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if info_dict.get('display_id') is None and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    self._fill_common_fields(info_dict)

    # Sanitize subtitle URLs and fill in missing extensions from the URL
    for cc_kind in ('subtitles', 'automatic_captions'):
        cc = info_dict.get(cc_kind)
        if cc:
            for _, subtitle in cc.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    formats = self._get_formats(info_dict)

    # Backward compatibility with InfoExtractor._sort_formats
    field_preference = (formats or [{}])[0].pop('__sort_fields', None)
    if field_preference:
        info_dict['_format_sort_fields'] = field_preference

    info_dict['_has_drm'] = any(  # or None ensures --clean-infojson removes it
        f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
    # Unless unplayable formats are allowed, drop formats known to have DRM ('maybe' is kept)
    if not self.params.get('allow_unplayable_formats'):
        formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']

    # Every remaining format has neither audio nor video
    if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
        self.report_warning(
            f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
            'only images are available for download. Use --list-formats to see them'.capitalize())

    # True unless the video is live and 'live_from_start' is not set
    get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
    if not get_from_start:
        # Append the current local time to the title
        info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
    if info_dict.get('is_live') and formats:
        # Keep only the formats whose 'is_from_start' matches what was requested
        formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
        if get_from_start and not formats:
            self.raise_no_formats(info_dict, msg=(
                '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
                'If you want to download from the current time, use --no-live-from-start'))

    def is_wellformed(f):
        # A format is usable only if it has a non-empty URL; bytes URLs are converted to str
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = list(filter(is_wellformed, formats or []))

    if not formats:
        self.raise_no_formats(info_dict)

    # Sanitize each format and fill in the fields that can be derived
    for format in formats:
        sanitize_string_field(format, 'format_id')
        sanitize_numeric_fields(format)
        format['url'] = sanitize_url(format['url'])
        if format.get('ext') is None:
            format['ext'] = determine_ext(format['url']).lower()
        if format.get('protocol') is None:
            format['protocol'] = determine_protocol(format)
        if format.get('resolution') is None:
            format['resolution'] = self.format_resolution(format, default=None)
        if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
            format['dynamic_range'] = 'SDR'
        if format.get('aspect_ratio') is None:
            format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
        if (not format.get('manifest_url')  # For fragmented formats, "tbr" is often max bitrate and not average
                and info_dict.get('duration') and format.get('tbr')
                and not format.get('filesize') and not format.get('filesize_approx')):
            format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
        # The ChainMap makes lookups fall back from the format to info_dict
        format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)

    # Safeguard against old/insecure infojson when using --load-info-json
    if info_dict.get('http_headers'):
        info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers'])
        info_dict['http_headers'].pop('Cookie', None)

    # This is copied to http_headers by the above _calc_headers and can now be removed
    if '__x_forwarded_for_ip' in info_dict:
        del info_dict['__x_forwarded_for_ip']

    # Sort the local `formats` list (which may differ from info_dict's own list by now)
    self.sort_formats({
        'formats': formats,
        '_format_sort_fields': info_dict.get('_format_sort_fields')
    })

    # Sanitize and group by format_id
    formats_dict = {}
    for i, format in enumerate(formats):
        if not format.get('format_id'):
            format['format_id'] = str(i)
        else:
            # Sanitize format_id from characters used in format selector expression
            format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
        formats_dict.setdefault(format['format_id'], []).append(format)

    # Make sure all formats have unique format_id
    common_exts = set(itertools.chain(*self._format_selection_exts.values()))
    for format_id, ambiguous_formats in formats_dict.items():
        ambigious_id = len(ambiguous_formats) > 1
        for i, format in enumerate(ambiguous_formats):
            if ambigious_id:
                format['format_id'] = '%s-%d' % (format_id, i)
            # Ensure there is no conflict between id and ext in format selection
            # See https://github.com/yt-dlp/yt-dlp/issues/1282
            if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                format['format_id'] = 'f%s' % format['format_id']

            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=format_field(format, 'format_note', ' (%s)'),
                )

    if self.params.get('check_formats') is True:
        # The formats are fed to _check_formats in reverse order and reversed back by the LazyList
        formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

    if not formats or formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats

    info_dict, _ = self.pre_process(info_dict)

    # A non-None result from _match_entry means the video is not to be processed further
    if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
        return info_dict

    self.post_extract(info_dict)
    info_dict, _ = self.pre_process(info_dict, 'after_filter')

    # The pre-processors may have modified the formats
    formats = self._get_formats(info_dict)

    list_only = self.params.get('simulate') == 'list_only'
    # A format selector of '-' means that the user is prompted for the selector
    interactive_format_selection = not list_only and self.format_selector == '-'
    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
    if self.params.get('listsubtitles'):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
    if self.params.get('listformats') or interactive_format_selection:
        self.list_formats(info_dict)
    if list_only:
        # Without this printing, -F --print-json will not work
        self.__forced_printings(info_dict)
        return info_dict

    format_selector = self.format_selector
    # Runs once, unless in interactive mode, where the prompt is repeated
    # until the input parses and selects at least one format
    while True:
        if interactive_format_selection:
            req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
                               + '(Press ENTER for default, or Ctrl+C to quit)'
                               + self._format_screen(': ', self.Styles.EMPHASIS))
            try:
                format_selector = self.build_format_selector(req_format) if req_format else None
            except SyntaxError as err:
                self.report_error(err, tb=False, is_error=False)
                continue

        if format_selector is None:
            req_format = self._default_format_spec(info_dict, download=download)
            self.write_debug(f'Default format spec: {req_format}')
            format_selector = self.build_format_selector(req_format)

        formats_to_download = list(format_selector({
            'formats': formats,
            'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
            'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats)  # No formats with video
                                   or all(f.get('acodec') == 'none' for f in formats)),  # OR, No formats with audio
        }))
        if interactive_format_selection and not formats_to_download:
            self.report_error('Requested format is not available', tb=False, is_error=False)
            continue
        break

    if not formats_to_download:
        if not self.params.get('ignore_no_formats_error'):
            raise ExtractorError(
                'Requested format is not available. Use --list-formats for a list of available formats',
                expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
        self.report_warning('Requested format is not available')
        # Process what we can, even without any available formats.
        formats_to_download = [{}]

    # Without a 'download_ranges' param there is a single, empty range
    requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
    best_format, downloaded_formats = formats_to_download[-1], []
    if download:
        if best_format and requested_ranges:
            def to_screen(*msg):
                # Each argument may be a single string or an iterable of strings
                self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')

            to_screen(f'Downloading {len(formats_to_download)} format(s):',
                      (f['format_id'] for f in formats_to_download))
            if requested_ranges != ({}, ):
                to_screen(f'Downloading {len(requested_ranges)} time ranges:',
                          (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
        max_downloads_reached = False

        # Every selected format is processed once per requested range
        for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
            new_info = self._copy_infodict(info_dict)
            new_info.update(fmt)
            offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
            end_time = offset + min(chapter.get('end_time', duration), duration)
            # duration may not be accurate. So allow deviations <1sec
            if end_time == float('inf') or end_time > offset + duration + 1:
                end_time = None
            if chapter or offset:
                new_info.update({
                    'section_start': offset + chapter.get('start_time', 0),
                    'section_end': end_time,
                    'section_title': chapter.get('title'),
                    'section_number': chapter.get('index'),
                })
            downloaded_formats.append(new_info)
            try:
                self.process_info(new_info)
            except MaxDownloadsReached:
                # Remember it so that the clean-up below and the post-processors still run
                max_downloads_reached = True
            self._raise_pending_errors(new_info)
            # Remove copied info
            for key, val in tuple(new_info.items()):
                if info_dict.get(key) == val:
                    new_info.pop(key)
            if max_downloads_reached:
                break

        # The archive is written only if at least one download asked for it and none left the flag False
        write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
        assert write_archive.issubset({True, False, 'ignore'})
        if True in write_archive and False not in write_archive:
            self.record_download_archive(info_dict)

        info_dict['requested_downloads'] = downloaded_formats
        info_dict = self.run_all_pps('after_video', info_dict)
        if max_downloads_reached:
            raise MaxDownloadsReached()

    # We update the info dict with the selected best quality format (backwards compatibility)
    info_dict.update(best_format)
    return info_dict
2953
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    Normal subtitles are only considered when the 'writesubtitles' param is
    set and automatic captions only when 'writeautomaticsub' is set; for a
    language available as both, the normal subtitles are used.

    The languages are chosen by 'allsubtitles', else 'subtitleslangs', else
    a single default (preferring 'en', then any language starting with 'en',
    then the first available one; normal subtitles before automatic captions).
    For each language, the format is the last one matching the first usable
    entry of the '/'-separated 'subtitlesformat' param ('best' = last format).

    @returns  Dict of language -> selected format dict, or None if there is
              nothing to select from. Languages with no formats are skipped
              with a warning.
    Raises ValueError if 'subtitleslangs' contains an invalid regex.
    """
    available_subs, normal_sub_langs = {}, []
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
        normal_sub_langs = tuple(normal_subtitles.keys())
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            if lang not in available_subs:
                available_subs[lang] = cap_info

    if not available_subs or (
            not self.params.get('writesubtitles')
            and not self.params.get('writeautomaticsub')):
        return None

    all_sub_langs = tuple(available_subs.keys())
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        try:
            requested_langs = orderedSet_from_options(
                self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
        except re.error as e:
            # Chain explicitly so that the regex error stays attached as the cause
            raise ValueError(f'Wrong regex for subtitlelangs: {e.pattern}') from e
    else:
        requested_langs = LazyList(itertools.chain(
            ['en'] if 'en' in normal_sub_langs else [],
            filter(lambda f: f.startswith('en'), normal_sub_langs),
            ['en'] if 'en' in all_sub_langs else [],
            filter(lambda f: f.startswith('en'), all_sub_langs),
            normal_sub_langs, all_sub_langs,
        ))[:1]
    if requested_langs:
        self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty list is as unusable as a missing language; indexing it
        # with [-1] below would raise IndexError
        if not formats:
            self.report_warning(f'{lang} subtitles not available for {video_id}')
            continue
        for ext in formats_preference:
            if ext == 'best':
                f = formats[-1]
                break
            matches = [fmt for fmt in formats if fmt['ext'] == ext]
            if matches:
                f = matches[-1]
                break
        else:
            # None of the preferred extensions is available
            f = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, f['ext']))
        subs[lang] = f
    return subs
3013
3014 def _forceprint(self, key, info_dict):
3015 if info_dict is None:
3016 return
3017 info_copy = info_dict.copy()
3018 info_copy.setdefault('filename', self.prepare_filename(info_dict))
3019 if info_dict.get('requested_formats') is not None:
3020 # For RTMP URLs, also include the playpath
3021 info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
3022 elif info_dict.get('url'):
3023 info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
3024 info_copy['formats_table'] = self.render_formats_table(info_dict)
3025 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
3026 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
3027 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
3028
3029 def format_tmpl(tmpl):
3030 mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
3031 if not mobj:
3032 return tmpl
3033
3034 fmt = '%({})s'
3035 if tmpl.startswith('{'):
3036 tmpl, fmt = f'.{tmpl}', '%({})j'
3037 if tmpl.endswith('='):
3038 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
3039 return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
3040
3041 for tmpl in self.params['forceprint'].get(key, []):
3042 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
3043
3044 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
3045 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
3046 tmpl = format_tmpl(tmpl)
3047 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
3048 if self._ensure_dir_exists(filename):
3049 with open(filename, 'a', encoding='utf-8', newline='') as f:
3050 f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
3051
3052 return info_copy
3053
3054 def __forced_printings(self, info_dict, filename=None, incomplete=True):
3055 if (self.params.get('forcejson')
3056 or self.params['forceprint'].get('video')
3057 or self.params['print_to_file'].get('video')):
3058 self.post_extract(info_dict)
3059 if filename:
3060 info_dict['filename'] = filename
3061 info_copy = self._forceprint('video', info_dict)
3062
3063 def print_field(field, actual_field=None, optional=False):
3064 if actual_field is None:
3065 actual_field = field
3066 if self.params.get(f'force{field}') and (
3067 info_copy.get(field) is not None or (not optional and not incomplete)):
3068 self.to_stdout(info_copy[actual_field])
3069
3070 print_field('title')
3071 print_field('id')
3072 print_field('url', 'urls')
3073 print_field('thumbnail', optional=True)
3074 print_field('description', optional=True)
3075 print_field('filename')
3076 if self.params.get('forceduration') and info_copy.get('duration') is not None:
3077 self.to_stdout(formatSeconds(info_copy['duration']))
3078 print_field('format')
3079
3080 if self.params.get('forcejson'):
3081 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
3082
def dl(self, name, info, subtitle=False, test=False):
    """Download `info` to `name` with the downloader picked by get_suitable_downloader()

    @param name      Output name; '-' tells the downloader lookup to write to stdout
    @param subtitle  Passed through to the downloader's download()
    @param test      If true, a fixed set of test parameters is used instead of
                     self.params, and neither progress hooks nor the debug
                     message are set up
    @returns         Whatever the downloader's download() returns
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    if not test:
        params = self.params
    else:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_cls(self, params)
    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)
        shown_urls = []
        for fmt in info.get('requested_formats', []) or [info]:
            url = fmt['url']
            # The payload of data: URLs is not printed
            shown_urls.append(url.split(',')[0] + ',<data>' if url.startswith('data:') else url)
        urls = '", "'.join(shown_urls)
        self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')

    # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
    # But it may contain objects that are not deep-copyable
    new_info = self._copy_infodict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)
3117
def existing_file(self, filepaths, *, default_overwrite=True):
    """Find an already-existing file among `filepaths`, or clear them away.

    When overwriting is disabled (the 'overwrites' param, falling back to
    `default_overwrite`), the first existing path is returned. Otherwise
    every existing path is deleted and None is returned.
    """
    found = [path for path in orderedSet(filepaths) if os.path.exists(path)]
    if found and not self.params.get('overwrites', default_overwrite):
        return found[0]

    for path in found:
        self.report_file_delete(path)
        os.remove(path)
    return None
3127
def process_info(self, info_dict):
    """Process a single resolved IE result. (Modifies it in-place)

    In order: runs the 'video' pre-processors, prepares the filenames, does
    the forced printings, writes the requested side files (description,
    subtitles, thumbnails, info json, annotations, internet shortcuts), runs
    the 'before_dl' pre-processors, then either only moves the side files
    (skip_download) or downloads, fixes up and post-processes the video and
    calls the post hooks.

    Most failures are reported and end processing with an early return.
    info_dict['__write_download_archive'] is set along the way to record
    whether the video should be added to the download archive.

    Raises MaxDownloadsReached once the 'max_downloads' param is reached and
    UnavailableVideoError when the download fails with an OSError.
    """

    assert info_dict.get('_type', 'video') == 'video'
    original_infodict = info_dict

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    if self._match_entry(info_dict) is not None:
        info_dict['__write_download_archive'] = 'ignore'
        return

    # Does nothing under normal operation - for backward compatibility of process_info
    self.post_extract(info_dict)

    def replace_info_dict(new_info):
        # Copy the contents of `new_info` into the caller's dict object so
        # that the caller observes the changes (see the assert at the end)
        nonlocal info_dict
        if new_info == info_dict:
            return
        info_dict.clear()
        info_dict.update(new_info)

    new_info, _ = self.pre_process(info_dict, 'video')
    replace_info_dict(new_info)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    def check_max_downloads():
        # A missing/zero 'max_downloads' means there is no limit
        if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
            raise MaxDownloadsReached()

    if self.params.get('simulate'):
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
        check_max_downloads()
        return

    if full_filename is None:
        return
    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    # Each writer below returns None to signal an error, which aborts processing
    if self._write_description('video', info_dict,
                               self.prepare_filename(info_dict, 'description')) is None:
        return

    sub_files = self._write_subtitles(info_dict, temp_filename)
    if sub_files is None:
        return
    files_to_move.update(dict(sub_files))

    thumb_files = self._write_thumbnails(
        'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
    if thumb_files is None:
        return
    files_to_move.update(dict(thumb_files))

    infofn = self.prepare_filename(info_dict, 'infojson')
    _infojson_written = self._write_info_json('video', info_dict, infofn)
    if _infojson_written:
        info_dict['infojson_filename'] = infofn
        # For backward compatibility, even though it was a private field
        info_dict['__infojson_filename'] = infofn
    elif _infojson_written is None:
        return

    # Note: Annotations are deprecated
    annofn = None
    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
    if annofn:
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except OSError:
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    # Write internet shortcut files
    def _write_link_file(link_type):
        # Returns False only on an error that should abort processing
        url = try_get(info_dict['webpage_url'], iri_to_uri)
        if not url:
            self.report_warning(
                f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
            return True
        linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
        if not self._ensure_dir_exists(encodeFilename(linkfn)):
            return False
        # Keep an existing shortcut only when overwriting is disabled
        # (same rule as for the annotations file above)
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
            return True
        try:
            self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
            with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                      newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                template_vars = {'url': url}
                if link_type == 'desktop':
                    # Filename without the trailing ".desktop"
                    template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
        except OSError:
            self.report_error(f'Cannot write internet shortcut {linkfn}')
            return False
        return True

    write_links = {
        'url': self.params.get('writeurllink'),
        'webloc': self.params.get('writewebloclink'),
        'desktop': self.params.get('writedesktoplink'),
    }
    if self.params.get('writelink'):
        # 'writelink' selects the shortcut type from the current platform
        link_type = ('webloc' if sys.platform == 'darwin'
                     else 'desktop' if sys.platform.startswith('linux')
                     else 'url')
        write_links[link_type] = True

    if any(should_write and not _write_link_file(link_type)
           for link_type, should_write in write_links.items()):
        return

    new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
    replace_info_dict(new_info)

    if self.params.get('skip_download'):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_video_file(*filepaths):
                # For every candidate, also look for the file with the 'final_ext' extension
                ext = info_dict.get('ext')
                converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
                file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
                                          default_overwrite=False)
                if file:
                    info_dict['ext'] = os.path.splitext(file)[1][1:]
                return file

            fd, success = None, True
            if info_dict.get('protocol') or info_dict.get('url'):
                fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
                        info_dict.get('section_start') or info_dict.get('section_end')):
                    msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
                           else 'You have requested downloading the video partially, but ffmpeg is not installed')
                    self.report_error(f'{msg}. Aborting')
                    return

            if info_dict.get('requested_formats') is not None:
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if (info_dict['ext'] == 'webm'
                            and info_dict.get('thumbnails')
                            # check with type instead of pp_key, __name__, or isinstance
                            # since we dont want any custom PPs to trigger this
                            and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):  # noqa: E721
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return f'{filename_wo_ext}.{ext}'

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_video_file(full_filename, temp_filename)

                info_dict['__real_download'] = False
                # NOTE: Copy so that original format dicts are not modified
                info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))

                merger = FFmpegMergerPP(self)
                downloaded = []
                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif fd:
                    # A single downloader handles all the requested formats at once
                    for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
                        f['filepath'] = fname = prepend_extension(
                            correct_ext(temp_filename, info_dict['ext']),
                            'f%s' % f['format_id'], info_dict['ext'])
                        downloaded.append(fname)
                    info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
                        if not self.params.get('ignoreerrors'):
                            self.report_error(f'{msg}. Aborting due to --abort-on-error')
                            return
                        self.report_warning(f'{msg}. The formats won\'t be merged')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                        fname = temp_filename
                    # Download every requested format separately
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            f['filepath'] = fname
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success

                if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
                    info_dict['__postprocessors'].append(merger)
                    info_dict['__files_to_merge'] = downloaded
                    # Even if there were no downloads, it is being merged only now
                    info_dict['__real_download'] = True
                else:
                    for file in downloaded:
                        files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_video_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except OSError as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
            return

        self._raise_pending_errors(info_dict)
        if success and full_filename != '-':

            def fixup():
                # Queue the ffmpeg-based fixup postprocessors that apply to
                # this download, or only warn, depending on the 'fixup' param
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = 'warn'
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    if not (do_fixup and cndn):
                        return
                    elif do_fixup == 'warn':
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(stretched_ratio not in (1, None),
                             f'Non-uniform pixel ratio {stretched_ratio}',
                             FFmpegFixupStretchedPP)

                downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                downloader = downloader.FD_NAME if downloader else None

                ext = info_dict.get('ext')
                postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
                    isinstance(pp, FFmpegVideoConvertorPP)
                    and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
                ) for pp in self._pps['post_process'])

                if not postprocessed_by_ffmpeg:
                    ffmpeg_fixup(fd != FFmpegFD and ext == 'm4a'
                                 and info_dict.get('container') == 'm4a_dash',
                                 'writing DASH m4a. Only some players support this container',
                                 FFmpegFixupM4aPP)
                    ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
                                 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
                                 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                 FFmpegFixupM3u8PP)
                    ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
                                 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

                ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            info_dict['__write_download_archive'] = True

    assert info_dict is original_infodict  # Make sure the info_dict was modified in-place
    if self.params.get('force_write_download_archive'):
        info_dict['__write_download_archive'] = True
    check_max_downloads()
3488
def __download_wrapper(self, func):
    """Wrap `func` with the error handling shared by the download entry points.

    The returned wrapper reports UnavailableVideoError, prints
    DownloadCancelled (re-raising it unless the 'break_per_url' param is set,
    in which case the download counter is reset) and, on success, dumps the
    result as JSON when 'dump_single_json' is enabled. It always returns None.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except UnavailableVideoError as err:
            self.report_error(err)
            return
        except DownloadCancelled as err:
            self.to_screen(f'[info] {err}')
            if self.params.get('break_per_url'):
                self._num_downloads = 0
                return
            raise

        if not self.params.get('dump_single_json', False):
            return
        self.post_extract(result)
        self.to_stdout(json.dumps(self.sanitize_info(result)))
    return wrapper
3506
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would all be written to one fixed
    output filename. Returns the accumulated download return code.
    """
    url_list = variadic(url_list)  # Passing a single URL is a common mistake
    outtmpl = self.params['outtmpl']['default']
    # A template without any '%' field produces the same name for every video
    is_fixed_name = outtmpl != '-' and '%' not in outtmpl
    if len(url_list) > 1 and is_fixed_name and self.params.get('max_downloads') != 1:
        raise SameFileError(outtmpl)

    extract = self.__download_wrapper(self.extract_info)
    for url in url_list:
        force_generic = self.params.get('force_generic_extractor', False)
        extract(url, force_generic_extractor=force_generic)

    return self._download_retcode
3522
def download_with_info_file(self, info_filename):
    """Download the video(s) described by a previously written info json file.

    If processing an entry fails and it has a 'webpage_url', the download is
    retried from that URL; without one the error is re-raised.
    Returns the accumulated download return code.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        payload = json.loads('\n'.join(lines))

    infos = []
    for raw_info in variadic(payload):
        infos.append(self.sanitize_info(raw_info, self.params.get('clean_infojson', True)))

    process = self.__download_wrapper(self.process_ie_result)
    for info in infos:
        try:
            process(info, download=True)
        except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as err:
            if not isinstance(err, EntryNotInPlaylist):
                self.to_stderr('\r')
            fallback_url = info.get('webpage_url')
            if fallback_url is None:
                raise
            self.report_warning(f'The info failed to download: {err}; trying with URL {fallback_url}')
            self.download([fallback_url])
    return self._download_retcode
3542
@staticmethod
def sanitize_info(info_dict, remove_private_keys=False):
    """Return a copy of the infodict that can be converted to json.

    The given dict itself gets 'epoch', '_type' and '_version' defaults.
    With `remove_private_keys`, entries whose value is None, whose key starts
    with '__' or whose key is one of the internal keys listed below are
    dropped at every nesting level. Values that are not dicts, sequences or
    json primitives are replaced by their repr().
    """
    if info_dict is None:
        return info_dict

    defaults = (
        ('epoch', int(time.time())),
        ('_type', 'video'),
        ('_version', {
            'version': __version__,
            'current_git_head': current_git_head(),
            'release_git_head': RELEASE_GIT_HEAD,
            'repository': REPOSITORY,
        }),
    )
    for key, value in defaults:
        info_dict.setdefault(key, value)

    internal_keys = {
        'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
        'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
        'playlist_autonumber', '_format_sort_fields',
    }

    def reject(key, value):
        if not remove_private_keys:
            return False
        return value is None or key.startswith('__') or key in internal_keys

    def sanitize(obj):
        if isinstance(obj, dict):
            return {k: sanitize(v) for k, v in obj.items() if not reject(k, v)}
        if isinstance(obj, (list, tuple, set, LazyList)):
            return [sanitize(item) for item in obj]
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        return repr(obj)

    return sanitize(info_dict)
3577
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    """Old name of sanitize_info, kept for backward compatibility.

    `actually_filter` is passed on as sanitize_info's `remove_private_keys`.
    """
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)
3582
3583 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3584 for filename in set(filter(None, files_to_delete)):
3585 if msg:
3586 self.to_screen(msg % filename)
3587 try:
3588 os.remove(filename)
3589 except OSError:
3590 self.report_warning(f'Unable to delete file {filename}')
3591 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3592 del info['__files_to_move'][filename]
3593
3594 @staticmethod
3595 def post_extract(info_dict):
3596 def actual_post_extract(info_dict):
3597 if info_dict.get('_type') in ('playlist', 'multi_video'):
3598 for video_dict in info_dict.get('entries', {}):
3599 actual_post_extract(video_dict or {})
3600 return
3601
3602 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3603 info_dict.update(post_extractor())
3604
3605 actual_post_extract(info_dict or {})
3606
def run_pp(self, pp, infodict):
    """Run a single postprocessor and dispose of the files it lists for deletion.

    `pp.run` must return (files_to_delete, infodict). A PostProcessingError
    is only reported (and the info dict returned as it was passed in) when
    the 'ignoreerrors' param is exactly True; otherwise it propagates.
    With the 'keepvideo' param the listed files are kept and registered in
    '__files_to_move' instead of being deleted.
    """
    infodict.setdefault('__files_to_move', {})
    try:
        files_to_delete, infodict = pp.run(infodict)
    except PostProcessingError as err:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is not True:
            raise
        self.report_error(err)
        return infodict

    if files_to_delete:
        if self.params.get('keepvideo', False):
            for path in files_to_delete:
                infodict['__files_to_move'].setdefault(path, '')
        else:
            self._delete_downloaded_files(
                *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
    return infodict
3629
def run_all_pps(self, key, info, *, additional_pps=None):
    """Run `additional_pps` followed by the postprocessors registered under `key`.

    For every key except 'video' the forced printings for that key are done
    first. Returns the info dict produced by the last postprocessor.
    """
    if key != 'video':
        self._forceprint(key, info)
    pipeline = (additional_pps or []) + self._pps[key]
    for postprocessor in pipeline:
        info = self.run_pp(postprocessor, info)
    return info
3636
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered under `key` on a copy of `ie_info`.

    A PostProcessingError is reported as a non-error message and stored
    under '__pending_error' (unless one is already set) instead of being
    raised.

    @returns  (info, files_to_move): the processed copy and the
              '__files_to_move' mapping taken out of it
    """
    info = dict(ie_info)
    info['__files_to_move'] = files_to_move or {}
    try:
        info = self.run_all_pps(key, info)
    except PostProcessingError as err:
        message = f'Preprocessing: {err}'
        info.setdefault('__pending_error', message)
        self.report_error(message, is_error=False)
    remaining_moves = info.pop('__files_to_move', None)
    return info, remaining_moves
3647
def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Runs the per-video '__postprocessors' and the 'post_process' ones, moves
    the files to their final location, then runs the 'after_move' ones.
    Returns the resulting info dict.
    """
    info.update({
        'filepath': filename,
        '__files_to_move': files_to_move or {},
    })
    extra_pps = info.get('__postprocessors')
    info = self.run_all_pps('post_process', info, additional_pps=extra_pps)
    mover = MoveFilesAfterDownloadPP(self)
    info = self.run_pp(mover, info)
    del info['__files_to_move']
    return self.run_all_pps('after_move', info)
3656
3657 def _make_archive_id(self, info_dict):
3658 video_id = info_dict.get('id')
3659 if not video_id:
3660 return
3661 # Future-proof against any change in case
3662 # and backwards compatibility with prior versions
3663 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3664 if extractor is None:
3665 url = str_or_none(info_dict.get('url'))
3666 if not url:
3667 return
3668 # Try to find matching extractor for the URL and take its ie_key
3669 for ie_key, ie in self._ies.items():
3670 if ie.suitable(url):
3671 extractor = ie_key
3672 break
3673 else:
3674 return
3675 return make_archive_id(extractor, video_id)
3676
def in_download_archive(self, info_dict):
    """Tell whether the video, under its current or any old id, is in the archive."""
    if not self.archive:
        return False

    candidates = [
        self._make_archive_id(info_dict),
        *(info_dict.get('_old_archive_ids') or []),
    ]
    return any(archive_id in self.archive for archive_id in candidates)
3684
def record_download_archive(self, info_dict):
    """Add the video's archive id to the download archive.

    Does nothing when the 'download_archive' param is unset. The id is
    appended to the archive file only when the param is path-like; it is
    always added to the in-memory archive.
    """
    archive_target = self.params.get('download_archive')
    if archive_target is None:
        return
    archive_id = self._make_archive_id(info_dict)
    assert archive_id

    self.write_debug(f'Adding to archive: {archive_id}')
    if is_path_like(archive_target):
        with locked_file(archive_target, 'a', encoding='utf-8') as archive_file:
            archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
3697
3698 @staticmethod
3699 def format_resolution(format, default='unknown'):
3700 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3701 return 'audio only'
3702 if format.get('resolution') is not None:
3703 return format['resolution']
3704 if format.get('width') and format.get('height'):
3705 return '%dx%d' % (format['width'], format['height'])
3706 elif format.get('height'):
3707 return '%sp' % format['height']
3708 elif format.get('width'):
3709 return '%dx?' % format['width']
3710 return default
3711
3712 def _list_format_headers(self, *headers):
3713 if self.params.get('listformats_table', True) is not False:
3714 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3715 return headers
3716
3717 def _format_note(self, fdict):
3718 res = ''
3719 if fdict.get('ext') in ['f4f', 'f4m']:
3720 res += '(unsupported)'
3721 if fdict.get('language'):
3722 if res:
3723 res += ' '
3724 res += '[%s]' % fdict['language']
3725 if fdict.get('format_note') is not None:
3726 if res:
3727 res += ' '
3728 res += fdict['format_note']
3729 if fdict.get('tbr') is not None:
3730 if res:
3731 res += ', '
3732 res += '%4dk' % fdict['tbr']
3733 if fdict.get('container') is not None:
3734 if res:
3735 res += ', '
3736 res += '%s container' % fdict['container']
3737 if (fdict.get('vcodec') is not None
3738 and fdict.get('vcodec') != 'none'):
3739 if res:
3740 res += ', '
3741 res += fdict['vcodec']
3742 if fdict.get('vbr') is not None:
3743 res += '@'
3744 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3745 res += 'video@'
3746 if fdict.get('vbr') is not None:
3747 res += '%4dk' % fdict['vbr']
3748 if fdict.get('fps') is not None:
3749 if res:
3750 res += ', '
3751 res += '%sfps' % fdict['fps']
3752 if fdict.get('acodec') is not None:
3753 if res:
3754 res += ', '
3755 if fdict['acodec'] == 'none':
3756 res += 'video only'
3757 else:
3758 res += '%-5s' % fdict['acodec']
3759 elif fdict.get('abr') is not None:
3760 if res:
3761 res += ', '
3762 res += 'audio'
3763 if fdict.get('abr') is not None:
3764 res += '@%3dk' % fdict['abr']
3765 if fdict.get('asr') is not None:
3766 res += ' (%5dHz)' % fdict['asr']
3767 if fdict.get('filesize') is not None:
3768 if res:
3769 res += ', '
3770 res += format_bytes(fdict['filesize'])
3771 elif fdict.get('filesize_approx') is not None:
3772 if res:
3773 res += ', '
3774 res += '~' + format_bytes(fdict['filesize_approx'])
3775 return res
3776
3777 def _get_formats(self, info_dict):
3778 if info_dict.get('formats') is None:
3779 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3780 return [info_dict]
3781 return []
3782 return info_dict['formats']
3783
def render_formats_table(self, info_dict):
    """Render the table of available formats, or return None if there are none.

    When the 'listformats_table' param is False, the old four-column layout
    is produced. In both layouts, formats with a preference below -1000 are
    left out.
    """
    formats = self._get_formats(info_dict)
    if not formats:
        return

    if self.params.get('listformats_table', True) is False:
        legacy_rows = []
        for f in formats:
            if (f.get('preference') or 0) < -1000:
                continue
            legacy_rows.append([
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)
            ])
        return render_table(['format code', 'extension', 'resolution', 'note'], legacy_rows, extra_gap=1)

    def simplified_codec(f, field):
        assert field in ('acodec', 'vcodec')
        codec = f.get(field)
        if not codec:
            return 'unknown'
        if codec != 'none':
            # Keep at most the first four dot-separated components
            return '.'.join(codec.split('.')[:4])

        # The codec is explicitly 'none' from here on
        if field == 'vcodec' and f.get('acodec') == 'none':
            return 'images'
        if field == 'acodec' and f.get('vcodec') == 'none':
            return ''
        label = 'audio only' if field == 'vcodec' else 'video only'
        return self._format_out(label, self.Styles.SUPPRESS)

    delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)

    def build_row(f):
        format_id = self._format_out(format_field(f, 'format_id'), self.Styles.ID)
        # Exact size if known, else the approximate one, else an estimate from duration and bitrate
        size = (
            format_field(f, 'filesize', ' \t%s', func=format_bytes)
            or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
            or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
                            None, self._format_out('~\t%s', self.Styles.SUPPRESS)))
        if f.get('has_drm') == 'maybe':
            drm_note = self._format_out('Maybe DRM', self.Styles.WARNING)
        elif f.get('has_drm'):
            drm_note = self._format_out('DRM', self.Styles.BAD_FORMAT)
        else:
            drm_note = None
        unsupported_note = (
            self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT)
            if f.get('ext') in ('f4f', 'f4m') else None)
        notes = join_nonempty(
            unsupported_note,
            drm_note,
            format_field(f, 'format_note'),
            format_field(f, 'container', ignore=(None, f.get('ext'))),
            delim=', ')
        more_info = join_nonempty(format_field(f, 'language', '[%s]'), notes, delim=' ')
        return [
            format_id,
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d', func=round),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            format_field(f, 'audio_channels', '\t%s'),
            delim,
            size,
            format_field(f, 'tbr', '\t%dk', func=round),
            shorten_protocol_name(f.get('protocol', '')),
            delim,
            simplified_codec(f, 'vcodec'),
            format_field(f, 'vbr', '\t%dk', func=round),
            simplified_codec(f, 'acodec'),
            format_field(f, 'abr', '\t%dk', func=round),
            format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
            more_info,
        ]

    table = [
        build_row(f) for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
        delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    rule = self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True)
    return render_table(header_line, table, hide_empty=True, delim=rule)
3850
def render_thumbnails_table(self, info_dict):
    """Render the table of available thumbnails, or return None if there are none."""
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        return None

    def row(thumb):
        return [
            thumb.get('id'),
            thumb.get('width') or 'unknown',
            thumb.get('height') or 'unknown',
            thumb['url'],
        ]

    headers = self._list_format_headers('ID', 'Width', 'Height', 'URL')
    return render_table(headers, [row(thumb) for thumb in thumbnails])
3858
def render_subtitles_table(self, video_id, subtitles):
    """Render the table of available subtitles, or return None if there are none.

    `subtitles` maps each language to its list of subtitle formats.
    """
    def _row(lang, formats):
        # The formats are listed in reverse of their stored order
        pairs = [(fmt['ext'], fmt.get('name') or 'unknown') for fmt in reversed(formats)]
        exts, names = zip(*pairs)
        if len(set(names)) == 1:
            # All names are the same: show it once, or not at all if it is unknown
            names = names[:1] if names[0] != 'unknown' else []
        return [lang, ', '.join(names), ', '.join(exts)]

    if not subtitles:
        return None
    headers = self._list_format_headers('Language', 'Name', 'Formats')
    rows = [_row(lang, formats) for lang, formats in subtitles.items()]
    return render_table(headers, rows, hide_empty=True)
3872
def __list_table(self, video_id, name, func, *args):
    """Print the table produced by func(*args), or a notice when it is empty.

    `name` is the kind of items listed; it is only used in the messages.
    """
    table = func(*args)
    if table:
        self.to_screen(f'[info] Available {name} for {video_id}:')
        self.to_stdout(table)
    else:
        self.to_screen(f'{video_id} has no {name}')
3880
def list_formats(self, info_dict):
    """Print the table of formats available for the video in `info_dict`."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'formats', self.render_formats_table, info_dict)
3883
def list_thumbnails(self, info_dict):
    """Print the table of thumbnails available for the video in `info_dict`."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'thumbnails', self.render_thumbnails_table, info_dict)
3886
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the table of `subtitles` available for `video_id`, labelled as `name`."""
    renderer = self.render_subtitles_table
    self.__list_table(video_id, name, renderer, video_id, subtitles)
3889
def print_debug_header(self):
    """Write the '[debug]' diagnostic header; a no-op unless the 'verbose' param is set.

    In order, the header reports: stream encodings, program version and
    variant, the params (API use only), lazy-extractor status, compat
    options, git HEAD, system identifier, external executable versions,
    optional libraries, the proxy map, loaded plugins and plugin directories.

    Lines are sent to params['logger'].debug() when a logger is configured;
    otherwise they are written through write_string()/self._write_string().
    """
    if not self.params.get('verbose'):
        return

    from . import _IN_CLI  # Must be delayed import

    # These imports can be slow. So import them only as needed
    from .extractor.extractors import _LAZY_LOADER
    from .extractor.extractors import (
        _PLUGIN_CLASSES as plugin_ies,
        _PLUGIN_OVERRIDES as plugin_ie_overrides
    )

    def get_encoding(stream):
        # Describe a stream's encoding plus any terminal limitations detected for it
        ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
        additional_info = []
        if os.environ.get('TERM', '').lower() == 'dumb':
            additional_info.append('dumb')
        if not supports_terminal_sequences(stream):
            from .utils import WINDOWS_VT_MODE  # Must be imported locally
            additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
        if additional_info:
            ret = f'{ret} ({",".join(additional_info)})'
        return ret

    encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        self.get_encoding(),
        ', '.join(
            f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
            if stream is not None and key != 'console')
    )

    logger = self.params.get('logger')
    if logger:
        write_debug = lambda msg: logger.debug(f'[debug] {msg}')
        write_debug(encoding_str)
    else:
        # The encoding line itself is written with encoding=None
        write_string(f'[debug] {encoding_str}\n', encoding=None)
        write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    if VARIANT not in (None, 'pip'):
        source += '*'
    klass = type(self)
    write_debug(join_nonempty(
        f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
        f'{CHANNEL}@{__version__}',
        f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
        delim=' '))

    if not _IN_CLI:
        write_debug(f'params: {self.params}')

    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if self.params['compat_opts']:
        write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))

    # Query the git head once and reuse the result for both the check and the message
    git_head = current_git_head()
    if git_head:
        write_debug(f'Git HEAD: {git_head}')
    write_debug(system_identifier())

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
    if ffmpeg_features:
        exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .compat.compat_utils import get_package_info
    from .dependencies import available_dependencies

    write_debug('Optional libraries: %s' % (', '.join(sorted({
        join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
    })) or 'none'))

    write_debug(f'Proxy map: {self.proxies}')
    # write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}')
    for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
        # A plugin is shown as "Class as name" when it is registered under a different name
        display_list = ['%s%s' % (
            plugin_cls.__name__, '' if plugin_cls.__name__ == name else f' as {name}')
            for name, plugin_cls in plugins.items()]
        if plugin_type == 'Extractor':
            # For overrides, the last entry of each list is the one that gets reported
            display_list.extend(f'{overrides[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
                                for parent, overrides in plugin_ie_overrides.items())
        if not display_list:
            continue
        write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')

    plugin_dirs = plugin_directories()
    if plugin_dirs:
        write_debug(f'Plugin directories: {plugin_dirs}')
4006
@functools.cached_property
def proxies(self):
    """Global proxy configuration

    An explicit 'proxy' param wins and is applied to all schemes (an empty
    string maps to '__noproxy__'). Without it, the mapping comes from
    urllib.request.getproxies().
    """
    explicit = self.params.get('proxy')
    if explicit is not None:
        return {'all': '__noproxy__' if explicit == '' else explicit}

    detected = urllib.request.getproxies()
    # compat. Set HTTPS_PROXY to __noproxy__ to revert
    if 'https' not in detected and 'http' in detected:
        detected['https'] = detected['http']
    return detected
4022
@functools.cached_property
def cookiejar(self):
    """Global cookiejar instance

    Built by load_cookies() from the 'cookiefile' and 'cookiesfrombrowser' params.
    """
    cookie_file = self.params.get('cookiefile')
    browser_spec = self.params.get('cookiesfrombrowser')
    return load_cookies(cookie_file, browser_spec, self)
4028
@property
def _opener(self):
    """
    Get a urllib OpenerDirector from the Urllib handler (deprecated).

    Emits a deprecation warning on every access.
    """
    self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
    urllib_handler = self._request_director.handlers['Urllib']
    instance_opts = {'cookiejar': self.cookiejar, 'proxies': self.proxies}
    return urllib_handler._get_instance(**instance_opts)
4037
def urlopen(self, req):
    """ Start an HTTP download

    `req` may be a URL string, a Request, or (deprecated) a
    urllib.request.Request which is converted with urllib_req_to_req().
    The request is sent through self._request_director.

    Some failures are re-raised as RequestError with a hint about the
    relevant option, and HTTPError is re-raised as _CompatHTTPError.
    """
    if isinstance(req, urllib.request.Request):
        self.deprecation_warning(
            'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
            'Use yt_dlp.networking.common.Request instead.')
        req = urllib_req_to_req(req)
    elif isinstance(req, str):
        req = Request(req)
    assert isinstance(req, Request)

    # compat: Assume user:pass url params are basic auth
    plain_url, auth_header = extract_basic_auth(req.url)
    if auth_header:
        req.headers['Authorization'] = auth_header
    req.url = sanitize_url(plain_url)

    clean_proxies(proxies=req.proxies, headers=req.headers)
    clean_headers(req.headers)

    try:
        return self._request_director.send(req)
    except NoSupportingHandlers as e:
        for ue in e.unsupported_errors:
            # Only errors that carry both a handler and a message can be inspected
            file_url_rejected = (
                ue.handler and ue.msg
                and ue.handler.RH_KEY == 'Urllib'
                and 'unsupported url scheme: "file"' in ue.msg.lower())
            if file_url_rejected:
                raise RequestError(
                    'file:// URLs are disabled by default in yt-dlp for security reasons. '
                    'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
        raise
    except SSLError as e:
        ssl_msg = str(e)
        if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in ssl_msg:
            raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
        if 'SSLV3_ALERT_HANDSHAKE_FAILURE' in ssl_msg:
            raise RequestError(
                'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
                'Try using --legacy-server-connect', cause=e) from e
        raise
    except HTTPError as e:  # TODO: Remove in a future release
        raise _CompatHTTPError(e) from e
4079
def build_request_director(self, handlers, preferences=None):
    """Create a RequestDirector holding one instance of each class in `handlers`.

    Every handler shares the same logger, a cleaned copy of the
    'http_headers' param, a cleaned copy of self.proxies and self.cookiejar;
    the remaining options are taken from self.params. `preferences`, if
    given, are added to the director's preferences.
    """
    logger = _YDLLogger(self)
    headers = self.params['http_headers'].copy()
    proxies = self.proxies.copy()
    clean_headers(headers)
    clean_proxies(proxies, headers)

    director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
    for handler_cls in handlers:
        base_opts = {
            'logger': logger,
            'headers': headers,
            'cookiejar': self.cookiejar,
            'proxies': proxies,
            'prefer_system_certs': 'no-certifi' in self.params['compat_opts'],
            'verify': not self.params.get('nocheckcertificate'),
        }
        # Handler option name -> name of the param that supplies it
        param_opts = traverse_obj(self.params, {
            'verbose': 'debug_printtraffic',
            'source_address': 'source_address',
            'timeout': 'socket_timeout',
            'legacy_ssl_support': 'legacyserverconnect',
            'enable_file_urls': 'enable_file_urls',
            'client_cert': {
                'client_certificate': 'client_certificate',
                'client_certificate_key': 'client_certificate_key',
                'client_certificate_password': 'client_certificate_password',
            },
        })
        director.add_handler(handler_cls(**base_opts, **param_opts))
    director.preferences.update(preferences or [])
    return director
4111
def encode(self, s):
    """Return `s` as bytes, encoding str input with self.get_encoding().

    bytes input is returned unchanged. On UnicodeEncodeError a hint about
    the --encoding option is appended to the error's reason before it is
    re-raised.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s  # Already encoded
4121
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is unset (None)."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
4127
def _write_info_json(self, label, ie_result, infofn, overwrite=None):
    ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error

    When overwrite is None, the 'overwrites' param (default True) decides
    whether an existing file is replaced.
    '''
    if overwrite is None:
        overwrite = self.params.get('overwrites', True)

    if not self.params.get('writeinfojson'):
        return False
    if not infofn:
        self.write_debug(f'Skipping writing {label} infojson')
        return False
    if not self._ensure_dir_exists(infofn):
        return None
    if not overwrite and os.path.exists(infofn):
        self.to_screen(f'[info] {label.title()} metadata is already present')
        return 'exists'

    self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
    try:
        sanitized = self.sanitize_info(ie_result, self.params.get('clean_infojson', True))
        write_json_file(sanitized, infofn)
    except OSError:
        self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
        return None
    return True
4150
def _write_description(self, label, ie_result, descfn):
    ''' Write description and returns True = written, False = skip, None = error

    True is also returned when the file already exists and the 'overwrites'
    param is false.
    '''
    if not self.params.get('writedescription'):
        return False
    if not descfn:
        self.write_debug(f'Skipping writing {label} description')
        return False
    if not self._ensure_dir_exists(descfn):
        return None
    if not self.params.get('overwrites', True) and os.path.exists(descfn):
        self.to_screen(f'[info] {label.title()} description is already present')
        return True

    description = ie_result.get('description')
    if description is None:
        self.to_screen(f'[info] There\'s no {label} description to write')
        return False

    try:
        self.to_screen(f'[info] Writing {label} description to: {descfn}')
        with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
            descfile.write(description)
    except OSError:
        self.report_error(f'Cannot write {label} description file {descfn}')
        return None
    return True
4174
def _write_subtitles(self, info_dict, filename):
    ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error

    Works on info_dict['requested_subtitles']. Each entry that is written
    (or found already present) gets its 'filepath' key set.
    '''
    ret = []
    subtitles = info_dict.get('requested_subtitles')
    if not self.params.get('writesubtitles') and not self.params.get('writeautomaticsub'):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        return ret
    if not subtitles:
        self.to_screen('[info] There are no subtitles for the requested languages')
        return ret
    sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
    if not sub_filename_base:
        self.to_screen('[info] Skipping writing video subtitles')
        return ret

    video_ext = info_dict.get('ext')
    for sub_lang, sub_info in subtitles.items():
        sub_format = sub_info['ext']
        sub_filename = subtitles_filename(filename, sub_lang, sub_format, video_ext)
        sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, video_ext)

        existing_sub = self.existing_file((sub_filename_final, sub_filename))
        if existing_sub:
            self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
            sub_info['filepath'] = existing_sub
            ret.append((existing_sub, sub_filename_final))
            continue

        self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
        inline_data = sub_info.get('data')
        if inline_data is not None:
            # The subtitle content is already in memory; no download needed
            try:
                # Use newline='' to prevent conversion of newline characters
                # See https://github.com/ytdl-org/youtube-dl/issues/10268
                with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                    subfile.write(inline_data)
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
            except OSError:
                self.report_error(f'Cannot write video subtitles file {sub_filename}')
                return None
            continue

        try:
            sub_copy = sub_info.copy()
            sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
            self.dl(sub_filename, sub_copy, subtitle=True)
            sub_info['filepath'] = sub_filename
            ret.append((sub_filename, sub_filename_final))
        # OSError also covers IOError, which is an alias of it in Python 3
        except (DownloadError, ExtractorError, OSError, ValueError) + network_exceptions as err:
            msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
            ignore_errors = self.params.get('ignoreerrors')
            if ignore_errors is True:
                self.report_warning(msg)
                continue
            # False or 'only_download'
            if not ignore_errors:
                self.report_error(msg)
            raise DownloadError(msg)
    return ret
4230
def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
    ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename)

    Nothing is downloaded unless the 'writethumbnail' or
    'write_all_thumbnails' param is set. Without 'write_all_thumbnails',
    processing stops after the first thumbnail that is written or found
    already present. Thumbnails whose download fails are removed from
    info_dict['thumbnails']; successful ones get their 'filepath' key set.
    thumb_filename_base defaults to filename.
    '''
    write_all = self.params.get('write_all_thumbnails', False)
    thumbnails, ret = [], []
    if write_all or self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails') or []
        if not thumbnails:
            self.to_screen(f'[info] There are no {label} thumbnails to download')
            return ret
    multiple = write_all and len(thumbnails) > 1

    if thumb_filename_base is None:
        thumb_filename_base = filename
    if thumbnails and not thumb_filename_base:
        self.write_debug(f'Skipping writing {label} thumbnail')
        return ret

    # Walk a reversed snapshot so that popping a failed entry by index
    # does not shift the entries that are still to be visited
    for idx, t in list(enumerate(thumbnails))[::-1]:
        thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
        thumb_display_id = f'{label} thumbnail {t["id"]}'
        thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
        thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

        existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
        if existing_thumb:
            self.to_screen('[info] %s is already present' % (
                thumb_display_id if multiple else f'{label} thumbnail').capitalize())
            t['filepath'] = existing_thumb
            ret.append((existing_thumb, thumb_filename_final))
        else:
            self.to_screen(f'[info] Downloading {thumb_display_id} ...')
            try:
                # Close the response once it has been copied (or on failure)
                # so that the underlying connection is not leaked
                with contextlib.closing(
                        self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))) as uf:
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                ret.append((thumb_filename, thumb_filename_final))
                t['filepath'] = thumb_filename
            except network_exceptions as err:
                if isinstance(err, HTTPError) and err.status == 404:
                    self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
                else:
                    self.report_warning(f'Unable to download {thumb_display_id}: {err}')
                thumbnails.pop(idx)
        if ret and not write_all:
            break
    return ret