import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import http.cookiejar
import io
import itertools
import json
import locale
import operator
import os
import random
import re
import shutil
import string
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata

from .cache import Cache
from .compat import functools, urllib  # isort: split
from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
from .cookies import LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .networking import HEADRequest, Request, RequestDirector
from .networking.common import _REQUEST_HANDLERS
from .networking.exceptions import (
    HTTPError,
    NoSupportingHandlers,
    RequestError,
    SSLError,
    _CompatHTTPError,
    network_exceptions,
)
from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    FFmpegVideoConvertorPP,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import REPOSITORY, current_git_head, detect_variant
from .utils import (
    DEFAULT_OUTTMPL,
    IDENTITY,
    LINK_TEMPLATES,
    MEDIA_EXTENSIONS,
    NO_DEFAULT,
    NUMBER_RE,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    FormatSorter,
    GeoRestrictedError,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PlaylistEntries,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    UserNotLive,
    age_restricted,
    args_to_str,
    bug_reports_message,
    date_from_str,
    deprecation_warning,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    escapeHTML,
    expand_path,
    extract_basic_auth,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_compatible_ext,
    get_domain,
    int_or_none,
    iri_to_uri,
    is_path_like,
    join_nonempty,
    locked_file,
    make_archive_id,
    make_dir,
    number_of_digits,
    orderedSet,
    orderedSet_from_options,
    parse_filesize,
    preferredencoding,
    prepend_extension,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    system_identifier,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_call,
    try_get,
    url_basename,
    variadic,
    version_tuple,
    windows_enable_vt_mode,
    write_json_file,
    write_string,
)
from .utils._utils import _YDLLogger
from .utils.networking import (
    HTTPHeaderDict,
    clean_headers,
    clean_proxies,
    std_headers,
)
from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do),
    it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge of the InfoExtractors
    that are added to it, so this is a "mutual registration".
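
    A minimal usage sketch (the URL and options are illustrative):

        from yt_dlp import YoutubeDL

        with YoutubeDL({'outtmpl': '%(title)s.%(ext)s'}) as ydl:
            ydl.download(['https://example.com/some/video'])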

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    netrc_location:    Location of the netrc file. Defaults to ~/.netrc.
    netrc_cmd:         Use a shell command to get credentials
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are video or any of the
                       items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. See "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be Windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    allowed_extractors: List of regexes to match against extractor names that are allowed
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playlist_items:    Specific indices of playlist to download.
    playlistrandom:    Download playlist items in random order.
    lazy_playlist:     Process playlist entries as they are received.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Print everything to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove internal metadata from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson, etc.
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages, e.g. ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A utils.DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  A set, or the name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL rather than on the entire queue
    cookiefile:        File name or text stream from where cookies should be read and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, the name of the keyring,
                       and the container name, e.g. ('chrome', ) or
                       ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    client_certificate: Path to client certificate file in PEM format. May include the private key
    client_certificate_key: Path to private key file for client certificate
    client_certificate_password: Password for client certificate private key, if encrypted.
                       If not provided and the key is encrypted, yt-dlp will ask interactively
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       (Only supported by some extractors)
    enable_file_urls:  Enable file:// URLs. This is disabled by default for security reasons.
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    default_search:    Prepend this string if an input URL is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Whether to resolve and process url_results further
                       * False: Always process. Default for API
                       * True: Never process
                       * 'in_playlist': Do not process inside playlist/multi_video
                       * 'discard': Always process, but don't return the result
                         from inside playlist/multi_video
                       * 'discard_in_playlist': Same as "discard", but only for
                         playlists (not multi_video). Default for CLI
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                         the entries of utils.POSTPROCESS_WHEN
                         Assumed to be 'post_process' if not given
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                         Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                         None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                         unknown
                       * fragment_index: The counter of the currently
                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                         Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: "/" separated list of extensions to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval:Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       - Raise utils.DownloadCancelled(msg) to abort remaining
                         downloads when a video is rejected.
                       match_filter_func in utils.py is one example of this;
                       see also the sketch after this options list.
    color:             A dictionary with output stream names as keys
                       and their respective color policy as values.
                       Can also just be a single color policy,
                       in which case it applies to all outputs.
                       Valid stream names are 'stdout' and 'stderr'.
                       Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-1 alpha-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort,
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'
    retry_sleep_functions: Dictionary of functions that take the number of attempts
                       as argument and return the time to sleep in seconds.
                       Allowed keys are 'http', 'fragment', 'file_access'
    download_ranges:   A callback function that gets called for every video with
                       the signature (info_dict, ydl) -> Iterable[Section].
                       Only the returned sections will be downloaded.
                       Each Section is a dict with the following keys:
                       * start_time: Start time of the section in seconds
                       * end_time: End time of the section in seconds
                       * title: Section title (Optional)
                       * index: Section number (Optional)
    force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
    noprogress:        Do not print the progress bar
    live_from_start:   Whether to download livestreams from the start

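    An illustrative sketch combining several of the options above (the hook,
    the filter and all values are examples, not prescriptions):

        def my_hook(d):
            # Check 'status' first and ignore unknown values
            if d['status'] == 'finished':
                print('Done downloading', d['filename'])

        def my_filter(info_dict, *, incomplete):
            # Return a message to skip the video, or None to download it
            if (info_dict.get('duration') or 0) > 3600:
                return 'Skipping videos longer than an hour'

        params = {
            'format': 'bestvideo+bestaudio/best',
            'outtmpl': {'default': '%(title)s.%(ext)s'},
            'progress_hooks': [my_hook],
            'match_filter': my_filter,
            'postprocessors': [{'key': 'FFmpegMetadata'}],
        }
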
    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used
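
    For example, extra arguments can be passed to a single executable
    (the values below are illustrative):

        params = {'postprocessor_args': {'ffmpeg': ['-threads', '1']}}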

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors (default: 3)
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       E.g. {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
                       - `raise DownloadCancelled(msg)` in match_filter instead
    force_generic_extractor: Force downloader to use the generic extractor
                       - Use allowed_extractors = ['generic', 'default']
    playliststart:     - Use playlist_items
                       Playlist item to start at.
    playlistend:       - Use playlist_items
                       Playlist item to end at.
    playlistreverse:   - Use playlist_items
                       Download playlist items in reverse order.
    forceurl:          - Use forceprint
                       Force printing final URL.
    forcetitle:        - Use forceprint
                       Force printing title.
    forceid:           - Use forceprint
                       Force printing ID.
    forcethumbnail:    - Use forceprint
                       Force printing thumbnail URL.
    forcedescription:  - Use forceprint
                       Force printing description.
    forcefilename:     - Use forceprint
                       Force printing final filename.
    forceduration:     - Use forceprint
                       Force printing duration.
    allsubtitles:      - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    include_ads:       - Doesn't work
                       Download ads as well
    call_home:         - Not implemented
                       Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging.
    post_hooks:        - Register a custom postprocessor
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    prefer_ffmpeg:     - avconv support is deprecated
                       If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    no_color:          Same as `color='no_color'`
    """

    _NUMERIC_FIELDS = {
        'width', 'height', 'asr', 'audio_channels', 'fps',
        'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
    }
    _format_selection_exts = {
        'audio': set(MEDIA_EXTENSIONS.common_audio),
        'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
        'storyboards': set(MEDIA_EXTENSIONS.storyboards),
    }

    def __init__(self, params=None, auto_init=True):
        """Create a YoutubeDL object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )

        try:
            windows_enable_vt_mode()
        except Exception as e:
            self.write_debug(f'Failed to enable VT mode: {e}')

        if self.params.get('no_color'):
            if self.params.get('color') is not None:
                self.params.setdefault('_warnings', []).append(
                    'Overwriting params from "color" with "no_color"')
            self.params['color'] = 'no_color'

        term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'

        def process_color_policy(stream):
            stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
            policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
            if policy in ('auto', None):
                return term_allow_color and supports_terminal_sequences(stream)
            assert policy in ('always', 'never', 'no_color')
            return {'always': True, 'never': False}.get(policy, policy)

        self._allow_colors = Namespace(**{
            name: process_color_policy(stream)
            for name, stream in self._out_files.items_ if name != 'console'
        })

        # The code is left like this to be reused for future deprecations
        MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
        current_version = sys.version_info[:2]
        if current_version < MIN_RECOMMENDED:
            msg = ('Support for Python version %d.%d has been deprecated. '
                   'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
                   '\n You will no longer receive updates on this version')
            if current_version < MIN_SUPPORTED:
                msg = 'Python version %d.%d is no longer supported'
            self.deprecated_feature(
                f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
        self.__header_cookies = []
        self._load_cookies(self.params['http_headers'].get('Cookie'))  # compat
        self.params['http_headers'].pop('Cookie', None)

        self._request_director = self.build_request_director(
            sorted(_REQUEST_HANDLERS.values(), key=lambda rh: rh.RH_NAME.lower()))
        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if self.params.get('simulate') is None and any((
            self.params.get('list_thumbnails'),
            self.params.get('listformats'),
            self.params.get('listsubtitles'),
        )):
            self.params['simulate'] = 'list_only'

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if auto_init:
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            archive = set()
            if fn is None:
                return archive
            elif not is_path_like(fn):
                return fn

            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
            return archive

        self.archive = preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key. It will try to get one from
        the _ies list; if there's no instance, it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractor_classes to the end of the list
        """
        all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
        all_ies['end'] = UnsupportedURLIE()
        try:
            ie_names = orderedSet_from_options(
                self.params.get('allowed_extractors', ['default']), {
                    'all': list(all_ies),
                    'default': [name for name, ie in all_ies.items() if ie._ENABLED],
                }, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
        for name in ie_names:
            self.add_info_extractor(all_ies[name])
        self.write_debug(f'Loaded {len(ie_names)} extractors')

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                     'Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                     'Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen, only_once=only_once)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def save_cookies(self):
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save()

    def __exit__(self, *args):
        self.restore_console_title()
        self.close()

    def close(self):
        self.save_cookies()
        self._request_director.close()

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        BAD_FORMAT='light red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors is True else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message, *, stacklevel=0):
        deprecation_warning(
            message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)

    def deprecated_feature(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'Deprecated Feature: {message}')
        self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefix the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message or print message to stderr'''
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        sanitize = IDENTITY
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join(random.choices(string.ascii_letters, k=32))
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err
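
    # For instance (illustrative), YoutubeDL.validate_outtmpl('%(title)s.%(ext)s')
    # returns None, while a malformed template returns the ValueError describing it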

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        info_dict.pop('__pending_error', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int, slice or "{field, ...}"
        FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
        FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
            'inner': FIELD_INNER_RE,
            'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
        }
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')
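        # Putting the groups together, a full template field therefore looks
        # like the following (illustrative):
        #   %(field>strf_format,alternate&replacement|default)s
        # e.g. '%(upload_date>%Y-%m-%d|unknown)s'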

        def _traverse_infodict(fields):
            fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                      for f in ([x] if x.startswith('{') else x.split('.'))]
            for i in (0, -1):
                if fields and not fields[i]:
                    fields.pop(i)

            for i, f in enumerate(fields):
                if not f.startswith('{'):
                    continue
                assert f.endswith('}'), f'No closing brace for {f} in {fields}'
                fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}

            return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
            if sanitize and value == '':
                value = None
            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params['compat_opts']
                else NO_DEFAULT))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        class _ReplacementFormatter(string.Formatter):
            def get_field(self, field_name, args, kwargs):
                if field_name.isdigit():
                    return args[0], -1
                raise ValueError('Unsupported field')

        replacement_formatter = _ReplacementFormatter()

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            value, replacement, default, last_field = None, None, na, ''
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                last_field, replacement = mobj['fields'], mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            if None not in (value, replacement):
                try:
                    value = replacement_formatter.format(replacement, value)
                except ValueError:
                    value, default = None, na

            fmt = outer_mobj.group('format')
            if fmt == 's' and last_field in field_size_compat_map.keys() and isinstance(value, int):
                fmt = f'0{field_size_compat_map[last_field]:d}d'

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if value is None:
                value, fmt = default, 's'
            elif fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(
                    value, default=_dumpjson_default,
                    indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
            elif fmt[-1] == 'h':  # html
                value, fmt = escapeHTML(str(value)), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode() % str(value).encode()
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rsa':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                if fmt[-1] == 'r':
                    value, fmt = repr(value), str_fmt
                elif fmt[-1] == 'a':
                    value, fmt = ascii(value), str_fmt
                if fmt[-1] in 'csra':
                    value = sanitizer(last_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

1370 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1371 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1372 return self.escape_outtmpl(outtmpl) % info_dict
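# Illustrative annotation (not upstream code): a hedged sketch of expanding an
# output template via `evaluate_outtmpl`. The info dict below is made up for
# the example, not a real extractor result.
#
#   ydl = YoutubeDL()
#   ydl.evaluate_outtmpl(
#       '%(title).20s [%(id)s].%(ext)s',
#       {'title': 'An Example Video', 'id': 'abc123', 'ext': 'mp4'})
#   # -> 'An Example Video [abc123].mp4'
#   ydl.evaluate_outtmpl('%(tags)l', {'tags': ['music', 'live']})
#   # -> 'music, live' (the 'l' conversion joins list fields)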
1373
1374 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1375 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1376 if outtmpl is None:
1377 outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
1378 try:
1379 outtmpl = self._outtmpl_expandpath(outtmpl)
1380 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1381 if not filename:
1382 return None
1383
1384 if tmpl_type in ('', 'temp'):
1385 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1386 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1387 filename = replace_extension(filename, ext, final_ext)
1388 elif tmpl_type:
1389 force_ext = OUTTMPL_TYPES[tmpl_type]
1390 if force_ext:
1391 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1392
1393 # https://github.com/blackjack4494/youtube-dlc/issues/85
1394 trim_file_name = self.params.get('trim_file_name', False)
1395 if trim_file_name:
1396 no_ext, *ext = filename.rsplit('.', 2)
1397 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1398
1399 return filename
1400 except ValueError as err:
1401 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1402 return None
1403
1404 def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1405 """Generate the output filename"""
1406 if outtmpl:
1407 assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1408 dir_type = None
1409 filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
1410 if not filename and dir_type not in ('', 'temp'):
1411 return ''
1412
1413 if warn:
1414 if not self.params.get('paths'):
1415 pass
1416 elif filename == '-':
1417 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
1418 elif os.path.isabs(filename):
1419 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1420 if filename == '-' or not filename:
1421 return filename
1422
1423 return self.get_output_path(dir_type, filename)
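# Worked example (assumed values): with the default template
# '%(title)s [%(id)s].%(ext)s', an info dict such as
# {'id': 'abc123', 'title': 'An Example Video', 'ext': 'mp4'} yields
# 'An Example Video [abc123].mp4'. Passing dir_type='infojson' instead forces
# the 'info.json' extension via OUTTMPL_TYPES, and the result is finally
# routed through get_output_path() so that --paths is honoured.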
1424
1425 def _match_entry(self, info_dict, incomplete=False, silent=False):
1426 """Returns None if the file should be downloaded"""
1427 _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
1428 assert incomplete or _type == 'video', 'Only video result can be considered complete'
1429
1430 video_title = info_dict.get('title', info_dict.get('id', 'entry'))
1431
1432 def check_filter():
1433 if _type in ('playlist', 'multi_video'):
1434 return
1435 elif _type in ('url', 'url_transparent') and not try_call(
1436 lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
1437 return
1438
1439 if 'title' in info_dict:
1440 # This can happen when we're just evaluating the playlist
1441 title = info_dict['title']
1442 matchtitle = self.params.get('matchtitle', False)
1443 if matchtitle:
1444 if not re.search(matchtitle, title, re.IGNORECASE):
1445 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1446 rejecttitle = self.params.get('rejecttitle', False)
1447 if rejecttitle:
1448 if re.search(rejecttitle, title, re.IGNORECASE):
1449 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1450
1451 date = info_dict.get('upload_date')
1452 if date is not None:
1453 dateRange = self.params.get('daterange', DateRange())
1454 if date not in dateRange:
1455 return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
1456 view_count = info_dict.get('view_count')
1457 if view_count is not None:
1458 min_views = self.params.get('min_views')
1459 if min_views is not None and view_count < min_views:
1460 return 'Skipping %s because it has not reached the minimum view count (%d/%d)' % (video_title, view_count, min_views)
1461 max_views = self.params.get('max_views')
1462 if max_views is not None and view_count > max_views:
1463 return 'Skipping %s because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1464 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1465 return 'Skipping "%s" because it is age restricted' % video_title
1466
1467 match_filter = self.params.get('match_filter')
1468 if match_filter is None:
1469 return None
1470
1471 cancelled = None
1472 try:
1473 try:
1474 ret = match_filter(info_dict, incomplete=incomplete)
1475 except TypeError:
1476 # For backward compatibility
1477 ret = None if incomplete else match_filter(info_dict)
1478 except DownloadCancelled as err:
1479 if err.msg is not NO_DEFAULT:
1480 raise
1481 ret, cancelled = err.msg, err
1482
1483 if ret is NO_DEFAULT:
1484 while True:
1485 filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1486 reply = input(self._format_screen(
1487 f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1488 if reply in {'y', ''}:
1489 return None
1490 elif reply == 'n':
1491 if cancelled:
1492 raise type(cancelled)(f'Skipping {video_title}')
1493 return f'Skipping {video_title}'
1494 return ret
1495
1496 if self.in_download_archive(info_dict):
1497 reason = ''.join((
1498 format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
1499 format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
1500 'has already been recorded in the archive'))
1501 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1502 else:
1503 try:
1504 reason = check_filter()
1505 except DownloadCancelled as e:
1506 reason, break_opt, break_err = e.msg, 'match_filter', type(e)
1507 else:
1508 break_opt, break_err = 'break_on_reject', RejectedVideoReached
1509 if reason is not None:
1510 if not silent:
1511 self.to_screen('[download] ' + reason)
1512 if self.params.get(break_opt, False):
1513 raise break_err()
1514 return reason
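# A hedged sketch of a custom 'match_filter' callable (hypothetical): a
# returned string skips the entry with that reason, None lets the download
# proceed, and NO_DEFAULT triggers the interactive prompt handled above.
#
#   def skip_long_videos(info, *, incomplete=False):
#       if (info.get('duration') or 0) > 600:
#           return 'duration is longer than 10 minutes'
#       return None
#
#   ydl = YoutubeDL({'match_filter': skip_long_videos})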
1515
1516 @staticmethod
1517 def add_extra_info(info_dict, extra_info):
1518 '''Set the keys from extra_info in info dict if they are missing'''
1519 for key, value in extra_info.items():
1520 info_dict.setdefault(key, value)
1521
1522 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1523 process=True, force_generic_extractor=False):
1524 """
1525 Extract and return the information dictionary of the URL
1526
1527 Arguments:
1528 @param url URL to extract
1529
1530 Keyword arguments:
1531 @param download Whether to download videos
1532 @param process Whether to resolve all unresolved references (URLs, playlist items).
1533 Must be True for download to work
1534 @param ie_key Use only the extractor with this key
1535
1536 @param extra_info Dictionary containing the extra values to add to the info (For internal use only)
1537 @param force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic')
1538 """
1539
1540 if extra_info is None:
1541 extra_info = {}
1542
1543 if not ie_key and force_generic_extractor:
1544 ie_key = 'Generic'
1545
1546 if ie_key:
1547 ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
1548 else:
1549 ies = self._ies
1550
1551 for key, ie in ies.items():
1552 if not ie.suitable(url):
1553 continue
1554
1555 if not ie.working():
1556 self.report_warning('The program functionality for this site has been marked as broken, '
1557 'and will probably not work.')
1558
1559 temp_id = ie.get_temp_id(url)
1560 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1561 self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
1562 'has already been recorded in the archive')
1563 if self.params.get('break_on_existing', False):
1564 raise ExistingVideoReached()
1565 break
1566 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1567 else:
1568 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1569 self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1570 tb=False if extractors_restricted else None)
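# Typical usage (illustrative URL, not a real endpoint):
#
#   with YoutubeDL({'quiet': True}) as ydl:
#       info = ydl.extract_info('https://example.com/video', download=False)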
1571
1572 def _handle_extraction_exceptions(func):
1573 @functools.wraps(func)
1574 def wrapper(self, *args, **kwargs):
1575 while True:
1576 try:
1577 return func(self, *args, **kwargs)
1578 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1579 raise
1580 except ReExtractInfo as e:
1581 if e.expected:
1582 self.to_screen(f'{e}; Re-extracting data')
1583 else:
1584 self.to_stderr('\r')
1585 self.report_warning(f'{e}; Re-extracting data')
1586 continue
1587 except GeoRestrictedError as e:
1588 msg = e.msg
1589 if e.countries:
1590 msg += '\nThis video is available in %s.' % ', '.join(
1591 map(ISO3166Utils.short2full, e.countries))
1592 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1593 self.report_error(msg)
1594 except ExtractorError as e: # An error we somewhat expected
1595 self.report_error(str(e), e.format_traceback())
1596 except Exception as e:
1597 if self.params.get('ignoreerrors'):
1598 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1599 else:
1600 raise
1601 break
1602 return wrapper
1603
1604 def _wait_for_video(self, ie_result={}):
1605 if (not self.params.get('wait_for_video')
1606 or ie_result.get('_type', 'video') != 'video'
1607 or ie_result.get('formats') or ie_result.get('url')):
1608 return
1609
1610 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1611 last_msg = ''
1612
1613 def progress(msg):
1614 nonlocal last_msg
1615 full_msg = f'{msg}\n'
1616 if not self.params.get('noprogress'):
1617 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1618 elif last_msg:
1619 return
1620 self.to_screen(full_msg, skip_eol=True)
1621 last_msg = msg
1622
1623 min_wait, max_wait = self.params.get('wait_for_video')
1624 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1625 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1626 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1627 self.report_warning('Release time of video is not known')
1628 elif ie_result and (diff or 0) <= 0:
1629 self.report_warning('Video should already be available according to extracted info')
1630 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1631 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1632
1633 wait_till = time.time() + diff
1634 try:
1635 while True:
1636 diff = wait_till - time.time()
1637 if diff <= 0:
1638 progress('')
1639 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1640 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1641 time.sleep(1)
1642 except KeyboardInterrupt:
1643 progress('')
1644 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1645 except BaseException as e:
1646 if not isinstance(e, ReExtractInfo):
1647 self.to_screen('')
1648 raise
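# Note: 'wait_for_video' is a (min_wait, max_wait) tuple of seconds, e.g.
# YoutubeDL({'wait_for_video': (60, 600)}). A known release timestamp takes
# precedence; the random wait within min/max is only a fallback for upcoming
# lives whose release time is not known.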
1649
1650 def _load_cookies(self, data, *, autoscope=True):
1651 """Loads cookies from a `Cookie` header
1652
1653 This tries to work around the security vulnerability of passing cookies to every domain.
1654 See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
1655
1656 @param data The Cookie header as string to load the cookies from
1657 @param autoscope If `False`, scope cookies using Set-Cookie syntax and raise an error for cookies without a domain
1658 If `True`, save cookies for later to be stored in the jar with a limited scope
1659 If a URL, save cookies in the jar with the domain of the URL
1660 """
1661 for cookie in LenientSimpleCookie(data).values():
1662 if autoscope and any(cookie.values()):
1663 raise ValueError('Invalid syntax in Cookie Header')
1664
1665 domain = cookie.get('domain') or ''
1666 expiry = cookie.get('expires')
1667 if expiry == '': # 0 is valid
1668 expiry = None
1669 prepared_cookie = http.cookiejar.Cookie(
1670 cookie.get('version') or 0, cookie.key, cookie.value, None, False,
1671 domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
1672 cookie.get('secure') or False, expiry, False, None, None, {})
1673
1674 if domain:
1675 self.cookiejar.set_cookie(prepared_cookie)
1676 elif autoscope is True:
1677 self.deprecated_feature(
1678 'Passing cookies as a header is a potential security risk; '
1679 'they will be scoped to the domain of the downloaded URLs. '
1680 'Please consider loading cookies from a file or browser instead.')
1681 self.__header_cookies.append(prepared_cookie)
1682 elif autoscope:
1683 self.report_warning(
1684 'The extractor result contains an unscoped cookie as an HTTP header. '
1685 f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}',
1686 only_once=True)
1687 self._apply_header_cookies(autoscope, [prepared_cookie])
1688 else:
1689 self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
1690 tb=False, is_error=False)
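# Hedged sketch of the two scoping modes of this internal helper (cookie
# values are hypothetical):
#
#   # explicitly scoped, Set-Cookie style attributes (autoscope=False):
#   ydl._load_cookies('sid=abc; Domain=.example.com; Path=/; Secure',
#                     autoscope=False)
#   # bare header cookie, scoped later to each downloaded URL (autoscope=True):
#   ydl._load_cookies('sid=abc', autoscope=True)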
1691
1692 def _apply_header_cookies(self, url, cookies=None):
1693 """Applies stray header cookies to the provided url
1694
1695 This loads header cookies and scopes them to the domain provided in `url`.
1696 While this is not ideal, it helps reduce the risk of them being sent
1697 to an unintended destination while mostly maintaining compatibility.
1698 """
1699 parsed = urllib.parse.urlparse(url)
1700 if not parsed.hostname:
1701 return
1702
1703 for cookie in map(copy.copy, cookies or self.__header_cookies):
1704 cookie.domain = f'.{parsed.hostname}'
1705 self.cookiejar.set_cookie(cookie)
1706
1707 @_handle_extraction_exceptions
1708 def __extract_info(self, url, ie, download, extra_info, process):
1709 self._apply_header_cookies(url)
1710
1711 try:
1712 ie_result = ie.extract(url)
1713 except UserNotLive as e:
1714 if process:
1715 if self.params.get('wait_for_video'):
1716 self.report_warning(e)
1717 self._wait_for_video()
1718 raise
1719 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1720 self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1721 return
1722 if isinstance(ie_result, list):
1723 # Backwards compatibility: old IE result format
1724 ie_result = {
1725 '_type': 'compat_list',
1726 'entries': ie_result,
1727 }
1728 if extra_info.get('original_url'):
1729 ie_result.setdefault('original_url', extra_info['original_url'])
1730 self.add_default_extra_info(ie_result, ie, url)
1731 if process:
1732 self._wait_for_video(ie_result)
1733 return self.process_ie_result(ie_result, download, extra_info)
1734 else:
1735 return ie_result
1736
1737 def add_default_extra_info(self, ie_result, ie, url):
1738 if url is not None:
1739 self.add_extra_info(ie_result, {
1740 'webpage_url': url,
1741 'original_url': url,
1742 })
1743 webpage_url = ie_result.get('webpage_url')
1744 if webpage_url:
1745 self.add_extra_info(ie_result, {
1746 'webpage_url_basename': url_basename(webpage_url),
1747 'webpage_url_domain': get_domain(webpage_url),
1748 })
1749 if ie is not None:
1750 self.add_extra_info(ie_result, {
1751 'extractor': ie.IE_NAME,
1752 'extractor_key': ie.ie_key(),
1753 })
1754
1755 def process_ie_result(self, ie_result, download=True, extra_info=None):
1756 """
1757 Take the result of the ie (may be modified) and resolve all unresolved
1758 references (URLs, playlist items).
1759
1760 It will also download the videos if 'download' is true.
1761 Returns the resolved ie_result.
1762 """
1763 if extra_info is None:
1764 extra_info = {}
1765 result_type = ie_result.get('_type', 'video')
1766
1767 if result_type in ('url', 'url_transparent'):
1768 ie_result['url'] = sanitize_url(
1769 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
1770 if ie_result.get('original_url') and not extra_info.get('original_url'):
1771 extra_info = {'original_url': ie_result['original_url'], **extra_info}
1772
1773 extract_flat = self.params.get('extract_flat', False)
1774 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1775 or extract_flat is True):
1776 info_copy = ie_result.copy()
1777 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1778 if ie and not ie_result.get('id'):
1779 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1780 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1781 self.add_extra_info(info_copy, extra_info)
1782 info_copy, _ = self.pre_process(info_copy)
1783 self._fill_common_fields(info_copy, False)
1784 self.__forced_printings(info_copy)
1785 self._raise_pending_errors(info_copy)
1786 if self.params.get('force_write_download_archive', False):
1787 self.record_download_archive(info_copy)
1788 return ie_result
1789
1790 if result_type == 'video':
1791 self.add_extra_info(ie_result, extra_info)
1792 ie_result = self.process_video_result(ie_result, download=download)
1793 self._raise_pending_errors(ie_result)
1794 additional_urls = (ie_result or {}).get('additional_urls')
1795 if additional_urls:
1796 # TODO: Improve MetadataParserPP to allow setting a list
1797 if isinstance(additional_urls, str):
1798 additional_urls = [additional_urls]
1799 self.to_screen(
1800 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1801 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1802 ie_result['additional_entries'] = [
1803 self.extract_info(
1804 url, download, extra_info=extra_info,
1805 force_generic_extractor=self.params.get('force_generic_extractor'))
1806 for url in additional_urls
1807 ]
1808 return ie_result
1809 elif result_type == 'url':
1810 # We have to add extra_info to the results because it may be
1811 # contained in a playlist
1812 return self.extract_info(
1813 ie_result['url'], download,
1814 ie_key=ie_result.get('ie_key'),
1815 extra_info=extra_info)
1816 elif result_type == 'url_transparent':
1817 # Use the information from the embedding page
1818 info = self.extract_info(
1819 ie_result['url'], ie_key=ie_result.get('ie_key'),
1820 extra_info=extra_info, download=False, process=False)
1821
1822 # extract_info may return None when ignoreerrors is enabled and
1823 # extraction failed with an error, don't crash and return early
1824 # in this case
1825 if not info:
1826 return info
1827
1828 exempted_fields = {'_type', 'url', 'ie_key'}
1829 if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1830 # For video clips, the id etc of the clip extractor should be used
1831 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1832
1833 new_result = info.copy()
1834 new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1835
1836 # Extracted info may not be a video result (i.e.
1837 # info.get('_type', 'video') != video) but rather an url or
1838 # url_transparent. In such cases outer metadata (from ie_result)
1839 # should be propagated to inner one (info). For this to happen
1840 # _type of info should be overridden with url_transparent. This
1841 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1842 if new_result.get('_type') == 'url':
1843 new_result['_type'] = 'url_transparent'
1844
1845 return self.process_ie_result(
1846 new_result, download=download, extra_info=extra_info)
1847 elif result_type in ('playlist', 'multi_video'):
1848 # Protect from infinite recursion due to recursively nested playlists
1849 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1850 webpage_url = ie_result.get('webpage_url') # Playlists may not have a webpage_url
1851 if webpage_url and webpage_url in self._playlist_urls:
1852 self.to_screen(
1853 '[download] Skipping already downloaded playlist: %s'
1854 % (ie_result.get('title') or ie_result.get('id')))
1855 return
1856
1857 self._playlist_level += 1
1858 self._playlist_urls.add(webpage_url)
1859 self._fill_common_fields(ie_result, False)
1860 self._sanitize_thumbnails(ie_result)
1861 try:
1862 return self.__process_playlist(ie_result, download)
1863 finally:
1864 self._playlist_level -= 1
1865 if not self._playlist_level:
1866 self._playlist_urls.clear()
1867 elif result_type == 'compat_list':
1868 self.report_warning(
1869 'Extractor %s returned a compat_list result. '
1870 'It needs to be updated.' % ie_result.get('extractor'))
1871
1872 def _fixup(r):
1873 self.add_extra_info(r, {
1874 'extractor': ie_result['extractor'],
1875 'webpage_url': ie_result['webpage_url'],
1876 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1877 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1878 'extractor_key': ie_result['extractor_key'],
1879 })
1880 return r
1881 ie_result['entries'] = [
1882 self.process_ie_result(_fixup(r), download, extra_info)
1883 for r in ie_result['entries']
1884 ]
1885 return ie_result
1886 else:
1887 raise Exception('Invalid result type: %s' % result_type)
1888
1889 def _ensure_dir_exists(self, path):
1890 return make_dir(path, self.report_error)
1891
1892 @staticmethod
1893 def _playlist_infodict(ie_result, strict=False, **kwargs):
1894 info = {
1895 'playlist_count': ie_result.get('playlist_count'),
1896 'playlist': ie_result.get('title') or ie_result.get('id'),
1897 'playlist_id': ie_result.get('id'),
1898 'playlist_title': ie_result.get('title'),
1899 'playlist_uploader': ie_result.get('uploader'),
1900 'playlist_uploader_id': ie_result.get('uploader_id'),
1901 **kwargs,
1902 }
1903 if strict:
1904 return info
1905 if ie_result.get('webpage_url'):
1906 info.update({
1907 'webpage_url': ie_result['webpage_url'],
1908 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1909 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1910 })
1911 return {
1912 **info,
1913 'playlist_index': 0,
1914 '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
1915 'extractor': ie_result['extractor'],
1916 'extractor_key': ie_result['extractor_key'],
1917 }
1918
1919 def __process_playlist(self, ie_result, download):
1920 """Process each entry in the playlist"""
1921 assert ie_result['_type'] in ('playlist', 'multi_video')
1922
1923 common_info = self._playlist_infodict(ie_result, strict=True)
1924 title = common_info.get('playlist') or '<Untitled>'
1925 if self._match_entry(common_info, incomplete=True) is not None:
1926 return
1927 self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
1928
1929 all_entries = PlaylistEntries(self, ie_result)
1930 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1931
1932 lazy = self.params.get('lazy_playlist')
1933 if lazy:
1934 resolved_entries, n_entries = [], 'N/A'
1935 ie_result['requested_entries'], ie_result['entries'] = None, None
1936 else:
1937 entries = resolved_entries = list(entries)
1938 n_entries = len(resolved_entries)
1939 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1940 if not ie_result.get('playlist_count'):
1941 # Better to do this after potentially exhausting entries
1942 ie_result['playlist_count'] = all_entries.get_full_count()
1943
1944 extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1945 ie_copy = collections.ChainMap(ie_result, extra)
1946
1947 _infojson_written = False
1948 write_playlist_files = self.params.get('allow_playlist_files', True)
1949 if write_playlist_files and self.params.get('list_thumbnails'):
1950 self.list_thumbnails(ie_result)
1951 if write_playlist_files and not self.params.get('simulate'):
1952 _infojson_written = self._write_info_json(
1953 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1954 if _infojson_written is None:
1955 return
1956 if self._write_description('playlist', ie_result,
1957 self.prepare_filename(ie_copy, 'pl_description')) is None:
1958 return
1959 # TODO: This should be passed to ThumbnailsConvertor if necessary
1960 self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1961
1962 if lazy:
1963 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1964 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1965 elif self.params.get('playlistreverse'):
1966 entries.reverse()
1967 elif self.params.get('playlistrandom'):
1968 random.shuffle(entries)
1969
1970 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
1971 f'{format_field(ie_result, "playlist_count", " of %s")}')
1972
1973 keep_resolved_entries = self.params.get('extract_flat') != 'discard'
1974 if self.params.get('extract_flat') == 'discard_in_playlist':
1975 keep_resolved_entries = ie_result['_type'] != 'playlist'
1976 if keep_resolved_entries:
1977 self.write_debug('The information of all playlist entries will be held in memory')
1978
1979 failures = 0
1980 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1981 for i, (playlist_index, entry) in enumerate(entries):
1982 if lazy:
1983 resolved_entries.append((playlist_index, entry))
1984 if not entry:
1985 continue
1986
1987 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
1988 if not lazy and 'playlist-index' in self.params['compat_opts']:
1989 playlist_index = ie_result['requested_entries'][i]
1990
1991 entry_copy = collections.ChainMap(entry, {
1992 **common_info,
1993 'n_entries': int_or_none(n_entries),
1994 'playlist_index': playlist_index,
1995 'playlist_autonumber': i + 1,
1996 })
1997
1998 if self._match_entry(entry_copy, incomplete=True) is not None:
1999 # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
2000 resolved_entries[i] = (playlist_index, NO_DEFAULT)
2001 continue
2002
2003 self.to_screen('[download] Downloading item %s of %s' % (
2004 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
2005
2006 entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
2007 'playlist_index': playlist_index,
2008 'playlist_autonumber': i + 1,
2009 }, extra))
2010 if not entry_result:
2011 failures += 1
2012 if failures >= max_failures:
2013 self.report_error(
2014 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
2015 break
2016 if keep_resolved_entries:
2017 resolved_entries[i] = (playlist_index, entry_result)
2018
2019 # Update with processed data
2020 ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
2021 ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
2022 if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
2023 # Do not set for full playlist
2024 ie_result.pop('requested_entries')
2025
2026 # Write the updated info to json
2027 if _infojson_written is True and self._write_info_json(
2028 'updated playlist', ie_result,
2029 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
2030 return
2031
2032 ie_result = self.run_all_pps('playlist', ie_result)
2033 self.to_screen(f'[download] Finished downloading playlist: {title}')
2034 return ie_result
2035
2036 @_handle_extraction_exceptions
2037 def __process_iterable_entry(self, entry, download, extra_info):
2038 return self.process_ie_result(
2039 entry, download=download, extra_info=extra_info)
2040
2041 def _build_format_filter(self, filter_spec):
2042 " Returns a function to filter the formats according to the filter_spec "
2043
2044 OPERATORS = {
2045 '<': operator.lt,
2046 '<=': operator.le,
2047 '>': operator.gt,
2048 '>=': operator.ge,
2049 '=': operator.eq,
2050 '!=': operator.ne,
2051 }
2052 operator_rex = re.compile(r'''(?x)\s*
2053 (?P<key>[\w.-]+)\s*
2054 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
2055 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
2056 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
2057 m = operator_rex.fullmatch(filter_spec)
2058 if m:
2059 try:
2060 comparison_value = int(m.group('value'))
2061 except ValueError:
2062 comparison_value = parse_filesize(m.group('value'))
2063 if comparison_value is None:
2064 comparison_value = parse_filesize(m.group('value') + 'B')
2065 if comparison_value is None:
2066 raise ValueError(
2067 'Invalid value %r in format specification %r' % (
2068 m.group('value'), filter_spec))
2069 op = OPERATORS[m.group('op')]
2070
2071 if not m:
2072 STR_OPERATORS = {
2073 '=': operator.eq,
2074 '^=': lambda attr, value: attr.startswith(value),
2075 '$=': lambda attr, value: attr.endswith(value),
2076 '*=': lambda attr, value: value in attr,
2077 '~=': lambda attr, value: value.search(attr) is not None
2078 }
2079 str_operator_rex = re.compile(r'''(?x)\s*
2080 (?P<key>[a-zA-Z0-9._-]+)\s*
2081 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
2082 (?P<quote>["'])?
2083 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
2084 (?(quote)(?P=quote))\s*
2085 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
2086 m = str_operator_rex.fullmatch(filter_spec)
2087 if m:
2088 if m.group('op') == '~=':
2089 comparison_value = re.compile(m.group('value'))
2090 else:
2091 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2092 str_op = STR_OPERATORS[m.group('op')]
2093 if m.group('negation'):
2094 op = lambda attr, value: not str_op(attr, value)
2095 else:
2096 op = str_op
2097
2098 if not m:
2099 raise SyntaxError('Invalid filter specification %r' % filter_spec)
2100
2101 def _filter(f):
2102 actual_value = f.get(m.group('key'))
2103 if actual_value is None:
2104 return m.group('none_inclusive')
2105 return op(actual_value, comparison_value)
2106 return _filter
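# Example behaviour (values assumed):
#
#   f = ydl._build_format_filter('height<=720')
#   f({'height': 480})   # -> True
#   f({'height': 1080})  # -> False
#   f({})                # -> None; write 'height<=?720' to also keep
#                        #    formats that do not report a height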
2107
2108 def _check_formats(self, formats):
2109 for f in formats:
2110 self.to_screen('[info] Testing format %s' % f['format_id'])
2111 path = self.get_output_path('temp')
2112 if not self._ensure_dir_exists(f'{path}/'):
2113 continue
2114 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
2115 temp_file.close()
2116 try:
2117 success, _ = self.dl(temp_file.name, f, test=True)
2118 except (DownloadError, OSError, ValueError) + network_exceptions:
2119 success = False
2120 finally:
2121 if os.path.exists(temp_file.name):
2122 try:
2123 os.remove(temp_file.name)
2124 except OSError:
2125 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
2126 if success:
2127 yield f
2128 else:
2129 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
2130
2131 def _default_format_spec(self, info_dict, download=True):
2132
2133 def can_merge():
2134 merger = FFmpegMergerPP(self)
2135 return merger.available and merger.can_merge()
2136
2137 prefer_best = (
2138 not self.params.get('simulate')
2139 and download
2140 and (
2141 not can_merge()
2142 or info_dict.get('is_live') and not self.params.get('live_from_start')
2143 or self.params['outtmpl']['default'] == '-'))
2144 compat = (
2145 prefer_best
2146 or self.params.get('allow_multiple_audio_streams', False)
2147 or 'format-spec' in self.params['compat_opts'])
2148
2149 return (
2150 'best/bestvideo+bestaudio' if prefer_best
2151 else 'bestvideo*+bestaudio/best' if not compat
2152 else 'bestvideo+bestaudio/best')
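# e.g. with a working ffmpeg and default options this resolves to
# 'bestvideo*+bestaudio/best'; when merging is unavailable, the stream is
# live without --live-from-start, or output goes to stdout ('-'), it falls
# back to 'best/bestvideo+bestaudio'.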
2153
2154 def build_format_selector(self, format_spec):
2155 def syntax_error(note, start):
2156 message = (
2157 'Invalid format specification: '
2158 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
2159 return SyntaxError(message)
2160
2161 PICKFIRST = 'PICKFIRST'
2162 MERGE = 'MERGE'
2163 SINGLE = 'SINGLE'
2164 GROUP = 'GROUP'
2165 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2166
2167 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2168 'video': self.params.get('allow_multiple_video_streams', False)}
2169
2170 def _parse_filter(tokens):
2171 filter_parts = []
2172 for type, string_, start, _, _ in tokens:
2173 if type == tokenize.OP and string_ == ']':
2174 return ''.join(filter_parts)
2175 else:
2176 filter_parts.append(string_)
2177
2178 def _remove_unused_ops(tokens):
2179 # Remove operators that we don't use and join them with the surrounding strings.
2180 # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
2181 ALLOWED_OPS = ('/', '+', ',', '(', ')')
2182 last_string, last_start, last_end, last_line = None, None, None, None
2183 for type, string_, start, end, line in tokens:
2184 if type == tokenize.OP and string_ == '[':
2185 if last_string:
2186 yield tokenize.NAME, last_string, last_start, last_end, last_line
2187 last_string = None
2188 yield type, string_, start, end, line
2189 # everything inside brackets will be handled by _parse_filter
2190 for type, string_, start, end, line in tokens:
2191 yield type, string_, start, end, line
2192 if type == tokenize.OP and string_ == ']':
2193 break
2194 elif type == tokenize.OP and string_ in ALLOWED_OPS:
2195 if last_string:
2196 yield tokenize.NAME, last_string, last_start, last_end, last_line
2197 last_string = None
2198 yield type, string_, start, end, line
2199 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2200 if not last_string:
2201 last_string = string_
2202 last_start = start
2203 last_end = end
2204 else:
2205 last_string += string_
2206 if last_string:
2207 yield tokenize.NAME, last_string, last_start, last_end, last_line
2208
2209 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
2210 selectors = []
2211 current_selector = None
2212 for type, string_, start, _, _ in tokens:
2213 # ENCODING is only defined in python 3.x
2214 if type == getattr(tokenize, 'ENCODING', None):
2215 continue
2216 elif type in [tokenize.NAME, tokenize.NUMBER]:
2217 current_selector = FormatSelector(SINGLE, string_, [])
2218 elif type == tokenize.OP:
2219 if string_ == ')':
2220 if not inside_group:
2221 # ')' will be handled by the parentheses group
2222 tokens.restore_last_token()
2223 break
2224 elif inside_merge and string_ in ['/', ',']:
2225 tokens.restore_last_token()
2226 break
2227 elif inside_choice and string_ == ',':
2228 tokens.restore_last_token()
2229 break
2230 elif string_ == ',':
2231 if not current_selector:
2232 raise syntax_error('"," must follow a format selector', start)
2233 selectors.append(current_selector)
2234 current_selector = None
2235 elif string_ == '/':
2236 if not current_selector:
2237 raise syntax_error('"/" must follow a format selector', start)
2238 first_choice = current_selector
2239 second_choice = _parse_format_selection(tokens, inside_choice=True)
2240 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2241 elif string_ == '[':
2242 if not current_selector:
2243 current_selector = FormatSelector(SINGLE, 'best', [])
2244 format_filter = _parse_filter(tokens)
2245 current_selector.filters.append(format_filter)
2246 elif string_ == '(':
2247 if current_selector:
2248 raise syntax_error('Unexpected "("', start)
2249 group = _parse_format_selection(tokens, inside_group=True)
2250 current_selector = FormatSelector(GROUP, group, [])
2251 elif string_ == '+':
2252 if not current_selector:
2253 raise syntax_error('Unexpected "+"', start)
2254 selector_1 = current_selector
2255 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2256 if not selector_2:
2257 raise syntax_error('Expected a selector', start)
2258 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2259 else:
2260 raise syntax_error(f'Operator not recognized: "{string_}"', start)
2261 elif type == tokenize.ENDMARKER:
2262 break
2263 if current_selector:
2264 selectors.append(current_selector)
2265 return selectors
2266
2267 def _merge(formats_pair):
2268 format_1, format_2 = formats_pair
2269
2270 formats_info = []
2271 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2272 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2273
2274 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2275 get_no_more = {'video': False, 'audio': False}
2276 for (i, fmt_info) in enumerate(formats_info):
2277 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2278 formats_info.pop(i)
2279 continue
2280 for aud_vid in ['audio', 'video']:
2281 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2282 if get_no_more[aud_vid]:
2283 formats_info.pop(i)
2284 break
2285 get_no_more[aud_vid] = True
2286
2287 if len(formats_info) == 1:
2288 return formats_info[0]
2289
2290 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2291 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2292
2293 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2294 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2295
2296 output_ext = get_compatible_ext(
2297 vcodecs=[f.get('vcodec') for f in video_fmts],
2298 acodecs=[f.get('acodec') for f in audio_fmts],
2299 vexts=[f['ext'] for f in video_fmts],
2300 aexts=[f['ext'] for f in audio_fmts],
2301 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2302 or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
2303
2304 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2305
2306 new_dict = {
2307 'requested_formats': formats_info,
2308 'format': '+'.join(filtered('format')),
2309 'format_id': '+'.join(filtered('format_id')),
2310 'ext': output_ext,
2311 'protocol': '+'.join(map(determine_protocol, formats_info)),
2312 'language': '+'.join(orderedSet(filtered('language'))) or None,
2313 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2314 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2315 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2316 }
2317
2318 if the_only_video:
2319 new_dict.update({
2320 'width': the_only_video.get('width'),
2321 'height': the_only_video.get('height'),
2322 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2323 'fps': the_only_video.get('fps'),
2324 'dynamic_range': the_only_video.get('dynamic_range'),
2325 'vcodec': the_only_video.get('vcodec'),
2326 'vbr': the_only_video.get('vbr'),
2327 'stretched_ratio': the_only_video.get('stretched_ratio'),
2328 'aspect_ratio': the_only_video.get('aspect_ratio'),
2329 })
2330
2331 if the_only_audio:
2332 new_dict.update({
2333 'acodec': the_only_audio.get('acodec'),
2334 'abr': the_only_audio.get('abr'),
2335 'asr': the_only_audio.get('asr'),
2336 'audio_channels': the_only_audio.get('audio_channels')
2337 })
2338
2339 return new_dict
2340
2341 def _check_formats(formats):
2342 if (self.params.get('check_formats') is not None
2343 or self.params.get('allow_unplayable_formats')):
2344 yield from formats
2345 return
2346 elif self.params.get('check_formats') == 'selected':
2347 yield from self._check_formats(formats)
2348 return
2349
2350 for f in formats:
2351 if f.get('has_drm'):
2352 yield from self._check_formats([f])
2353 else:
2354 yield f
2355
2356 def _build_selector_function(selector):
2357 if isinstance(selector, list): # ,
2358 fs = [_build_selector_function(s) for s in selector]
2359
2360 def selector_function(ctx):
2361 for f in fs:
2362 yield from f(ctx)
2363 return selector_function
2364
2365 elif selector.type == GROUP: # ()
2366 selector_function = _build_selector_function(selector.selector)
2367
2368 elif selector.type == PICKFIRST: # /
2369 fs = [_build_selector_function(s) for s in selector.selector]
2370
2371 def selector_function(ctx):
2372 for f in fs:
2373 picked_formats = list(f(ctx))
2374 if picked_formats:
2375 return picked_formats
2376 return []
2377
2378 elif selector.type == MERGE: # +
2379 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2380
2381 def selector_function(ctx):
2382 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2383 yield _merge(pair)
2384
2385 elif selector.type == SINGLE: # atom
2386 format_spec = selector.selector or 'best'
2387
2388 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2389 if format_spec == 'all':
2390 def selector_function(ctx):
2391 yield from _check_formats(ctx['formats'][::-1])
2392 elif format_spec == 'mergeall':
2393 def selector_function(ctx):
2394 formats = list(_check_formats(
2395 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2396 if not formats:
2397 return
2398 merged_format = formats[-1]
2399 for f in formats[-2::-1]:
2400 merged_format = _merge((merged_format, f))
2401 yield merged_format
2402
2403 else:
2404 format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
2405 mobj = re.match(
2406 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2407 format_spec)
2408 if mobj is not None:
2409 format_idx = int_or_none(mobj.group('n'), default=1)
2410 format_reverse = mobj.group('bw')[0] == 'b'
2411 format_type = (mobj.group('type') or [None])[0]
2412 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2413 format_modified = mobj.group('mod') is not None
2414
2415 format_fallback = not format_type and not format_modified # for b, w
2416 _filter_f = (
2417 (lambda f: f.get('%scodec' % format_type) != 'none')
2418 if format_type and format_modified # bv*, ba*, wv*, wa*
2419 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2420 if format_type # bv, ba, wv, wa
2421 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2422 if not format_modified # b, w
2423 else lambda f: True) # b*, w*
2424 filter_f = lambda f: _filter_f(f) and (
2425 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2426 else:
2427 if format_spec in self._format_selection_exts['audio']:
2428 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2429 elif format_spec in self._format_selection_exts['video']:
2430 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2431 separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2432 elif format_spec in self._format_selection_exts['storyboards']:
2433 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2434 else:
2435 filter_f = lambda f: f.get('format_id') == format_spec # id
2436
2437 def selector_function(ctx):
2438 formats = list(ctx['formats'])
2439 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2440 if not matches:
2441 if format_fallback and ctx['incomplete_formats']:
2442 # for extractors with incomplete formats (audio only (soundcloud)
2443 # or video only (imgur)) best/worst will fall back to
2444 # best/worst {video,audio}-only format
2445 matches = formats
2446 elif separate_fallback and not ctx['has_merged_format']:
2447 # for compatibility with youtube-dl when there is no pre-merged format
2448 matches = list(filter(separate_fallback, formats))
2449 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2450 try:
2451 yield matches[format_idx - 1]
2452 except LazyList.IndexError:
2453 return
2454
2455 filters = [self._build_format_filter(f) for f in selector.filters]
2456
2457 def final_selector(ctx):
2458 ctx_copy = dict(ctx)
2459 for _filter in filters:
2460 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2461 return selector_function(ctx_copy)
2462 return final_selector
2463
2464 stream = io.BytesIO(format_spec.encode())
2465 try:
2466 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2467 except tokenize.TokenError:
2468 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2469
2470 class TokenIterator:
2471 def __init__(self, tokens):
2472 self.tokens = tokens
2473 self.counter = 0
2474
2475 def __iter__(self):
2476 return self
2477
2478 def __next__(self):
2479 if self.counter >= len(self.tokens):
2480 raise StopIteration()
2481 value = self.tokens[self.counter]
2482 self.counter += 1
2483 return value
2484
2485 next = __next__
2486
2487 def restore_last_token(self):
2488 self.counter -= 1
2489
2490 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2491 return _build_selector_function(parsed_selector)
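# Hedged usage sketch; the ctx keys are inferred from the selector functions
# above, and 'info' stands for an already-extracted info dict:
#
#   selector = ydl.build_format_selector('bestvideo[height<=1080]+bestaudio/best')
#   ctx = {'formats': info['formats'],
#          'incomplete_formats': False,
#          'has_merged_format': any(f.get('vcodec') != 'none' and
#                                   f.get('acodec') != 'none'
#                                   for f in info['formats'])}
#   best = list(selector(ctx))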
2492
2493 def _calc_headers(self, info_dict, load_cookies=False):
2494 res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
2495 clean_headers(res)
2496
2497 if load_cookies: # For --load-info-json
2498 self._load_cookies(res.get('Cookie'), autoscope=info_dict['url']) # compat
2499 self._load_cookies(info_dict.get('cookies'), autoscope=False)
2500 # The `Cookie` header is removed to prevent leaks and unscoped cookies.
2501 # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
2502 res.pop('Cookie', None)
2503 cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
2504 if cookies:
2505 encoder = LenientSimpleCookie()
2506 values = []
2507 for cookie in cookies:
2508 _, value = encoder.value_encode(cookie.value)
2509 values.append(f'{cookie.name}={value}')
2510 if cookie.domain:
2511 values.append(f'Domain={cookie.domain}')
2512 if cookie.path:
2513 values.append(f'Path={cookie.path}')
2514 if cookie.secure:
2515 values.append('Secure')
2516 if cookie.expires:
2517 values.append(f'Expires={cookie.expires}')
2518 if cookie.version:
2519 values.append(f'Version={cookie.version}')
2520 info_dict['cookies'] = '; '.join(values)
2521
2522 if 'X-Forwarded-For' not in res:
2523 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2524 if x_forwarded_for_ip:
2525 res['X-Forwarded-For'] = x_forwarded_for_ip
2526
2527 return res
2528
2529 def _calc_cookies(self, url):
2530 self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
2531 return self.cookiejar.get_cookie_header(url)
2532
2533 def _sort_thumbnails(self, thumbnails):
2534 thumbnails.sort(key=lambda t: (
2535 t.get('preference') if t.get('preference') is not None else -1,
2536 t.get('width') if t.get('width') is not None else -1,
2537 t.get('height') if t.get('height') is not None else -1,
2538 t.get('id') if t.get('id') is not None else '',
2539 t.get('url')))
2540
2541 def _sanitize_thumbnails(self, info_dict):
2542 thumbnails = info_dict.get('thumbnails')
2543 if thumbnails is None:
2544 thumbnail = info_dict.get('thumbnail')
2545 if thumbnail:
2546 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2547 if not thumbnails:
2548 return
2549
2550 def check_thumbnails(thumbnails):
2551 for t in thumbnails:
2552 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2553 try:
2554 self.urlopen(HEADRequest(t['url']))
2555 except network_exceptions as err:
2556 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2557 continue
2558 yield t
2559
2560 self._sort_thumbnails(thumbnails)
2561 for i, t in enumerate(thumbnails):
2562 if t.get('id') is None:
2563 t['id'] = '%d' % i
2564 if t.get('width') and t.get('height'):
2565 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2566 t['url'] = sanitize_url(t['url'])
2567
2568 if self.params.get('check_formats') is True:
2569 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2570 else:
2571 info_dict['thumbnails'] = thumbnails
2572
2573 def _fill_common_fields(self, info_dict, final=True):
2574 # TODO: move sanitization here
2575 if final:
2576 title = info_dict['fulltitle'] = info_dict.get('title')
2577 if not title:
2578 if title == '':
2579 self.write_debug('Extractor gave empty title. Creating a generic title')
2580 else:
2581 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2582 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2583
2584 if info_dict.get('duration') is not None:
2585 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2586
2587 for ts_key, date_key in (
2588 ('timestamp', 'upload_date'),
2589 ('release_timestamp', 'release_date'),
2590 ('modified_timestamp', 'modified_date'),
2591 ):
2592 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2593 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2594 # see http://bugs.python.org/issue1646728)
2595 with contextlib.suppress(ValueError, OverflowError, OSError):
2596 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2597 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2598
2599 live_keys = ('is_live', 'was_live')
2600 live_status = info_dict.get('live_status')
2601 if live_status is None:
2602 for key in live_keys:
2603 if info_dict.get(key) is False:
2604 continue
2605 if info_dict.get(key):
2606 live_status = key
2607 break
2608 if all(info_dict.get(key) is False for key in live_keys):
2609 live_status = 'not_live'
2610 if live_status:
2611 info_dict['live_status'] = live_status
2612 for key in live_keys:
2613 if info_dict.get(key) is None:
2614 info_dict[key] = (live_status == key)
2615 if live_status == 'post_live':
2616 info_dict['was_live'] = True
2617
2618 # Auto generate title fields corresponding to the *_number fields when missing
2619 # in order to always have clean titles. This is very common for TV series.
2620 for field in ('chapter', 'season', 'episode'):
2621 if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2622 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
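# e.g. an info dict carrying {'timestamp': 1577836800} gains
# {'upload_date': '20200101'}, and on the final pass a missing title becomes
# '<extractor> video #<id>'.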
2623
2624 def _raise_pending_errors(self, info):
2625 err = info.pop('__pending_error', None)
2626 if err:
2627 self.report_error(err, tb=False)
2628
2629 def sort_formats(self, info_dict):
2630 formats = self._get_formats(info_dict)
2631 formats.sort(key=FormatSorter(
2632 self, info_dict.get('_format_sort_fields') or []).calculate_preference)
2633
2634 def process_video_result(self, info_dict, download=True):
2635 assert info_dict.get('_type', 'video') == 'video'
2636 self._num_videos += 1
2637
2638 if 'id' not in info_dict:
2639 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2640 elif not info_dict.get('id'):
2641 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2642
2643 def report_force_conversion(field, field_not, conversion):
2644 self.report_warning(
2645 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2646 % (field, field_not, conversion))
2647
2648 def sanitize_string_field(info, string_field):
2649 field = info.get(string_field)
2650 if field is None or isinstance(field, str):
2651 return
2652 report_force_conversion(string_field, 'a string', 'string')
2653 info[string_field] = str(field)
2654
2655 def sanitize_numeric_fields(info):
2656 for numeric_field in self._NUMERIC_FIELDS:
2657 field = info.get(numeric_field)
2658 if field is None or isinstance(field, (int, float)):
2659 continue
2660 report_force_conversion(numeric_field, 'numeric', 'int')
2661 info[numeric_field] = int_or_none(field)
2662
2663 sanitize_string_field(info_dict, 'id')
2664 sanitize_numeric_fields(info_dict)
2665 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2666 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
2667 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2668 self.report_warning('"duration" field is negative, there is an error in extractor')
2669
2670 chapters = info_dict.get('chapters') or []
2671 if chapters and chapters[0].get('start_time'):
2672 chapters.insert(0, {'start_time': 0})
2673
2674 dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
2675 for idx, (prev, current, next_) in enumerate(zip(
2676 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
2677 if current.get('start_time') is None:
2678 current['start_time'] = prev.get('end_time')
2679 if not current.get('end_time'):
2680 current['end_time'] = next_.get('start_time')
2681 if not current.get('title'):
2682 current['title'] = f'<Untitled Chapter {idx}>'
2683
2684 if 'playlist' not in info_dict:
2685 # It isn't part of a playlist
2686 info_dict['playlist'] = None
2687 info_dict['playlist_index'] = None
2688
2689 self._sanitize_thumbnails(info_dict)
2690
2691 thumbnail = info_dict.get('thumbnail')
2692 thumbnails = info_dict.get('thumbnails')
2693 if thumbnail:
2694 info_dict['thumbnail'] = sanitize_url(thumbnail)
2695 elif thumbnails:
2696 info_dict['thumbnail'] = thumbnails[-1]['url']
2697
2698 if info_dict.get('display_id') is None and 'id' in info_dict:
2699 info_dict['display_id'] = info_dict['id']
2700
2701 self._fill_common_fields(info_dict)
2702
2703 for cc_kind in ('subtitles', 'automatic_captions'):
2704 cc = info_dict.get(cc_kind)
2705 if cc:
2706 for _, subtitle in cc.items():
2707 for subtitle_format in subtitle:
2708 if subtitle_format.get('url'):
2709 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2710 if subtitle_format.get('ext') is None:
2711 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2712
2713 automatic_captions = info_dict.get('automatic_captions')
2714 subtitles = info_dict.get('subtitles')
2715
2716 info_dict['requested_subtitles'] = self.process_subtitles(
2717 info_dict['id'], subtitles, automatic_captions)
2718
2719 formats = self._get_formats(info_dict)
2720
2721 # Backward compatibility with InfoExtractor._sort_formats
2722 field_preference = (formats or [{}])[0].pop('__sort_fields', None)
2723 if field_preference:
2724 info_dict['_format_sort_fields'] = field_preference
2725
2726 info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it
2727 f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
2728 if not self.params.get('allow_unplayable_formats'):
2729 formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']
2730
2731 if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2732 self.report_warning(
2733 f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
2734 'only images are available for download. Use --list-formats to see them'.capitalize())
2735
2736 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2737 if not get_from_start:
2738 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2739 if info_dict.get('is_live') and formats:
2740 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2741 if get_from_start and not formats:
2742 self.raise_no_formats(info_dict, msg=(
2743 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2744 'If you want to download from the current time, use --no-live-from-start'))
2745
2746 def is_wellformed(f):
2747 url = f.get('url')
2748 if not url:
2749 self.report_warning(
2750 '"url" field is missing or empty - skipping format, '
2751 'there is an error in extractor')
2752 return False
2753 if isinstance(url, bytes):
2754 sanitize_string_field(f, 'url')
2755 return True
2756
2757 # Filter out malformed formats for better extraction robustness
2758 formats = list(filter(is_wellformed, formats or []))
2759
2760 if not formats:
2761 self.raise_no_formats(info_dict)
2762
2763 for format in formats:
2764 sanitize_string_field(format, 'format_id')
2765 sanitize_numeric_fields(format)
2766 format['url'] = sanitize_url(format['url'])
2767 if format.get('ext') is None:
2768 format['ext'] = determine_ext(format['url']).lower()
2769 if format.get('protocol') is None:
2770 format['protocol'] = determine_protocol(format)
2771 if format.get('resolution') is None:
2772 format['resolution'] = self.format_resolution(format, default=None)
2773 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2774 format['dynamic_range'] = 'SDR'
2775 if format.get('aspect_ratio') is None:
2776 format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
2777 if (not format.get('manifest_url') # For fragmented formats, "tbr" is often max bitrate and not average
2778 and info_dict.get('duration') and format.get('tbr')
2779 and not format.get('filesize') and not format.get('filesize_approx')):
2780 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
2781 format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict), load_cookies=True)
2782
2783 # Safeguard against old/insecure infojson when using --load-info-json
2784 if info_dict.get('http_headers'):
2785 info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers'])
2786 info_dict['http_headers'].pop('Cookie', None)
2787
2788 # This is copied to http_headers by the above _calc_headers and can now be removed
2789 if '__x_forwarded_for_ip' in info_dict:
2790 del info_dict['__x_forwarded_for_ip']
2791
2792 self.sort_formats({
2793 'formats': formats,
2794 '_format_sort_fields': info_dict.get('_format_sort_fields')
2795 })
2796
2797 # Sanitize and group by format_id
2798 formats_dict = {}
2799 for i, format in enumerate(formats):
2800 if not format.get('format_id'):
2801 format['format_id'] = str(i)
2802 else:
2803 # Replace characters used in format selector expressions with '_'
2804 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2805 formats_dict.setdefault(format['format_id'], []).append(format)
2806
2807 # Make sure all formats have unique format_id
2808 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2809 for format_id, ambiguous_formats in formats_dict.items():
2810 ambiguous_id = len(ambiguous_formats) > 1
2811 for i, format in enumerate(ambiguous_formats):
2812 if ambiguous_id:
2813 format['format_id'] = '%s-%d' % (format_id, i)
2814 # Ensure there is no conflict between id and ext in format selection
2815 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2816 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2817 format['format_id'] = 'f%s' % format['format_id']
2818
2819 if format.get('format') is None:
2820 format['format'] = '{id} - {res}{note}'.format(
2821 id=format['format_id'],
2822 res=self.format_resolution(format),
2823 note=format_field(format, 'format_note', ' (%s)'),
2824 )
2825
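# A small illustrative sketch (hypothetical format_ids) of the sanitization
# and de-duplication above: selector characters become '_', duplicates get
# a '-<index>' suffix, and an id that collides with a common extension
# (while differing from the format's own ext) is prefixed with 'f':
#
#   ['hls,720', 'hls,720', 'mp4']  --sanitize-->  ['hls_720', 'hls_720', 'mp4']
#                                  --dedupe---->  ['hls_720-0', 'hls_720-1', 'fmp4']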
2826 if self.params.get('check_formats') is True:
2827 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2828
2829 if not formats or formats[0] is not info_dict:
2830 # Only set the 'formats' field if the original info_dict lists formats;
2831 # otherwise we end up with a circular reference: the first (and only)
2832 # element of info_dict['formats'] would be info_dict itself,
2833 # which can't be serialized to JSON
2834 info_dict['formats'] = formats
2835
2836 info_dict, _ = self.pre_process(info_dict)
2837
2838 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2839 return info_dict
2840
2841 self.post_extract(info_dict)
2842 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2843
2844 # The pre-processors may have modified the formats
2845 formats = self._get_formats(info_dict)
2846
2847 list_only = self.params.get('simulate') == 'list_only'
2848 interactive_format_selection = not list_only and self.format_selector == '-'
2849 if self.params.get('list_thumbnails'):
2850 self.list_thumbnails(info_dict)
2851 if self.params.get('listsubtitles'):
2852 if 'automatic_captions' in info_dict:
2853 self.list_subtitles(
2854 info_dict['id'], automatic_captions, 'automatic captions')
2855 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2856 if self.params.get('listformats') or interactive_format_selection:
2857 self.list_formats(info_dict)
2858 if list_only:
2859 # Without this printing, -F --print-json will not work
2860 self.__forced_printings(info_dict)
2861 return info_dict
2862
2863 format_selector = self.format_selector
2864 while True:
2865 if interactive_format_selection:
2866 req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
2867 + '(Press ENTER for default, or Ctrl+C to quit)'
2868 + self._format_screen(': ', self.Styles.EMPHASIS))
2869 try:
2870 format_selector = self.build_format_selector(req_format) if req_format else None
2871 except SyntaxError as err:
2872 self.report_error(err, tb=False, is_error=False)
2873 continue
2874
2875 if format_selector is None:
2876 req_format = self._default_format_spec(info_dict, download=download)
2877 self.write_debug(f'Default format spec: {req_format}')
2878 format_selector = self.build_format_selector(req_format)
2879
2880 formats_to_download = list(format_selector({
2881 'formats': formats,
2882 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2883 'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
2884 or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
2885 }))
2886 if interactive_format_selection and not formats_to_download:
2887 self.report_error('Requested format is not available', tb=False, is_error=False)
2888 continue
2889 break
2890
2891 if not formats_to_download:
2892 if not self.params.get('ignore_no_formats_error'):
2893 raise ExtractorError(
2894 'Requested format is not available. Use --list-formats for a list of available formats',
2895 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2896 self.report_warning('Requested format is not available')
2897 # Process what we can, even without any available formats.
2898 formats_to_download = [{}]
2899
2900 requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
2901 best_format, downloaded_formats = formats_to_download[-1], []
2902 if download:
2903 if best_format and requested_ranges:
2904 def to_screen(*msg):
2905 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2906
2907 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2908 (f['format_id'] for f in formats_to_download))
2909 if requested_ranges != ({}, ):
2910 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2911 (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
2912 max_downloads_reached = False
2913
2914 for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
2915 new_info = self._copy_infodict(info_dict)
2916 new_info.update(fmt)
2917 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
2918 end_time = offset + min(chapter.get('end_time', duration), duration)
2919 # duration may not be accurate. So allow deviations <1sec
2920 if end_time == float('inf') or end_time > offset + duration + 1:
2921 end_time = None
2922 if chapter or offset:
2923 new_info.update({
2924 'section_start': offset + chapter.get('start_time', 0),
2925 'section_end': end_time,
2926 'section_title': chapter.get('title'),
2927 'section_number': chapter.get('index'),
2928 })
2929 downloaded_formats.append(new_info)
2930 try:
2931 self.process_info(new_info)
2932 except MaxDownloadsReached:
2933 max_downloads_reached = True
2934 self._raise_pending_errors(new_info)
2935 # Remove copied info
2936 for key, val in tuple(new_info.items()):
2937 if info_dict.get(key) == val:
2938 new_info.pop(key)
2939 if max_downloads_reached:
2940 break
2941
2942 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
2943 assert write_archive.issubset({True, False, 'ignore'})
2944 if True in write_archive and False not in write_archive:
2945 self.record_download_archive(info_dict)
2946
2947 info_dict['requested_downloads'] = downloaded_formats
2948 info_dict = self.run_all_pps('after_video', info_dict)
2949 if max_downloads_reached:
2950 raise MaxDownloadsReached()
2951
2952 # Update the info dict with the selected best-quality format (for backwards compatibility)
2953 info_dict.update(best_format)
2954 return info_dict
2955
2956 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2957 """Select the requested subtitles and their format"""
2958 available_subs, normal_sub_langs = {}, []
2959 if normal_subtitles and self.params.get('writesubtitles'):
2960 available_subs.update(normal_subtitles)
2961 normal_sub_langs = tuple(normal_subtitles.keys())
2962 if automatic_captions and self.params.get('writeautomaticsub'):
2963 for lang, cap_info in automatic_captions.items():
2964 if lang not in available_subs:
2965 available_subs[lang] = cap_info
2966
2967 if not available_subs or (
2968 not self.params.get('writesubtitles')
2969 and not self.params.get('writeautomaticsub')):
2970 return None
2971
2972 all_sub_langs = tuple(available_subs.keys())
2973 if self.params.get('allsubtitles', False):
2974 requested_langs = all_sub_langs
2975 elif self.params.get('subtitleslangs', False):
2976 try:
2977 requested_langs = orderedSet_from_options(
2978 self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
2979 except re.error as e:
2980 raise ValueError(f'Invalid regex for subtitleslangs: {e.pattern}')
2981 else:
2982 requested_langs = LazyList(itertools.chain(
2983 ['en'] if 'en' in normal_sub_langs else [],
2984 filter(lambda f: f.startswith('en'), normal_sub_langs),
2985 ['en'] if 'en' in all_sub_langs else [],
2986 filter(lambda f: f.startswith('en'), all_sub_langs),
2987 normal_sub_langs, all_sub_langs,
2988 ))[:1]
2989 if requested_langs:
2990 self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
2991
2992 formats_query = self.params.get('subtitlesformat', 'best')
2993 formats_preference = formats_query.split('/') if formats_query else []
2994 subs = {}
2995 for lang in requested_langs:
2996 formats = available_subs.get(lang)
2997 if formats is None:
2998 self.report_warning(f'{lang} subtitles not available for {video_id}')
2999 continue
3000 for ext in formats_preference:
3001 if ext == 'best':
3002 f = formats[-1]
3003 break
3004 matches = list(filter(lambda f: f['ext'] == ext, formats))
3005 if matches:
3006 f = matches[-1]
3007 break
3008 else:
3009 f = formats[-1]
3010 self.report_warning(
3011 'No subtitle format found matching "%s" for language %s, '
3012 'using %s' % (formats_query, lang, f['ext']))
3013 subs[lang] = f
3014 return subs
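# A minimal usage sketch of the selection logic above (hypothetical values):
#
#   ydl = YoutubeDL({'writesubtitles': True, 'subtitleslangs': ['en.*'],
#                    'subtitlesformat': 'srt/best'})
#   subs = ydl.process_subtitles(
#       'abc123', {'en': [{'ext': 'vtt', 'url': 'https://...'},
#                         {'ext': 'srt', 'url': 'https://...'}]}, None)
#   # -> {'en': {'ext': 'srt', 'url': 'https://...'}}: 'srt' matches before
#   #    the 'best' fallback, which would have picked the last-listed format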
3015
3016 def _forceprint(self, key, info_dict):
3017 if info_dict is None:
3018 return
3019 info_copy = info_dict.copy()
3020 info_copy.setdefault('filename', self.prepare_filename(info_dict))
3021 if info_dict.get('requested_formats') is not None:
3022 # For RTMP URLs, also include the playpath
3023 info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
3024 elif info_dict.get('url'):
3025 info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
3026 info_copy['formats_table'] = self.render_formats_table(info_dict)
3027 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
3028 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
3029 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
3030
3031 def format_tmpl(tmpl):
3032 mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
3033 if not mobj:
3034 return tmpl
3035
3036 fmt = '%({})s'
3037 if tmpl.startswith('{'):
3038 tmpl, fmt = f'.{tmpl}', '%({})j'
3039 if tmpl.endswith('='):
3040 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
3041 return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
3042
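# How format_tmpl expands --print templates (examples following the regex
# and replacements above):
#   'title'      -> '%(title)s'
#   'title,id'   -> '%(title)s\n%(id)s'
#   'id='        -> 'id = %(id)#j'
#   '{id,title}' -> '%(.{id,title})j'
# Anything that does not fully match the pattern is passed through
# unchanged as a regular output template.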
3043 for tmpl in self.params['forceprint'].get(key, []):
3044 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
3045
3046 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
3047 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
3048 tmpl = format_tmpl(tmpl)
3049 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
3050 if self._ensure_dir_exists(filename):
3051 with open(filename, 'a', encoding='utf-8', newline='') as f:
3052 f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
3053
3054 return info_copy
3055
3056 def __forced_printings(self, info_dict, filename=None, incomplete=True):
3057 if (self.params.get('forcejson')
3058 or self.params['forceprint'].get('video')
3059 or self.params['print_to_file'].get('video')):
3060 self.post_extract(info_dict)
3061 if filename:
3062 info_dict['filename'] = filename
3063 info_copy = self._forceprint('video', info_dict)
3064
3065 def print_field(field, actual_field=None, optional=False):
3066 if actual_field is None:
3067 actual_field = field
3068 if self.params.get(f'force{field}') and (
3069 info_copy.get(field) is not None or (not optional and not incomplete)):
3070 self.to_stdout(info_copy[actual_field])
3071
3072 print_field('title')
3073 print_field('id')
3074 print_field('url', 'urls')
3075 print_field('thumbnail', optional=True)
3076 print_field('description', optional=True)
3077 print_field('filename')
3078 if self.params.get('forceduration') and info_copy.get('duration') is not None:
3079 self.to_stdout(formatSeconds(info_copy['duration']))
3080 print_field('format')
3081
3082 if self.params.get('forcejson'):
3083 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
3084
3085 def dl(self, name, info, subtitle=False, test=False):
3086 if not info.get('url'):
3087 self.raise_no_formats(info, True)
3088
3089 if test:
3090 verbose = self.params.get('verbose')
3091 params = {
3092 'test': True,
3093 'quiet': self.params.get('quiet') or not verbose,
3094 'verbose': verbose,
3095 'noprogress': not verbose,
3096 'nopart': True,
3097 'skip_unavailable_fragments': False,
3098 'keep_fragments': False,
3099 'overwrites': True,
3100 '_no_ytdl_file': True,
3101 }
3102 else:
3103 params = self.params
3104 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
3105 if not test:
3106 for ph in self._progress_hooks:
3107 fd.add_progress_hook(ph)
3108 urls = '", "'.join(
3109 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
3110 for f in info.get('requested_formats', []) or [info])
3111 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
3112
3113 # Note: Ideally, info should be deep-copied so that hooks cannot modify it,
3114 # but it may contain objects that are not deep-copyable
3115 new_info = self._copy_infodict(info)
3116 if new_info.get('http_headers') is None:
3117 new_info['http_headers'] = self._calc_headers(new_info)
3118 return fd.download(name, new_info, subtitle)
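# When test=True, the downloader runs with 'test': True, which makes the
# HTTP downloaders fetch only a small initial chunk (roughly 10 KiB).
# This is how _check_formats/--check-formats probes a format without
# downloading it fully, e.g. (hypothetical names):
#
#   success, _ = self.dl(temp_file.name, fmt, test=True)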
3119
3120 def existing_file(self, filepaths, *, default_overwrite=True):
3121 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
3122 if existing_files and not self.params.get('overwrites', default_overwrite):
3123 return existing_files[0]
3124
3125 for file in existing_files:
3126 self.report_file_delete(file)
3127 os.remove(file)
3128 return None
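# Behaviour sketch (hypothetical paths): with overwrites disabled, the
# first existing candidate is returned; with overwrites enabled, all
# existing candidates are deleted and None is returned so the download
# proceeds:
#
#   self.existing_file(['a.mkv', 'a.webm'], default_overwrite=False)
#   # -> 'a.mkv' if it exists, else 'a.webm' if it exists, else None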
3129
3130 def process_info(self, info_dict):
3131 """Process a single resolved IE result. (Modifies it in-place)"""
3132
3133 assert info_dict.get('_type', 'video') == 'video'
3134 original_infodict = info_dict
3135
3136 if 'format' not in info_dict and 'ext' in info_dict:
3137 info_dict['format'] = info_dict['ext']
3138
3139 if self._match_entry(info_dict) is not None:
3140 info_dict['__write_download_archive'] = 'ignore'
3141 return
3142
3143 # Does nothing under normal operation - for backward compatibility of process_info
3144 self.post_extract(info_dict)
3145
3146 def replace_info_dict(new_info):
3147 nonlocal info_dict
3148 if new_info == info_dict:
3149 return
3150 info_dict.clear()
3151 info_dict.update(new_info)
3152
3153 new_info, _ = self.pre_process(info_dict, 'video')
3154 replace_info_dict(new_info)
3155 self._num_downloads += 1
3156
3157 # info_dict['_filename'] needs to be set for backward compatibility
3158 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
3159 temp_filename = self.prepare_filename(info_dict, 'temp')
3160 files_to_move = {}
3161
3162 # Forced printings
3163 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
3164
3165 def check_max_downloads():
3166 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
3167 raise MaxDownloadsReached()
3168
3169 if self.params.get('simulate'):
3170 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3171 check_max_downloads()
3172 return
3173
3174 if full_filename is None:
3175 return
3176 if not self._ensure_dir_exists(encodeFilename(full_filename)):
3177 return
3178 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
3179 return
3180
3181 if self._write_description('video', info_dict,
3182 self.prepare_filename(info_dict, 'description')) is None:
3183 return
3184
3185 sub_files = self._write_subtitles(info_dict, temp_filename)
3186 if sub_files is None:
3187 return
3188 files_to_move.update(dict(sub_files))
3189
3190 thumb_files = self._write_thumbnails(
3191 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3192 if thumb_files is None:
3193 return
3194 files_to_move.update(dict(thumb_files))
3195
3196 infofn = self.prepare_filename(info_dict, 'infojson')
3197 _infojson_written = self._write_info_json('video', info_dict, infofn)
3198 if _infojson_written:
3199 info_dict['infojson_filename'] = infofn
3200 # For backward compatibility, even though it was a private field
3201 info_dict['__infojson_filename'] = infofn
3202 elif _infojson_written is None:
3203 return
3204
3205 # Note: Annotations are deprecated
3206 annofn = None
3207 if self.params.get('writeannotations', False):
3208 annofn = self.prepare_filename(info_dict, 'annotation')
3209 if annofn:
3210 if not self._ensure_dir_exists(encodeFilename(annofn)):
3211 return
3212 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
3213 self.to_screen('[info] Video annotations are already present')
3214 elif not info_dict.get('annotations'):
3215 self.report_warning('There are no annotations to write.')
3216 else:
3217 try:
3218 self.to_screen('[info] Writing video annotations to: ' + annofn)
3219 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
3220 annofile.write(info_dict['annotations'])
3221 except (KeyError, TypeError):
3222 self.report_warning('There are no annotations to write.')
3223 except OSError:
3224 self.report_error('Cannot write annotations file: ' + annofn)
3225 return
3226
3227 # Write internet shortcut files
3228 def _write_link_file(link_type):
3229 url = try_get(info_dict['webpage_url'], iri_to_uri)
3230 if not url:
3231 self.report_warning(
3232 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3233 return True
3234 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
3235 if not self._ensure_dir_exists(encodeFilename(linkfn)):
3236 return False
3237 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
3238 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3239 return True
3240 try:
3241 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3242 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3243 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
3244 template_vars = {'url': url}
3245 if link_type == 'desktop':
3246 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3247 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
3248 except OSError:
3249 self.report_error(f'Cannot write internet shortcut {linkfn}')
3250 return False
3251 return True
3252
3253 write_links = {
3254 'url': self.params.get('writeurllink'),
3255 'webloc': self.params.get('writewebloclink'),
3256 'desktop': self.params.get('writedesktoplink'),
3257 }
3258 if self.params.get('writelink'):
3259 link_type = ('webloc' if sys.platform == 'darwin'
3260 else 'desktop' if sys.platform.startswith('linux')
3261 else 'url')
3262 write_links[link_type] = True
3263
3264 if any(should_write and not _write_link_file(link_type)
3265 for link_type, should_write in write_links.items()):
3266 return
3267
3268 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3269 replace_info_dict(new_info)
3270
3271 if self.params.get('skip_download'):
3272 info_dict['filepath'] = temp_filename
3273 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3274 info_dict['__files_to_move'] = files_to_move
3275 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
3276 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3277 else:
3278 # Download
3279 info_dict.setdefault('__postprocessors', [])
3280 try:
3281
3282 def existing_video_file(*filepaths):
3283 ext = info_dict.get('ext')
3284 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3285 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3286 default_overwrite=False)
3287 if file:
3288 info_dict['ext'] = os.path.splitext(file)[1][1:]
3289 return file
3290
3291 fd, success = None, True
3292 if info_dict.get('protocol') or info_dict.get('url'):
3293 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3294 if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
3295 info_dict.get('section_start') or info_dict.get('section_end')):
3296 msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
3297 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3298 self.report_error(f'{msg}. Aborting')
3299 return
3300
3301 if info_dict.get('requested_formats') is not None:
3302 old_ext = info_dict['ext']
3303 if self.params.get('merge_output_format') is None:
3304 if (info_dict['ext'] == 'webm'
3305 and info_dict.get('thumbnails')
3306 # check with type instead of pp_key, __name__, or isinstance
3307 # since we don't want any custom PPs to trigger this
3308 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
3309 info_dict['ext'] = 'mkv'
3310 self.report_warning(
3311 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3312 new_ext = info_dict['ext']
3313
3314 def correct_ext(filename, ext=new_ext):
3315 if filename == '-':
3316 return filename
3317 filename_real_ext = os.path.splitext(filename)[1][1:]
3318 filename_wo_ext = (
3319 os.path.splitext(filename)[0]
3320 if filename_real_ext in (old_ext, new_ext)
3321 else filename)
3322 return f'{filename_wo_ext}.{ext}'
3323
3324 # Ensure filename always has a correct extension for successful merge
3325 full_filename = correct_ext(full_filename)
3326 temp_filename = correct_ext(temp_filename)
3327 dl_filename = existing_video_file(full_filename, temp_filename)
3328
3329 info_dict['__real_download'] = False
3330 # NOTE: Copy so that original format dicts are not modified
3331 info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))
3332
3333 merger = FFmpegMergerPP(self)
3334 downloaded = []
3335 if dl_filename is not None:
3336 self.report_file_already_downloaded(dl_filename)
3337 elif fd:
3338 for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
3339 f['filepath'] = fname = prepend_extension(
3340 correct_ext(temp_filename, info_dict['ext']),
3341 'f%s' % f['format_id'], info_dict['ext'])
3342 downloaded.append(fname)
3343 info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
3344 success, real_download = self.dl(temp_filename, info_dict)
3345 info_dict['__real_download'] = real_download
3346 else:
3347 if self.params.get('allow_unplayable_formats'):
3348 self.report_warning(
3349 'You have requested merging of multiple formats '
3350 'while also allowing unplayable formats to be downloaded. '
3351 'The formats won\'t be merged to prevent data corruption.')
3352 elif not merger.available:
3353 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3354 if not self.params.get('ignoreerrors'):
3355 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3356 return
3357 self.report_warning(f'{msg}. The formats won\'t be merged')
3358
3359 if temp_filename == '-':
3360 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3361 else 'but the formats are incompatible for simultaneous download' if merger.available
3362 else 'but ffmpeg is not installed')
3363 self.report_warning(
3364 f'You have requested downloading multiple formats to stdout {reason}. '
3365 'The formats will be streamed one after the other')
3366 fname = temp_filename
3367 for f in info_dict['requested_formats']:
3368 new_info = dict(info_dict)
3369 del new_info['requested_formats']
3370 new_info.update(f)
3371 if temp_filename != '-':
3372 fname = prepend_extension(
3373 correct_ext(temp_filename, new_info['ext']),
3374 'f%s' % f['format_id'], new_info['ext'])
3375 if not self._ensure_dir_exists(fname):
3376 return
3377 f['filepath'] = fname
3378 downloaded.append(fname)
3379 partial_success, real_download = self.dl(fname, new_info)
3380 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3381 success = success and partial_success
3382
3383 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3384 info_dict['__postprocessors'].append(merger)
3385 info_dict['__files_to_merge'] = downloaded
3386 # Even if nothing new was downloaded, the merge itself happens only now
3387 info_dict['__real_download'] = True
3388 else:
3389 for file in downloaded:
3390 files_to_move[file] = None
3391 else:
3392 # Just a single file
3393 dl_filename = existing_video_file(full_filename, temp_filename)
3394 if dl_filename is None or dl_filename == temp_filename:
3395 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3396 # So we should try to resume the download
3397 success, real_download = self.dl(temp_filename, info_dict)
3398 info_dict['__real_download'] = real_download
3399 else:
3400 self.report_file_already_downloaded(dl_filename)
3401
3402 dl_filename = dl_filename or temp_filename
3403 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3404
3405 except network_exceptions as err:
3406 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3407 return
3408 except OSError as err:
3409 raise UnavailableVideoError(err)
3410 except (ContentTooShortError, ) as err:
3411 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3412 return
3413
3414 self._raise_pending_errors(info_dict)
3415 if success and full_filename != '-':
3416
3417 def fixup():
3418 do_fixup = True
3419 fixup_policy = self.params.get('fixup')
3420 vid = info_dict['id']
3421
3422 if fixup_policy in ('ignore', 'never'):
3423 return
3424 elif fixup_policy == 'warn':
3425 do_fixup = 'warn'
3426 elif fixup_policy != 'force':
3427 assert fixup_policy in ('detect_or_warn', None)
3428 if not info_dict.get('__real_download'):
3429 do_fixup = False
3430
3431 def ffmpeg_fixup(cndn, msg, cls):
3432 if not (do_fixup and cndn):
3433 return
3434 elif do_fixup == 'warn':
3435 self.report_warning(f'{vid}: {msg}')
3436 return
3437 pp = cls(self)
3438 if pp.available:
3439 info_dict['__postprocessors'].append(pp)
3440 else:
3441 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3442
3443 stretched_ratio = info_dict.get('stretched_ratio')
3444 ffmpeg_fixup(stretched_ratio not in (1, None),
3445 f'Non-uniform pixel ratio {stretched_ratio}',
3446 FFmpegFixupStretchedPP)
3447
3448 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3449 downloader = downloader.FD_NAME if downloader else None
3450
3451 ext = info_dict.get('ext')
3452 postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3453 isinstance(pp, FFmpegVideoConvertorPP)
3454 and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3455 ) for pp in self._pps['post_process']) or fd == FFmpegFD
3456
3457 if not postprocessed_by_ffmpeg:
3458 ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
3459 'writing DASH m4a. Only some players support this container',
3460 FFmpegFixupM4aPP)
3461 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3462 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3463 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3464 FFmpegFixupM3u8PP)
3465 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
3466 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3467
3468 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3469 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3470
3471 fixup()
3472 try:
3473 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3474 except PostProcessingError as err:
3475 self.report_error('Postprocessing: %s' % str(err))
3476 return
3477 try:
3478 for ph in self._post_hooks:
3479 ph(info_dict['filepath'])
3480 except Exception as err:
3481 self.report_error('post hooks: %s' % str(err))
3482 return
3483 info_dict['__write_download_archive'] = True
3484
3485 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
3486 if self.params.get('force_write_download_archive'):
3487 info_dict['__write_download_archive'] = True
3488 check_max_downloads()
3489
3490 def __download_wrapper(self, func):
3491 @functools.wraps(func)
3492 def wrapper(*args, **kwargs):
3493 try:
3494 res = func(*args, **kwargs)
3495 except UnavailableVideoError as e:
3496 self.report_error(e)
3497 except DownloadCancelled as e:
3498 self.to_screen(f'[info] {e}')
3499 if not self.params.get('break_per_url'):
3500 raise
3501 self._num_downloads = 0
3502 else:
3503 if self.params.get('dump_single_json', False):
3504 self.post_extract(res)
3505 self.to_stdout(json.dumps(self.sanitize_info(res)))
3506 return wrapper
3507
3508 def download(self, url_list):
3509 """Download a given list of URLs."""
3510 url_list = variadic(url_list) # Passing a single URL is a common mistake
3511 outtmpl = self.params['outtmpl']['default']
3512 if (len(url_list) > 1
3513 and outtmpl != '-'
3514 and '%' not in outtmpl
3515 and self.params.get('max_downloads') != 1):
3516 raise SameFileError(outtmpl)
3517
3518 for url in url_list:
3519 self.__download_wrapper(self.extract_info)(
3520 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3521
3522 return self._download_retcode
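# A minimal usage sketch (hypothetical URL):
#
#   with YoutubeDL({'outtmpl': '%(title)s.%(ext)s'}) as ydl:
#       retcode = ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
#
# retcode is 0 on success; SameFileError is raised when multiple URLs
# would all be written to a single fixed (non-template) filename.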
3523
3524 def download_with_info_file(self, info_filename):
3525 with contextlib.closing(fileinput.FileInput(
3526 [info_filename], mode='r',
3527 openhook=fileinput.hook_encoded('utf-8'))) as f:
3528 # FileInput doesn't have a read method, so we can't call json.load
3529 infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
3530 for info in variadic(json.loads('\n'.join(f)))]
3531 for info in infos:
3532 try:
3533 self.__download_wrapper(self.process_ie_result)(info, download=True)
3534 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3535 if not isinstance(e, EntryNotInPlaylist):
3536 self.to_stderr('\r')
3537 webpage_url = info.get('webpage_url')
3538 if webpage_url is None:
3539 raise
3540 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3541 self.download([webpage_url])
3542 return self._download_retcode
3543
3544 @staticmethod
3545 def sanitize_info(info_dict, remove_private_keys=False):
3546 ''' Sanitize the infodict for conversion to JSON '''
3547 if info_dict is None:
3548 return info_dict
3549 info_dict.setdefault('epoch', int(time.time()))
3550 info_dict.setdefault('_type', 'video')
3551 info_dict.setdefault('_version', {
3552 'version': __version__,
3553 'current_git_head': current_git_head(),
3554 'release_git_head': RELEASE_GIT_HEAD,
3555 'repository': REPOSITORY,
3556 })
3557
3558 if remove_private_keys:
3559 reject = lambda k, v: v is None or k.startswith('__') or k in {
3560 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3561 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
3562 'playlist_autonumber', '_format_sort_fields',
3563 }
3564 else:
3565 reject = lambda k, v: False
3566
3567 def filter_fn(obj):
3568 if isinstance(obj, dict):
3569 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3570 elif isinstance(obj, (list, tuple, set, LazyList)):
3571 return list(map(filter_fn, obj))
3572 elif obj is None or isinstance(obj, (str, int, float, bool)):
3573 return obj
3574 else:
3575 return repr(obj)
3576
3577 return filter_fn(info_dict)
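# Effect of remove_private_keys on a typical infodict (hypothetical data):
#
#   YoutubeDL.sanitize_info(
#       {'id': 'abc', 'fulltitle': None, '_filename': 'x.mp4',
#        '__real_download': True, 'formats': ({'url': 'https://...'},)},
#       remove_private_keys=True)
#   # -> {'epoch': ..., '_type': 'video', '_version': {...},
#   #     'id': 'abc', 'formats': [{'url': 'https://...'}]}
#
# None values, dunder keys and the listed private fields are dropped,
# tuples/sets/LazyLists become lists, and values that are not
# JSON-serializable are replaced by their repr().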
3578
3579 @staticmethod
3580 def filter_requested_info(info_dict, actually_filter=True):
3581 ''' Alias of sanitize_info for backward compatibility '''
3582 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3583
3584 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3585 for filename in set(filter(None, files_to_delete)):
3586 if msg:
3587 self.to_screen(msg % filename)
3588 try:
3589 os.remove(filename)
3590 except OSError:
3591 self.report_warning(f'Unable to delete file {filename}')
3592 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3593 del info['__files_to_move'][filename]
3594
3595 @staticmethod
3596 def post_extract(info_dict):
3597 def actual_post_extract(info_dict):
3598 if info_dict.get('_type') in ('playlist', 'multi_video'):
3599 for video_dict in info_dict.get('entries', {}):
3600 actual_post_extract(video_dict or {})
3601 return
3602
3603 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3604 info_dict.update(post_extractor())
3605
3606 actual_post_extract(info_dict or {})
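# Sketch of the deferred-extraction contract (hypothetical data): an IE
# may set info_dict['__post_extractor'] to a callable returning extra
# fields, which is popped and merged in here:
#
#   info = {'id': 'abc', '__post_extractor': lambda: {'comments': []}}
#   YoutubeDL.post_extract(info)
#   # -> info == {'id': 'abc', 'comments': []}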
3607
3608 def run_pp(self, pp, infodict):
3609 files_to_delete = []
3610 if '__files_to_move' not in infodict:
3611 infodict['__files_to_move'] = {}
3612 try:
3613 files_to_delete, infodict = pp.run(infodict)
3614 except PostProcessingError as e:
3615 # Must be True and not 'only_download'
3616 if self.params.get('ignoreerrors') is True:
3617 self.report_error(e)
3618 return infodict
3619 raise
3620
3621 if not files_to_delete:
3622 return infodict
3623 if self.params.get('keepvideo', False):
3624 for f in files_to_delete:
3625 infodict['__files_to_move'].setdefault(f, '')
3626 else:
3627 self._delete_downloaded_files(
3628 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3629 return infodict
3630
3631 def run_all_pps(self, key, info, *, additional_pps=None):
3632 if key != 'video':
3633 self._forceprint(key, info)
3634 for pp in (additional_pps or []) + self._pps[key]:
3635 info = self.run_pp(pp, info)
3636 return info
3637
3638 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3639 info = dict(ie_info)
3640 info['__files_to_move'] = files_to_move or {}
3641 try:
3642 info = self.run_all_pps(key, info)
3643 except PostProcessingError as err:
3644 msg = f'Preprocessing: {err}'
3645 info.setdefault('__pending_error', msg)
3646 self.report_error(msg, is_error=False)
3647 return info, info.pop('__files_to_move', None)
3648
3649 def post_process(self, filename, info, files_to_move=None):
3650 """Run all the postprocessors on the given file."""
3651 info['filepath'] = filename
3652 info['__files_to_move'] = files_to_move or {}
3653 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3654 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3655 del info['__files_to_move']
3656 return self.run_all_pps('after_move', info)
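# Pipeline sketch: pre_process runs the 'pre_process'/'after_filter'/
# 'before_dl' PPs before download; post_process then runs the
# 'post_process' PPs plus any per-download __postprocessors, moves the
# files into place, and finally runs the 'after_move' PPs:
#
#   info = self.post_process(dl_filename, info, files_to_move)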
3657
3658 def _make_archive_id(self, info_dict):
3659 video_id = info_dict.get('id')
3660 if not video_id:
3661 return
3662 # Future-proof against any change in case,
3663 # and for backwards compatibility with prior versions
3664 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3665 if extractor is None:
3666 url = str_or_none(info_dict.get('url'))
3667 if not url:
3668 return
3669 # Try to find matching extractor for the URL and take its ie_key
3670 for ie_key, ie in self._ies.items():
3671 if ie.suitable(url):
3672 extractor = ie_key
3673 break
3674 else:
3675 return
3676 return make_archive_id(extractor, video_id)
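# Resulting archive id sketch: make_archive_id lower-cases the extractor
# key, e.g. {'extractor_key': 'Youtube', 'id': 'BaW_jenozKc'}
# -> 'youtube BaW_jenozKc'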
3677
3678 def in_download_archive(self, info_dict):
3679 if not self.archive:
3680 return False
3681
3682 vid_ids = [self._make_archive_id(info_dict)]
3683 vid_ids.extend(info_dict.get('_old_archive_ids') or [])
3684 return any(id_ in self.archive for id_ in vid_ids)
3685
3686 def record_download_archive(self, info_dict):
3687 fn = self.params.get('download_archive')
3688 if fn is None:
3689 return
3690 vid_id = self._make_archive_id(info_dict)
3691 assert vid_id
3692
3693 self.write_debug(f'Adding to archive: {vid_id}')
3694 if is_path_like(fn):
3695 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3696 archive_file.write(vid_id + '\n')
3697 self.archive.add(vid_id)
3698
3699 @staticmethod
3700 def format_resolution(format, default='unknown'):
3701 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3702 return 'audio only'
3703 if format.get('resolution') is not None:
3704 return format['resolution']
3705 if format.get('width') and format.get('height'):
3706 return '%dx%d' % (format['width'], format['height'])
3707 elif format.get('height'):
3708 return '%sp' % format['height']
3709 elif format.get('width'):
3710 return '%dx?' % format['width']
3711 return default
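# Examples of the fallback chain above:
#   {'vcodec': 'none', 'acodec': 'mp4a.40.2'} -> 'audio only'
#   {'width': 1920, 'height': 1080}           -> '1920x1080'
#   {'height': 720}                           -> '720p'
#   {'width': 640}                            -> '640x?'
#   {}                                        -> default ('unknown')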
3712
3713 def _list_format_headers(self, *headers):
3714 if self.params.get('listformats_table', True) is not False:
3715 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3716 return headers
3717
3718 def _format_note(self, fdict):
3719 res = ''
3720 if fdict.get('ext') in ['f4f', 'f4m']:
3721 res += '(unsupported)'
3722 if fdict.get('language'):
3723 if res:
3724 res += ' '
3725 res += '[%s]' % fdict['language']
3726 if fdict.get('format_note') is not None:
3727 if res:
3728 res += ' '
3729 res += fdict['format_note']
3730 if fdict.get('tbr') is not None:
3731 if res:
3732 res += ', '
3733 res += '%4dk' % fdict['tbr']
3734 if fdict.get('container') is not None:
3735 if res:
3736 res += ', '
3737 res += '%s container' % fdict['container']
3738 if (fdict.get('vcodec') is not None
3739 and fdict.get('vcodec') != 'none'):
3740 if res:
3741 res += ', '
3742 res += fdict['vcodec']
3743 if fdict.get('vbr') is not None:
3744 res += '@'
3745 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3746 res += 'video@'
3747 if fdict.get('vbr') is not None:
3748 res += '%4dk' % fdict['vbr']
3749 if fdict.get('fps') is not None:
3750 if res:
3751 res += ', '
3752 res += '%sfps' % fdict['fps']
3753 if fdict.get('acodec') is not None:
3754 if res:
3755 res += ', '
3756 if fdict['acodec'] == 'none':
3757 res += 'video only'
3758 else:
3759 res += '%-5s' % fdict['acodec']
3760 elif fdict.get('abr') is not None:
3761 if res:
3762 res += ', '
3763 res += 'audio'
3764 if fdict.get('abr') is not None:
3765 res += '@%3dk' % fdict['abr']
3766 if fdict.get('asr') is not None:
3767 res += ' (%5dHz)' % fdict['asr']
3768 if fdict.get('filesize') is not None:
3769 if res:
3770 res += ', '
3771 res += format_bytes(fdict['filesize'])
3772 elif fdict.get('filesize_approx') is not None:
3773 if res:
3774 res += ', '
3775 res += '~' + format_bytes(fdict['filesize_approx'])
3776 return res
3777
3778 def _get_formats(self, info_dict):
3779 if info_dict.get('formats') is None:
3780 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3781 return [info_dict]
3782 return []
3783 return info_dict['formats']
3784
3785 def render_formats_table(self, info_dict):
3786 formats = self._get_formats(info_dict)
3787 if not formats:
3788 return
3789 if self.params.get('listformats_table', True) is False:
3790 table = [
3791 [
3792 format_field(f, 'format_id'),
3793 format_field(f, 'ext'),
3794 self.format_resolution(f),
3795 self._format_note(f)
3796 ] for f in formats if (f.get('preference') or 0) >= -1000]
3797 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3798
3799 def simplified_codec(f, field):
3800 assert field in ('acodec', 'vcodec')
3801 codec = f.get(field)
3802 if not codec:
3803 return 'unknown'
3804 elif codec != 'none':
3805 return '.'.join(codec.split('.')[:4])
3806
3807 if field == 'vcodec' and f.get('acodec') == 'none':
3808 return 'images'
3809 elif field == 'acodec' and f.get('vcodec') == 'none':
3810 return ''
3811 return self._format_out('audio only' if field == 'vcodec' else 'video only',
3812 self.Styles.SUPPRESS)
3813
3814 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3815 table = [
3816 [
3817 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3818 format_field(f, 'ext'),
3819 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3820 format_field(f, 'fps', '\t%d', func=round),
3821 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3822 format_field(f, 'audio_channels', '\t%s'),
3823 delim, (
3824 format_field(f, 'filesize', ' \t%s', func=format_bytes)
3825 or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
3826 or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
3827 None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
3828 format_field(f, 'tbr', '\t%dk', func=round),
3829 shorten_protocol_name(f.get('protocol', '')),
3830 delim,
3831 simplified_codec(f, 'vcodec'),
3832 format_field(f, 'vbr', '\t%dk', func=round),
3833 simplified_codec(f, 'acodec'),
3834 format_field(f, 'abr', '\t%dk', func=round),
3835 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
3836 join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
3837 self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
3838 (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
3839 else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
3840 format_field(f, 'format_note'),
3841 format_field(f, 'container', ignore=(None, f.get('ext'))),
3842 delim=', '), delim=' '),
3843 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3844 header_line = self._list_format_headers(
3845 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3846 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3847
3848 return render_table(
3849 header_line, table, hide_empty=True,
3850 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3851
3852 def render_thumbnails_table(self, info_dict):
3853 thumbnails = list(info_dict.get('thumbnails') or [])
3854 if not thumbnails:
3855 return None
3856 return render_table(
3857 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3858 [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
3859
3860 def render_subtitles_table(self, video_id, subtitles):
3861 def _row(lang, formats):
3862 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3863 if len(set(names)) == 1:
3864 names = [] if names[0] == 'unknown' else names[:1]
3865 return [lang, ', '.join(names), ', '.join(exts)]
3866
3867 if not subtitles:
3868 return None
3869 return render_table(
3870 self._list_format_headers('Language', 'Name', 'Formats'),
3871 [_row(lang, formats) for lang, formats in subtitles.items()],
3872 hide_empty=True)
3873
3874 def __list_table(self, video_id, name, func, *args):
3875 table = func(*args)
3876 if not table:
3877 self.to_screen(f'{video_id} has no {name}')
3878 return
3879 self.to_screen(f'[info] Available {name} for {video_id}:')
3880 self.to_stdout(table)
3881
3882 def list_formats(self, info_dict):
3883 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3884
3885 def list_thumbnails(self, info_dict):
3886 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3887
3888 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3889 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3890
3891 def print_debug_header(self):
3892 if not self.params.get('verbose'):
3893 return
3894
3895 from . import _IN_CLI # Must be delayed import
3896
3897 # These imports can be slow. So import them only as needed
3898 from .extractor.extractors import _LAZY_LOADER
3899 from .extractor.extractors import (
3900 _PLUGIN_CLASSES as plugin_ies,
3901 _PLUGIN_OVERRIDES as plugin_ie_overrides
3902 )
3903
3904 def get_encoding(stream):
3905 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3906 additional_info = []
3907 if os.environ.get('TERM', '').lower() == 'dumb':
3908 additional_info.append('dumb')
3909 if not supports_terminal_sequences(stream):
3910 from .utils import WINDOWS_VT_MODE # Must be imported locally
3911 additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
3912 if additional_info:
3913 ret = f'{ret} ({",".join(additional_info)})'
3914 return ret
3915
3916 encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
3917 locale.getpreferredencoding(),
3918 sys.getfilesystemencoding(),
3919 self.get_encoding(),
3920 ', '.join(
3921 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
3922 if stream is not None and key != 'console')
3923 )
3924
3925 logger = self.params.get('logger')
3926 if logger:
3927 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3928 write_debug(encoding_str)
3929 else:
3930 write_string(f'[debug] {encoding_str}\n', encoding=None)
3931 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3932
3933 source = detect_variant()
3934 if VARIANT not in (None, 'pip'):
3935 source += '*'
3936 klass = type(self)
3937 write_debug(join_nonempty(
3938 f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
3939 f'{CHANNEL}@{__version__}',
3940 f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
3941 '' if source == 'unknown' else f'({source})',
3942 '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
3943 delim=' '))
3944
3945 if not _IN_CLI:
3946 write_debug(f'params: {self.params}')
3947
3948 if not _LAZY_LOADER:
3949 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3950 write_debug('Lazy loading extractors is forcibly disabled')
3951 else:
3952 write_debug('Lazy loading extractors is disabled')
3953 if self.params['compat_opts']:
3954 write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
3955
3956 if current_git_head():
3957 write_debug(f'Git HEAD: {current_git_head()}')
3958 write_debug(system_identifier())
3959
3960 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3961 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3962 if ffmpeg_features:
3963 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
3964
3965 exe_versions['rtmpdump'] = rtmpdump_version()
3966 exe_versions['phantomjs'] = PhantomJSwrapper._version()
3967 exe_str = ', '.join(
3968 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3969 ) or 'none'
3970 write_debug('exe versions: %s' % exe_str)
3971
3972 from .compat.compat_utils import get_package_info
3973 from .dependencies import available_dependencies
3974
3975 write_debug('Optional libraries: %s' % (', '.join(sorted({
3976 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
3977 })) or 'none'))
3978
3979 write_debug(f'Proxy map: {self.proxies}')
3980 # write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers)}')
3981 for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
3982 display_list = ['%s%s' % (
3983 klass.__name__, '' if klass.__name__ == name else f' as {name}')
3984 for name, klass in plugins.items()]
3985 if plugin_type == 'Extractor':
3986 display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
3987 for parent, plugins in plugin_ie_overrides.items())
3988 if not display_list:
3989 continue
3990 write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')
3991
3992 plugin_dirs = plugin_directories()
3993 if plugin_dirs:
3994 write_debug(f'Plugin directories: {plugin_dirs}')
3995
3996 # Not implemented
3997 if False and self.params.get('call_home'):
3998 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
3999 write_debug('Public IP address: %s' % ipaddr)
4000 latest_version = self.urlopen(
4001 'https://yt-dl.org/latest/version').read().decode()
4002 if version_tuple(latest_version) > version_tuple(__version__):
4003 self.report_warning(
4004 'You are using an outdated version (newest version: %s)! '
4005 'See https://yt-dl.org/update if you need help updating.' %
4006 latest_version)
4007
4008 @functools.cached_property
4009 def proxies(self):
4010 """Global proxy configuration"""
4011 opts_proxy = self.params.get('proxy')
4012 if opts_proxy is not None:
4013 if opts_proxy == '':
4014 opts_proxy = '__noproxy__'
4015 proxies = {'all': opts_proxy}
4016 else:
4017 proxies = urllib.request.getproxies()
4018 # compat: use the HTTP proxy for HTTPS too; set HTTPS_PROXY to __noproxy__ to revert
4019 if 'http' in proxies and 'https' not in proxies:
4020 proxies['https'] = proxies['http']
4021
4022 return proxies
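# Resulting mapping sketch (hypothetical values):
#   params['proxy'] == 'socks5://127.0.0.1:1080' -> {'all': 'socks5://127.0.0.1:1080'}
#   params['proxy'] == ''                        -> {'all': '__noproxy__'}
#   no 'proxy' param, HTTP_PROXY in the env      -> {'http': ..., 'https': ...}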
4023
4024 @functools.cached_property
4025 def cookiejar(self):
4026 """Global cookiejar instance"""
4027 return load_cookies(
4028 self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
4029
4030 @property
4031 def _opener(self):
4032 """
4033 Get a urllib OpenerDirector from the Urllib handler (deprecated).
4034 """
4035 self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()')
4036 handler = self._request_director.handlers['Urllib']
4037 return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
4038
4039 def urlopen(self, req):
4040 """ Start an HTTP download """
4041 if isinstance(req, str):
4042 req = Request(req)
4043 elif isinstance(req, urllib.request.Request):
4044 self.deprecation_warning(
4045 'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
4046 'Use yt_dlp.networking.common.Request instead.')
4047 req = urllib_req_to_req(req)
4048 assert isinstance(req, Request)
4049
4050 # compat: Assume user:pass url params are basic auth
4051 url, basic_auth_header = extract_basic_auth(req.url)
4052 if basic_auth_header:
4053 req.headers['Authorization'] = basic_auth_header
4054 req.url = sanitize_url(url)
4055
4056 clean_proxies(proxies=req.proxies, headers=req.headers)
4057 clean_headers(req.headers)
4058
4059 try:
4060 return self._request_director.send(req)
4061 except NoSupportingHandlers as e:
4062 for ue in e.unsupported_errors:
4063 if not (ue.handler and ue.msg):
4064 continue
4065 if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
4066 raise RequestError(
4067 'file:// URLs are disabled by default in yt-dlp for security reasons. '
4068 'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
4069 raise
4070 except SSLError as e:
4071 if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
4072 raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
4073 elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
4074 raise RequestError(
4075 'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
4076 'Try using --legacy-server-connect', cause=e) from e
4077 raise
4078 except HTTPError as e: # TODO: Remove in a future release
4079 raise _CompatHTTPError(e) from e
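# A minimal usage sketch (hypothetical URL): a plain string is wrapped in
# a Request, and credentials embedded in the URL are converted into a
# Basic auth header before dispatch:
#
#   res = ydl.urlopen('https://user:pass@example.com/feed')
#   data = res.read()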
4080
4081 def build_request_director(self, handlers):
4082 logger = _YDLLogger(self)
4083 headers = self.params.get('http_headers').copy()
4084 proxies = self.proxies.copy()
4085 clean_headers(headers)
4086 clean_proxies(proxies, headers)
4087
4088 director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
4089 for handler in handlers:
4090 director.add_handler(handler(
4091 logger=logger,
4092 headers=headers,
4093 cookiejar=self.cookiejar,
4094 proxies=proxies,
4095 prefer_system_certs='no-certifi' in self.params['compat_opts'],
4096 verify=not self.params.get('nocheckcertificate'),
4097 **traverse_obj(self.params, {
4098 'verbose': 'debug_printtraffic',
4099 'source_address': 'source_address',
4100 'timeout': 'socket_timeout',
4101 'legacy_ssl_support': 'legacyserverconnect',
4102 'enable_file_urls': 'enable_file_urls',
4103 'client_cert': {
4104 'client_certificate': 'client_certificate',
4105 'client_certificate_key': 'client_certificate_key',
4106 'client_certificate_password': 'client_certificate_password',
4107 },
4108 }),
4109 ))
4110 return director
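# Usage sketch: the instance-wide director is built from the registered
# handler classes, roughly
#
#   self._request_director = self.build_request_director(_REQUEST_HANDLERS.values())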
4111
4112 def encode(self, s):
4113 if isinstance(s, bytes):
4114 return s # Already encoded
4115
4116 try:
4117 return s.encode(self.get_encoding())
4118 except UnicodeEncodeError as err:
4119 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
4120 raise
4121
4122 def get_encoding(self):
4123 encoding = self.params.get('encoding')
4124 if encoding is None:
4125 encoding = preferredencoding()
4126 return encoding
4127
4128 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
4129 ''' Write infojson and return True = written, 'exists' = already exists, False = skipped, None = error '''
4130 if overwrite is None:
4131 overwrite = self.params.get('overwrites', True)
4132 if not self.params.get('writeinfojson'):
4133 return False
4134 elif not infofn:
4135 self.write_debug(f'Skipping writing {label} infojson')
4136 return False
4137 elif not self._ensure_dir_exists(infofn):
4138 return None
4139 elif not overwrite and os.path.exists(infofn):
4140 self.to_screen(f'[info] {label.title()} metadata is already present')
4141 return 'exists'
4142
4143 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
4144 try:
4145 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
4146 return True
4147 except OSError:
4148 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
4149 return None
4150
4151 def _write_description(self, label, ie_result, descfn):
4152 ''' Write description and return True = written, False = skipped, None = error '''
4153 if not self.params.get('writedescription'):
4154 return False
4155 elif not descfn:
4156 self.write_debug(f'Skipping writing {label} description')
4157 return False
4158 elif not self._ensure_dir_exists(descfn):
4159 return None
4160 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
4161 self.to_screen(f'[info] {label.title()} description is already present')
4162 elif ie_result.get('description') is None:
4163 self.to_screen(f'[info] There\'s no {label} description to write')
4164 return False
4165 else:
4166 try:
4167 self.to_screen(f'[info] Writing {label} description to: {descfn}')
4168 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
4169 descfile.write(ie_result['description'])
4170 except OSError:
4171 self.report_error(f'Cannot write {label} description file {descfn}')
4172 return None
4173 return True
4174
4175 def _write_subtitles(self, info_dict, filename):
4176 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
4177 ret = []
4178 subtitles = info_dict.get('requested_subtitles')
4179 if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
4180 # Subtitle download errors are already handled as warnings in the relevant IE;
4181 # that way, extraction silently continues when the IE does not support subtitles
4182 return ret
4183 elif not subtitles:
4184 self.to_screen('[info] There are no subtitles for the requested languages')
4185 return ret
4186 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
4187 if not sub_filename_base:
4188 self.to_screen('[info] Skipping writing video subtitles')
4189 return ret
4190
4191 for sub_lang, sub_info in subtitles.items():
4192 sub_format = sub_info['ext']
4193 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
4194 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
4195 existing_sub = self.existing_file((sub_filename_final, sub_filename))
4196 if existing_sub:
4197 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
4198 sub_info['filepath'] = existing_sub
4199 ret.append((existing_sub, sub_filename_final))
4200 continue
4201
4202 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
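            # Extractors may embed the subtitle body directly under 'data';
            # if present, write it as-is and skip the download path below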
            if sub_info.get('data') is not None:
                try:
                    # Use newline='' to prevent conversion of newline characters
                    # See https://github.com/ytdl-org/youtube-dl/issues/10268
                    with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                        subfile.write(sub_info['data'])
                    sub_info['filepath'] = sub_filename
                    ret.append((sub_filename, sub_filename_final))
                    continue
                except OSError:
                    self.report_error(f'Cannot write video subtitles file {sub_filename}')
                    return None

            try:
                sub_copy = sub_info.copy()
                sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
                self.dl(sub_filename, sub_copy, subtitle=True)
                sub_info['filepath'] = sub_filename
                ret.append((sub_filename, sub_filename_final))
            except (DownloadError, ExtractorError, OSError, ValueError) + network_exceptions as err:
                msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
                if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
                    if not self.params.get('ignoreerrors'):
                        self.report_error(msg)
                    raise DownloadError(msg)
                self.report_warning(msg)
        return ret

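    # The returned (written, final) filename pairs feed the rename/move step;
    # roughly, the caller does (illustrative sketch of the handling):
    #
    #   sub_files = self._write_subtitles(info_dict, temp_filename)
    #   if sub_files is None:   # hard error while writing
    #       return
    #   files_to_move.update(dict(sub_files))
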
    def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
        ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
        write_all = self.params.get('write_all_thumbnails', False)
        thumbnails, ret = [], []
        if write_all or self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails') or []
            if not thumbnails:
                self.to_screen(f'[info] There are no {label} thumbnails to download')
                return ret
        multiple = write_all and len(thumbnails) > 1

        if thumb_filename_base is None:
            thumb_filename_base = filename
        if thumbnails and not thumb_filename_base:
            self.write_debug(f'Skipping writing {label} thumbnail')
            return ret

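        # The thumbnail list is sorted worst-to-best, so iterating in reverse
        # tries the most preferred candidate first; it also keeps the pop(idx)
        # below safe, since removing an entry never shifts the indices of
        # entries that are still to be visited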
        for idx, t in list(enumerate(thumbnails))[::-1]:
            thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
            thumb_display_id = f'{label} thumbnail {t["id"]}'
            thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
            thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

            existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
            if existing_thumb:
                self.to_screen('[info] %s is already present' % (
                    thumb_display_id if multiple else f'{label} thumbnail').capitalize())
                t['filepath'] = existing_thumb
                ret.append((existing_thumb, thumb_filename_final))
            else:
                self.to_screen(f'[info] Downloading {thumb_display_id} ...')
                try:
                    uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    ret.append((thumb_filename, thumb_filename_final))
                    t['filepath'] = thumb_filename
                except network_exceptions as err:
                    if isinstance(err, HTTPError) and err.status == 404:
                        self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
                    else:
                        self.report_warning(f'Unable to download {thumb_display_id}: {err}')
                    thumbnails.pop(idx)
            if ret and not write_all:
                break
        return ret
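
    # Parameter interplay, as a sketch (illustrative): 'writethumbnail' stops
    # after the first thumbnail that is written or already present (the break
    # above), while 'write_all_thumbnails' downloads every candidate and, once
    # more than one is written, prefixes each filename with the thumbnail id:
    #
    #   ydl = YoutubeDL({'write_all_thumbnails': True})
    #   pairs = ydl._write_thumbnails('video', info_dict, filename)  # hypothetical call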