import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import http.cookiejar
import io
import itertools
import json
import locale
import operator
import os
import random
import re
import shutil
import string
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata

from .cache import Cache
from .compat import functools, urllib  # isort: split
from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
from .cookies import LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .networking import Request, RequestDirector
from .networking.common import _REQUEST_HANDLERS
from .networking.exceptions import (
    HTTPError,
    NoSupportingHandlers,
    RequestError,
    SSLError,
    _CompatHTTPError,
)
from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    FFmpegVideoConvertorPP,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import REPOSITORY, current_git_head, detect_variant
from .utils import (
    DEFAULT_OUTTMPL,
    IDENTITY,
    LINK_TEMPLATES,
    MEDIA_EXTENSIONS,
    NO_DEFAULT,
    NUMBER_RE,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    FormatSorter,
    GeoRestrictedError,
    HEADRequest,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PlaylistEntries,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    UserNotLive,
    age_restricted,
    args_to_str,
    bug_reports_message,
    date_from_str,
    deprecation_warning,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    escapeHTML,
    expand_path,
    extract_basic_auth,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_compatible_ext,
    get_domain,
    int_or_none,
    iri_to_uri,
    is_path_like,
    join_nonempty,
    locked_file,
    make_archive_id,
    make_dir,
    network_exceptions,
    number_of_digits,
    orderedSet,
    orderedSet_from_options,
    parse_filesize,
    preferredencoding,
    prepend_extension,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    std_headers,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    system_identifier,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_call,
    try_get,
    url_basename,
    variadic,
    version_tuple,
    windows_enable_vt_mode,
    write_json_file,
    write_string,
)
from .utils._utils import _YDLLogger
from .utils.networking import (
    HTTPHeaderDict,
    clean_headers,
    clean_proxies,
)
from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task the InfoExtractors do),
    it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge of the InfoExtractors
    that are added to it, so this is a "mutual registration".

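    A minimal usage sketch (any option documented below can be passed in
    the params dict; the URL is just an example):

        from yt_dlp import YoutubeDL

        with YoutubeDL({'format': 'bestvideo+bestaudio/best'}) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
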
    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    netrc_location:    Location of the netrc file. Defaults to ~/.netrc.
    netrc_cmd:         Use a shell command to get credentials
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are video or any of the
                       items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
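                       For illustration, a forceprint value might look like
                       (a sketch; 'after_move' is one of the POSTPROCESS_WHEN keys):
                           'forceprint': {'after_move': ['%(title)s saved to %(filepath)s']}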
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. See "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of the same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
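                       For illustration, paths and outtmpl might be combined like
                       (a sketch; the values are arbitrary):
                           'paths': {'home': '~/Videos', 'temp': '/tmp/yt-dlp'},
                           'outtmpl': {'default': '%(title)s [%(id)s].%(ext)s'}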
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    allowed_extractors: List of regexes to match against extractor names that are allowed
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playlist_items:    Specific indices of playlist to download.
    playlistrandom:    Download playlist items in random order.
    lazy_playlist:     Process playlist entries as they are received.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Print everything to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove internal metadata from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages, e.g. ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A utils.DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  A set, or the name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to for the entire queue
    cookiefile:        File name or text stream from where cookies should be read and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, the name of the keyring,
                       and the container name, e.g. ('chrome', ) or
                       ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    client_certificate: Path to client certificate file in PEM format. May include the private key
    client_certificate_key: Path to private key file for client certificate
    client_certificate_password: Password for client certificate private key, if encrypted.
                       If not provided and the key is encrypted, yt-dlp will ask interactively
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       (Only supported by some extractors)
    enable_file_urls:  Enable file:// URLs. This is disabled by default for security reasons.
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Whether to resolve and process url_results further
                       * False: Always process. Default for API
                       * True: Never process
                       * 'in_playlist': Do not process inside playlist/multi_video
                       * 'discard': Always process, but don't return the result
                         from inside playlist/multi_video
                       * 'discard_in_playlist': Same as "discard", but only for
                         playlists (not multi_video). Default for CLI
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                         the entries of utils.POSTPROCESS_WHEN
                         Assumed to be 'post_process' if not given
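                       For illustration (a sketch; FFmpegExtractAudio is one of
                       the available keys and the remaining entries are passed
                       to its constructor):
                           'postprocessors': [{
                               'key': 'FFmpegExtractAudio',
                               'preferredcodec': 'mp3',
                           }]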
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                         Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading" or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                         None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                         unknown
                       * fragment_index: The counter of the currently
                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
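
                       A minimal hook, for illustration (only fields documented
                       above are used):

                           def progress_hook(d):
                               if d['status'] == 'finished':
                                   print('Downloaded', d['filename'])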
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                         Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: "/" separated list of extensions to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       - Raise utils.DownloadCancelled(msg) to abort remaining
                         downloads when a video is rejected.
                       match_filter_func in utils.py is one example of this.
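
                       A minimal filter, for illustration (skips videos longer
                       than an hour, following the return rules above):

                           def match_filter(info_dict, *, incomplete):
                               duration = info_dict.get('duration')
                               if duration and duration > 3600:
                                   return 'Skipping video longer than 1 hour'
                               return None  # download the video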
    color:             A Dictionary with output stream names as keys
                       and their respective color policy as values.
                       Can also just be a single color policy,
                       in which case it applies to all outputs.
                       Valid stream names are 'stdout' and 'stderr'.
                       Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
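                       For illustration (arbitrary values):
                           'color': {'stdout': 'auto', 'stderr': 'no_color'}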
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
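                       For illustration (a sketch; assumes aria2c is installed):
                           'external_downloader': {'default': 'aria2c', 'm3u8': 'native'}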
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort,
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'
    retry_sleep_functions: Dictionary of functions that take the number of attempts
                       as argument and return the time to sleep in seconds.
                       Allowed keys are 'http', 'fragment', 'file_access'
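                       For illustration (a sketch; exponential backoff for HTTP errors):
                           'retry_sleep_functions': {'http': lambda n: 2 ** n}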
    download_ranges:   A callback function that gets called for every video with
                       the signature (info_dict, ydl) -> Iterable[Section].
                       Only the returned sections will be downloaded.
                       Each Section is a dict with the following keys:
                       * start_time: Start time of the section in seconds
                       * end_time: End time of the section in seconds
                       * title: Section title (Optional)
                       * index: Section number (Optional)
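
                       A minimal callback, for illustration (downloads only the
                       first minute of each video):

                           def download_ranges(info_dict, ydl):
                               yield {'start_time': 0, 'end_time': 60}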
    force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
    noprogress:        Do not print the progress bar
    live_from_start:   Whether to download livestream videos from the start

    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PPs
                       For compatibility with youtube-dl, a single list of args
                       can also be used
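                       For illustration (a sketch; limits ffmpeg to two threads):
                           'postprocessor_args': {'ffmpeg': ['-threads', '2']}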

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors (default: 3)
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       E.g. {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
                       - `raise DownloadCancelled(msg)` in match_filter instead
    force_generic_extractor: Force downloader to use the generic extractor
                       - Use allowed_extractors = ['generic', 'default']
    playliststart:     - Use playlist_items
                       Playlist item to start at.
    playlistend:       - Use playlist_items
                       Playlist item to end at.
    playlistreverse:   - Use playlist_items
                       Download playlist items in reverse order.
    forceurl:          - Use forceprint
                       Force printing final URL.
    forcetitle:        - Use forceprint
                       Force printing title.
    forceid:           - Use forceprint
                       Force printing ID.
    forcethumbnail:    - Use forceprint
                       Force printing thumbnail URL.
    forcedescription:  - Use forceprint
                       Force printing description.
    forcefilename:     - Use forceprint
                       Force printing final filename.
    forceduration:     - Use forceprint
                       Force printing duration.
    allsubtitles:      - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    include_ads:       - Doesn't work
                       Download ads as well
    call_home:         - Not implemented
                       Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging.
    post_hooks:        - Register a custom postprocessor
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
                       If True, use the native HLS downloader instead of
                       ffmpeg/avconv; if False, use ffmpeg/avconv;
                       if None, use the downloader suggested by the extractor.
    prefer_ffmpeg:     - avconv support is deprecated
                       If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    no_color:          Same as `color='no_color'`
    """

    _NUMERIC_FIELDS = {
        'width', 'height', 'asr', 'audio_channels', 'fps',
        'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
    }
    _format_selection_exts = {
        'audio': set(MEDIA_EXTENSIONS.common_audio),
        'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
        'storyboards': set(MEDIA_EXTENSIONS.storyboards),
    }

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )

        try:
            windows_enable_vt_mode()
        except Exception as e:
            self.write_debug(f'Failed to enable VT mode: {e}')

        if self.params.get('no_color'):
            if self.params.get('color') is not None:
                self.report_warning('Overwriting params from "color" with "no_color"')
            self.params['color'] = 'no_color'

        term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'

        def process_color_policy(stream):
            stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
            policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
            if policy in ('auto', None):
                return term_allow_color and supports_terminal_sequences(stream)
            assert policy in ('always', 'never', 'no_color')
            return {'always': True, 'never': False}.get(policy, policy)

        self._allow_colors = Namespace(**{
            name: process_color_policy(stream)
            for name, stream in self._out_files.items_ if name != 'console'
        })

        # The code is left like this to be reused for future deprecations
        MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
        current_version = sys.version_info[:2]
        if current_version < MIN_RECOMMENDED:
            msg = ('Support for Python version %d.%d has been deprecated. '
                   'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
                   '\n You will no longer receive updates on this version')
            if current_version < MIN_SUPPORTED:
                msg = 'Python version %d.%d is no longer supported'
            self.deprecated_feature(
                f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
        self._request_director = self.build_request_director(
            sorted(_REQUEST_HANDLERS.values(), key=lambda rh: rh.RH_NAME.lower()))
        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()

        self.__header_cookies = []
        self._load_cookies(traverse_obj(self.params.get('http_headers'), 'cookie', casesense=False))  # compat

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if self.params.get('simulate') is None and any((
            self.params.get('list_thumbnails'),
            self.params.get('listformats'),
            self.params.get('listsubtitles'),
        )):
            self.params['simulate'] = 'list_only'

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if auto_init:
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            archive = set()
            if fn is None:
                return archive
            elif not is_path_like(fn):
                return fn

            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
            return archive

        self.archive = preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key; it will try to get one from
        the _ies list, and if there's no instance it will create a new one and
        add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
        all_ies['end'] = UnsupportedURLIE()
        try:
            ie_names = orderedSet_from_options(
                self.params.get('allowed_extractors', ['default']), {
                    'all': list(all_ies),
                    'default': [name for name, ie in all_ies.items() if ie._ENABLED],
                }, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
        for name in ie_names:
            self.add_info_extractor(all_ies[name])
        self.write_debug(f'Loaded {len(ie_names)} extractors')

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                     'Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                     'Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen, only_once=only_once)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def save_cookies(self):
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def __exit__(self, *args):
        self.restore_console_title()
        self.close()

    def close(self):
        self.save_cookies()
        self._request_director.close()

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        BAD_FORMAT='light red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors is True else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message, *, stacklevel=0):
        deprecation_warning(
            message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)

    def deprecated_feature(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'Deprecated Feature: {message}')
        self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message via the logger, or print it to stderr'''
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        sanitize = IDENTITY
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join(random.choices(string.ascii_letters, k=32))
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        info_dict.pop('__pending_error', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int, slice or "{field, ...}"
        FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
        FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
            'inner': FIELD_INNER_RE,
            'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
        }
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')

        def _traverse_infodict(fields):
            fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                      for f in ([x] if x.startswith('{') else x.split('.'))]
            for i in (0, -1):
                if fields and not fields[i]:
                    fields.pop(i)

            for i, f in enumerate(fields):
                if not f.startswith('{'):
                    continue
                assert f.endswith('}'), f'No closing brace for {f} in {fields}'
                fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}

            return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
            if sanitize and value == '':
                value = None
            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params['compat_opts']
                else NO_DEFAULT))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        class _ReplacementFormatter(string.Formatter):
            def get_field(self, field_name, args, kwargs):
                if field_name.isdigit():
                    return args[0], -1
                raise ValueError('Unsupported field')

        replacement_formatter = _ReplacementFormatter()

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            value, replacement, default, last_field = None, None, na, ''
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                last_field, replacement = mobj['fields'], mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and last_field in field_size_compat_map.keys():
                fmt = f'0{field_size_compat_map[last_field]:d}d'

            if None not in (value, replacement):
                try:
                    value = replacement_formatter.format(replacement, value)
                except ValueError:
                    value, default = None, na

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if value is None:
                value, fmt = default, 's'
            elif fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(
                    value, default=_dumpjson_default,
                    indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
            elif fmt[-1] == 'h':  # html
                value, fmt = escapeHTML(str(value)), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode() % str(value).encode()
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rsa':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                if fmt[-1] == 'r':
                    value, fmt = repr(value), str_fmt
                elif fmt[-1] == 'a':
                    value, fmt = ascii(value), str_fmt
                if fmt[-1] in 'csra':
                    value = sanitizer(last_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

1369 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1370 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1371 return self.escape_outtmpl(outtmpl) % info_dict
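# A minimal usage sketch (hypothetical info_dict values; ydl is a configured
# YoutubeDL instance):
#   ydl.evaluate_outtmpl('%(title)s [%(id)s].%(ext)s',
#                        {'title': 'Demo', 'id': 'abc123', 'ext': 'mp4'})
#   -> 'Demo [abc123].mp4'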
1372
1373 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1374 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1375 if outtmpl is None:
1376 outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
1377 try:
1378 outtmpl = self._outtmpl_expandpath(outtmpl)
1379 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1380 if not filename:
1381 return None
1382
1383 if tmpl_type in ('', 'temp'):
1384 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1385 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1386 filename = replace_extension(filename, ext, final_ext)
1387 elif tmpl_type:
1388 force_ext = OUTTMPL_TYPES[tmpl_type]
1389 if force_ext:
1390 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1391
1392 # https://github.com/blackjack4494/youtube-dlc/issues/85
1393 trim_file_name = self.params.get('trim_file_name', False)
1394 if trim_file_name:
1395 no_ext, *ext = filename.rsplit('.', 2)
1396 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1397
1398 return filename
1399 except ValueError as err:
1400 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1401 return None
1402
1403 def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1404 """Generate the output filename"""
1405 if outtmpl:
1406 assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1407 dir_type = None
1408 filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
1409 if not filename and dir_type not in ('', 'temp'):
1410 return ''
1411
1412 if warn:
1413 if not self.params.get('paths'):
1414 pass
1415 elif filename == '-':
1416 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
1417 elif os.path.isabs(filename):
1418 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1419 if filename == '-' or not filename:
1420 return filename
1421
1422 return self.get_output_path(dir_type, filename)
1423
1424 def _match_entry(self, info_dict, incomplete=False, silent=False):
1425 """Returns None if the file should be downloaded"""
1426 _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
1427 assert incomplete or _type == 'video', 'Only video result can be considered complete'
1428
1429 video_title = info_dict.get('title', info_dict.get('id', 'entry'))
1430
1431 def check_filter():
1432 if _type in ('playlist', 'multi_video'):
1433 return
1434 elif _type in ('url', 'url_transparent') and not try_call(
1435 lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
1436 return
1437
1438 if 'title' in info_dict:
1439 # This can happen when we're just evaluating the playlist
1440 title = info_dict['title']
1441 matchtitle = self.params.get('matchtitle', False)
1442 if matchtitle:
1443 if not re.search(matchtitle, title, re.IGNORECASE):
1444 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1445 rejecttitle = self.params.get('rejecttitle', False)
1446 if rejecttitle:
1447 if re.search(rejecttitle, title, re.IGNORECASE):
1448 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1449
1450 date = info_dict.get('upload_date')
1451 if date is not None:
1452 dateRange = self.params.get('daterange', DateRange())
1453 if date not in dateRange:
1454 return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
1455 view_count = info_dict.get('view_count')
1456 if view_count is not None:
1457 min_views = self.params.get('min_views')
1458 if min_views is not None and view_count < min_views:
1459 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1460 max_views = self.params.get('max_views')
1461 if max_views is not None and view_count > max_views:
1462 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1463 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1464 return 'Skipping "%s" because it is age restricted' % video_title
1465
1466 match_filter = self.params.get('match_filter')
1467 if match_filter is None:
1468 return None
1469
1470 cancelled = None
1471 try:
1472 try:
1473 ret = match_filter(info_dict, incomplete=incomplete)
1474 except TypeError:
1475 # For backward compatibility
1476 ret = None if incomplete else match_filter(info_dict)
1477 except DownloadCancelled as err:
1478 if err.msg is not NO_DEFAULT:
1479 raise
1480 ret, cancelled = err.msg, err
1481
1482 if ret is NO_DEFAULT:
1483 while True:
1484 filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1485 reply = input(self._format_screen(
1486 f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1487 if reply in {'y', ''}:
1488 return None
1489 elif reply == 'n':
1490 if cancelled:
1491 raise type(cancelled)(f'Skipping {video_title}')
1492 return f'Skipping {video_title}'
1493 return ret
1494
1495 if self.in_download_archive(info_dict):
1496 reason = '%s has already been recorded in the archive' % video_title
1497 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1498 else:
1499 try:
1500 reason = check_filter()
1501 except DownloadCancelled as e:
1502 reason, break_opt, break_err = e.msg, 'match_filter', type(e)
1503 else:
1504 break_opt, break_err = 'break_on_reject', RejectedVideoReached
1505 if reason is not None:
1506 if not silent:
1507 self.to_screen('[download] ' + reason)
1508 if self.params.get(break_opt, False):
1509 raise break_err()
1510 return reason
1511
1512 @staticmethod
1513 def add_extra_info(info_dict, extra_info):
1514 '''Set the keys from extra_info in info dict if they are missing'''
1515 for key, value in extra_info.items():
1516 info_dict.setdefault(key, value)
1517
1518 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1519 process=True, force_generic_extractor=False):
1520 """
1521 Extract and return the information dictionary of the URL
1522
1523 Arguments:
1524 @param url URL to extract
1525
1526 Keyword arguments:
1527 @param download Whether to download videos
1528 @param process Whether to resolve all unresolved references (URLs, playlist items).
1529 Must be True for download to work
1530 @param ie_key Use only the extractor with this key
1531
1532 @param extra_info Dictionary containing the extra values to add to the info (For internal use only)
1533 @param force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic')
1534 """
1535
1536 if extra_info is None:
1537 extra_info = {}
1538
1539 if not ie_key and force_generic_extractor:
1540 ie_key = 'Generic'
1541
1542 if ie_key:
1543 ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
1544 else:
1545 ies = self._ies
1546
1547 for key, ie in ies.items():
1548 if not ie.suitable(url):
1549 continue
1550
1551 if not ie.working():
1552 self.report_warning('The program functionality for this site has been marked as broken, '
1553 'and will probably not work.')
1554
1555 temp_id = ie.get_temp_id(url)
1556 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1557 self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
1558 if self.params.get('break_on_existing', False):
1559 raise ExistingVideoReached()
1560 break
1561 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1562 else:
1563 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1564 self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1565 tb=False if extractors_restricted else None)
1566
1567 def _handle_extraction_exceptions(func):
1568 @functools.wraps(func)
1569 def wrapper(self, *args, **kwargs):
1570 while True:
1571 try:
1572 return func(self, *args, **kwargs)
1573 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1574 raise
1575 except ReExtractInfo as e:
1576 if e.expected:
1577 self.to_screen(f'{e}; Re-extracting data')
1578 else:
1579 self.to_stderr('\r')
1580 self.report_warning(f'{e}; Re-extracting data')
1581 continue
1582 except GeoRestrictedError as e:
1583 msg = e.msg
1584 if e.countries:
1585 msg += '\nThis video is available in %s.' % ', '.join(
1586 map(ISO3166Utils.short2full, e.countries))
1587 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1588 self.report_error(msg)
1589 except ExtractorError as e: # An error we somewhat expected
1590 self.report_error(str(e), e.format_traceback())
1591 except Exception as e:
1592 if self.params.get('ignoreerrors'):
1593 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1594 else:
1595 raise
1596 break
1597 return wrapper
1598
1599 def _wait_for_video(self, ie_result={}):
1600 if (not self.params.get('wait_for_video')
1601 or ie_result.get('_type', 'video') != 'video'
1602 or ie_result.get('formats') or ie_result.get('url')):
1603 return
1604
1605 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1606 last_msg = ''
1607
1608 def progress(msg):
1609 nonlocal last_msg
1610 full_msg = f'{msg}\n'
1611 if not self.params.get('noprogress'):
1612 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1613 elif last_msg:
1614 return
1615 self.to_screen(full_msg, skip_eol=True)
1616 last_msg = msg
1617
1618 min_wait, max_wait = self.params.get('wait_for_video')
1619 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1620 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1621 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1622 self.report_warning('Release time of video is not known')
1623 elif ie_result and (diff or 0) <= 0:
1624 self.report_warning('Video should already be available according to extracted info')
1625 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1626 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1627
1628 wait_till = time.time() + diff
1629 try:
1630 while True:
1631 diff = wait_till - time.time()
1632 if diff <= 0:
1633 progress('')
1634 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1635 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1636 time.sleep(1)
1637 except KeyboardInterrupt:
1638 progress('')
1639 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1640 except BaseException as e:
1641 if not isinstance(e, ReExtractInfo):
1642 self.to_screen('')
1643 raise
1644
1645 def _load_cookies(self, data, *, from_headers=True):
1646 """Loads cookies from a `Cookie` header
1647
1648 This tries to work around the security vulnerability of passing cookies to every domain.
1649 See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
1650 The unscoped cookies are saved for later to be stored in the jar with a limited scope.
1651
1652 @param data The Cookie header as string to load the cookies from
1653 @param from_headers If `False`, allows Set-Cookie syntax in the cookie string (at least a domain will be required)
1654 """
1655 for cookie in LenientSimpleCookie(data).values():
1656 if from_headers and any(cookie.values()):
1657 raise ValueError('Invalid syntax in Cookie Header')
1658
1659 domain = cookie.get('domain') or ''
1660 expiry = cookie.get('expires')
1661 if expiry == '': # 0 is valid
1662 expiry = None
1663 prepared_cookie = http.cookiejar.Cookie(
1664 cookie.get('version') or 0, cookie.key, cookie.value, None, False,
1665 domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
1666 cookie.get('secure') or False, expiry, False, None, None, {})
1667
1668 if domain:
1669 self.cookiejar.set_cookie(prepared_cookie)
1670 elif from_headers:
1671 self.deprecated_feature(
1672 'Passing cookies as a header is a potential security risk; '
1673 'they will be scoped to the domain of the downloaded URLs. '
1674 'Please consider loading cookies from a file or browser instead.')
1675 self.__header_cookies.append(prepared_cookie)
1676 else:
1677 self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
1678 tb=False, is_error=False)
1679
1680 def _apply_header_cookies(self, url):
1681 """Applies stray header cookies to the provided url
1682
1683 This loads header cookies and scopes them to the domain provided in `url`.
1684 While this is not ideal, it helps reduce the risk of them being sent
1685 to an unintended destination while mostly maintaining compatibility.
1686 """
1687 parsed = urllib.parse.urlparse(url)
1688 if not parsed.hostname:
1689 return
1690
1691 for cookie in map(copy.copy, self.__header_cookies):
1692 cookie.domain = f'.{parsed.hostname}'
1693 self.cookiejar.set_cookie(cookie)
1694
1695 @_handle_extraction_exceptions
1696 def __extract_info(self, url, ie, download, extra_info, process):
1697 self._apply_header_cookies(url)
1698
1699 try:
1700 ie_result = ie.extract(url)
1701 except UserNotLive as e:
1702 if process:
1703 if self.params.get('wait_for_video'):
1704 self.report_warning(e)
1705 self._wait_for_video()
1706 raise
1707 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1708 self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1709 return
1710 if isinstance(ie_result, list):
1711 # Backwards compatibility: old IE result format
1712 ie_result = {
1713 '_type': 'compat_list',
1714 'entries': ie_result,
1715 }
1716 if extra_info.get('original_url'):
1717 ie_result.setdefault('original_url', extra_info['original_url'])
1718 self.add_default_extra_info(ie_result, ie, url)
1719 if process:
1720 self._wait_for_video(ie_result)
1721 return self.process_ie_result(ie_result, download, extra_info)
1722 else:
1723 return ie_result
1724
1725 def add_default_extra_info(self, ie_result, ie, url):
1726 if url is not None:
1727 self.add_extra_info(ie_result, {
1728 'webpage_url': url,
1729 'original_url': url,
1730 })
1731 webpage_url = ie_result.get('webpage_url')
1732 if webpage_url:
1733 self.add_extra_info(ie_result, {
1734 'webpage_url_basename': url_basename(webpage_url),
1735 'webpage_url_domain': get_domain(webpage_url),
1736 })
1737 if ie is not None:
1738 self.add_extra_info(ie_result, {
1739 'extractor': ie.IE_NAME,
1740 'extractor_key': ie.ie_key(),
1741 })
1742
1743 def process_ie_result(self, ie_result, download=True, extra_info=None):
1744 """
1745 Take the result of the ie (may be modified) and resolve all unresolved
1746 references (URLs, playlist items).
1747
1748 It will also download the videos if 'download' is True.
1749 Returns the resolved ie_result.
1750 """
1751 if extra_info is None:
1752 extra_info = {}
1753 result_type = ie_result.get('_type', 'video')
1754
1755 if result_type in ('url', 'url_transparent'):
1756 ie_result['url'] = sanitize_url(
1757 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
1758 if ie_result.get('original_url') and not extra_info.get('original_url'):
1759 extra_info = {'original_url': ie_result['original_url'], **extra_info}
1760
1761 extract_flat = self.params.get('extract_flat', False)
1762 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1763 or extract_flat is True):
1764 info_copy = ie_result.copy()
1765 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1766 if ie and not ie_result.get('id'):
1767 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1768 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1769 self.add_extra_info(info_copy, extra_info)
1770 info_copy, _ = self.pre_process(info_copy)
1771 self._fill_common_fields(info_copy, False)
1772 self.__forced_printings(info_copy)
1773 self._raise_pending_errors(info_copy)
1774 if self.params.get('force_write_download_archive', False):
1775 self.record_download_archive(info_copy)
1776 return ie_result
1777
1778 if result_type == 'video':
1779 self.add_extra_info(ie_result, extra_info)
1780 ie_result = self.process_video_result(ie_result, download=download)
1781 self._raise_pending_errors(ie_result)
1782 additional_urls = (ie_result or {}).get('additional_urls')
1783 if additional_urls:
1784 # TODO: Improve MetadataParserPP to allow setting a list
1785 if isinstance(additional_urls, str):
1786 additional_urls = [additional_urls]
1787 self.to_screen(
1788 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1789 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1790 ie_result['additional_entries'] = [
1791 self.extract_info(
1792 url, download, extra_info=extra_info,
1793 force_generic_extractor=self.params.get('force_generic_extractor'))
1794 for url in additional_urls
1795 ]
1796 return ie_result
1797 elif result_type == 'url':
1798 # We have to add extra_info to the results because it may be
1799 # contained in a playlist
1800 return self.extract_info(
1801 ie_result['url'], download,
1802 ie_key=ie_result.get('ie_key'),
1803 extra_info=extra_info)
1804 elif result_type == 'url_transparent':
1805 # Use the information from the embedding page
1806 info = self.extract_info(
1807 ie_result['url'], ie_key=ie_result.get('ie_key'),
1808 extra_info=extra_info, download=False, process=False)
1809
1810 # extract_info may return None when ignoreerrors is enabled and
1811 # extraction failed with an error, don't crash and return early
1812 # in this case
1813 if not info:
1814 return info
1815
1816 exempted_fields = {'_type', 'url', 'ie_key'}
1817 if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1818 # For video clips, the id etc of the clip extractor should be used
1819 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1820
1821 new_result = info.copy()
1822 new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1823
1824 # Extracted info may not be a video result (i.e.
1825 # info.get('_type', 'video') != video) but rather an url or
1826 # url_transparent. In such cases outer metadata (from ie_result)
1827 # should be propagated to inner one (info). For this to happen
1828 # _type of info should be overridden with url_transparent. This
1829 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1830 if new_result.get('_type') == 'url':
1831 new_result['_type'] = 'url_transparent'
1832
1833 return self.process_ie_result(
1834 new_result, download=download, extra_info=extra_info)
1835 elif result_type in ('playlist', 'multi_video'):
1836 # Protect from infinite recursion due to recursively nested playlists
1837 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1838 webpage_url = ie_result.get('webpage_url') # Playlists may not have webpage_url
1839 if webpage_url and webpage_url in self._playlist_urls:
1840 self.to_screen(
1841 '[download] Skipping already downloaded playlist: %s'
1842 % (ie_result.get('title') or ie_result.get('id')))
1843 return
1844
1845 self._playlist_level += 1
1846 self._playlist_urls.add(webpage_url)
1847 self._fill_common_fields(ie_result, False)
1848 self._sanitize_thumbnails(ie_result)
1849 try:
1850 return self.__process_playlist(ie_result, download)
1851 finally:
1852 self._playlist_level -= 1
1853 if not self._playlist_level:
1854 self._playlist_urls.clear()
1855 elif result_type == 'compat_list':
1856 self.report_warning(
1857 'Extractor %s returned a compat_list result. '
1858 'It needs to be updated.' % ie_result.get('extractor'))
1859
1860 def _fixup(r):
1861 self.add_extra_info(r, {
1862 'extractor': ie_result['extractor'],
1863 'webpage_url': ie_result['webpage_url'],
1864 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1865 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1866 'extractor_key': ie_result['extractor_key'],
1867 })
1868 return r
1869 ie_result['entries'] = [
1870 self.process_ie_result(_fixup(r), download, extra_info)
1871 for r in ie_result['entries']
1872 ]
1873 return ie_result
1874 else:
1875 raise Exception('Invalid result type: %s' % result_type)
1876
1877 def _ensure_dir_exists(self, path):
1878 return make_dir(path, self.report_error)
1879
1880 @staticmethod
1881 def _playlist_infodict(ie_result, strict=False, **kwargs):
1882 info = {
1883 'playlist_count': ie_result.get('playlist_count'),
1884 'playlist': ie_result.get('title') or ie_result.get('id'),
1885 'playlist_id': ie_result.get('id'),
1886 'playlist_title': ie_result.get('title'),
1887 'playlist_uploader': ie_result.get('uploader'),
1888 'playlist_uploader_id': ie_result.get('uploader_id'),
1889 **kwargs,
1890 }
1891 if strict:
1892 return info
1893 if ie_result.get('webpage_url'):
1894 info.update({
1895 'webpage_url': ie_result['webpage_url'],
1896 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1897 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1898 })
1899 return {
1900 **info,
1901 'playlist_index': 0,
1902 '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
1903 'extractor': ie_result['extractor'],
1904 'extractor_key': ie_result['extractor_key'],
1905 }
1906
1907 def __process_playlist(self, ie_result, download):
1908 """Process each entry in the playlist"""
1909 assert ie_result['_type'] in ('playlist', 'multi_video')
1910
1911 common_info = self._playlist_infodict(ie_result, strict=True)
1912 title = common_info.get('playlist') or '<Untitled>'
1913 if self._match_entry(common_info, incomplete=True) is not None:
1914 return
1915 self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
1916
1917 all_entries = PlaylistEntries(self, ie_result)
1918 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1919
1920 lazy = self.params.get('lazy_playlist')
1921 if lazy:
1922 resolved_entries, n_entries = [], 'N/A'
1923 ie_result['requested_entries'], ie_result['entries'] = None, None
1924 else:
1925 entries = resolved_entries = list(entries)
1926 n_entries = len(resolved_entries)
1927 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1928 if not ie_result.get('playlist_count'):
1929 # Better to do this after potentially exhausting entries
1930 ie_result['playlist_count'] = all_entries.get_full_count()
1931
1932 extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1933 ie_copy = collections.ChainMap(ie_result, extra)
1934
1935 _infojson_written = False
1936 write_playlist_files = self.params.get('allow_playlist_files', True)
1937 if write_playlist_files and self.params.get('list_thumbnails'):
1938 self.list_thumbnails(ie_result)
1939 if write_playlist_files and not self.params.get('simulate'):
1940 _infojson_written = self._write_info_json(
1941 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1942 if _infojson_written is None:
1943 return
1944 if self._write_description('playlist', ie_result,
1945 self.prepare_filename(ie_copy, 'pl_description')) is None:
1946 return
1947 # TODO: This should be passed to ThumbnailsConvertor if necessary
1948 self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1949
1950 if lazy:
1951 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1952 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1953 elif self.params.get('playlistreverse'):
1954 entries.reverse()
1955 elif self.params.get('playlistrandom'):
1956 random.shuffle(entries)
1957
1958 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
1959 f'{format_field(ie_result, "playlist_count", " of %s")}')
1960
1961 keep_resolved_entries = self.params.get('extract_flat') != 'discard'
1962 if self.params.get('extract_flat') == 'discard_in_playlist':
1963 keep_resolved_entries = ie_result['_type'] != 'playlist'
1964 if keep_resolved_entries:
1965 self.write_debug('The information of all playlist entries will be held in memory')
1966
1967 failures = 0
1968 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1969 for i, (playlist_index, entry) in enumerate(entries):
1970 if lazy:
1971 resolved_entries.append((playlist_index, entry))
1972 if not entry:
1973 continue
1974
1975 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
1976 if not lazy and 'playlist-index' in self.params['compat_opts']:
1977 playlist_index = ie_result['requested_entries'][i]
1978
1979 entry_copy = collections.ChainMap(entry, {
1980 **common_info,
1981 'n_entries': int_or_none(n_entries),
1982 'playlist_index': playlist_index,
1983 'playlist_autonumber': i + 1,
1984 })
1985
1986 if self._match_entry(entry_copy, incomplete=True) is not None:
1987 # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
1988 resolved_entries[i] = (playlist_index, NO_DEFAULT)
1989 continue
1990
1991 self.to_screen('[download] Downloading item %s of %s' % (
1992 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
1993
1994 entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
1995 'playlist_index': playlist_index,
1996 'playlist_autonumber': i + 1,
1997 }, extra))
1998 if not entry_result:
1999 failures += 1
2000 if failures >= max_failures:
2001 self.report_error(
2002 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
2003 break
2004 if keep_resolved_entries:
2005 resolved_entries[i] = (playlist_index, entry_result)
2006
2007 # Update with processed data
2008 ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
2009 ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
2010 if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
2011 # Do not set for full playlist
2012 ie_result.pop('requested_entries')
2013
2014 # Write the updated info to json
2015 if _infojson_written is True and self._write_info_json(
2016 'updated playlist', ie_result,
2017 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
2018 return
2019
2020 ie_result = self.run_all_pps('playlist', ie_result)
2021 self.to_screen(f'[download] Finished downloading playlist: {title}')
2022 return ie_result
2023
2024 @_handle_extraction_exceptions
2025 def __process_iterable_entry(self, entry, download, extra_info):
2026 return self.process_ie_result(
2027 entry, download=download, extra_info=extra_info)
2028
2029 def _build_format_filter(self, filter_spec):
2030 " Returns a function to filter the formats according to the filter_spec "
2031
2032 OPERATORS = {
2033 '<': operator.lt,
2034 '<=': operator.le,
2035 '>': operator.gt,
2036 '>=': operator.ge,
2037 '=': operator.eq,
2038 '!=': operator.ne,
2039 }
2040 operator_rex = re.compile(r'''(?x)\s*
2041 (?P<key>[\w.-]+)\s*
2042 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
2043 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
2044 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
2045 m = operator_rex.fullmatch(filter_spec)
2046 if m:
2047 try:
2048 comparison_value = int(m.group('value'))
2049 except ValueError:
2050 comparison_value = parse_filesize(m.group('value'))
2051 if comparison_value is None:
2052 comparison_value = parse_filesize(m.group('value') + 'B')
2053 if comparison_value is None:
2054 raise ValueError(
2055 'Invalid value %r in format specification %r' % (
2056 m.group('value'), filter_spec))
2057 op = OPERATORS[m.group('op')]
2058
2059 if not m:
2060 STR_OPERATORS = {
2061 '=': operator.eq,
2062 '^=': lambda attr, value: attr.startswith(value),
2063 '$=': lambda attr, value: attr.endswith(value),
2064 '*=': lambda attr, value: value in attr,
2065 '~=': lambda attr, value: value.search(attr) is not None
2066 }
2067 str_operator_rex = re.compile(r'''(?x)\s*
2068 (?P<key>[a-zA-Z0-9._-]+)\s*
2069 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
2070 (?P<quote>["'])?
2071 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
2072 (?(quote)(?P=quote))\s*
2073 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
2074 m = str_operator_rex.fullmatch(filter_spec)
2075 if m:
2076 if m.group('op') == '~=':
2077 comparison_value = re.compile(m.group('value'))
2078 else:
2079 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2080 str_op = STR_OPERATORS[m.group('op')]
2081 if m.group('negation'):
2082 op = lambda attr, value: not str_op(attr, value)
2083 else:
2084 op = str_op
2085
2086 if not m:
2087 raise SyntaxError('Invalid filter specification %r' % filter_spec)
2088
2089 def _filter(f):
2090 actual_value = f.get(m.group('key'))
2091 if actual_value is None:
2092 return m.group('none_inclusive')
2093 return op(actual_value, comparison_value)
2094 return _filter
2095
2096 def _check_formats(self, formats):
2097 for f in formats:
2098 self.to_screen('[info] Testing format %s' % f['format_id'])
2099 path = self.get_output_path('temp')
2100 if not self._ensure_dir_exists(f'{path}/'):
2101 continue
2102 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
2103 temp_file.close()
2104 try:
2105 success, _ = self.dl(temp_file.name, f, test=True)
2106 except (DownloadError, OSError, ValueError) + network_exceptions:
2107 success = False
2108 finally:
2109 if os.path.exists(temp_file.name):
2110 try:
2111 os.remove(temp_file.name)
2112 except OSError:
2113 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
2114 if success:
2115 yield f
2116 else:
2117 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
2118
2119 def _default_format_spec(self, info_dict, download=True):
2120
2121 def can_merge():
2122 merger = FFmpegMergerPP(self)
2123 return merger.available and merger.can_merge()
2124
2125 prefer_best = (
2126 not self.params.get('simulate')
2127 and download
2128 and (
2129 not can_merge()
2130 or info_dict.get('is_live') and not self.params.get('live_from_start')
2131 or self.params['outtmpl']['default'] == '-'))
2132 compat = (
2133 prefer_best
2134 or self.params.get('allow_multiple_audio_streams', False)
2135 or 'format-spec' in self.params['compat_opts'])
2136
2137 return (
2138 'best/bestvideo+bestaudio' if prefer_best
2139 else 'bestvideo*+bestaudio/best' if not compat
2140 else 'bestvideo+bestaudio/best')
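# e.g. with a working ffmpeg and a regular file output this evaluates to
# 'bestvideo*+bestaudio/best'; when merging is impossible (e.g. writing to
# stdout) it degrades to 'best/bestvideo+bestaudio'.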
2141
2142 def build_format_selector(self, format_spec):
2143 def syntax_error(note, start):
2144 message = (
2145 'Invalid format specification: '
2146 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
2147 return SyntaxError(message)
2148
2149 PICKFIRST = 'PICKFIRST'
2150 MERGE = 'MERGE'
2151 SINGLE = 'SINGLE'
2152 GROUP = 'GROUP'
2153 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2154
2155 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2156 'video': self.params.get('allow_multiple_video_streams', False)}
2157
2158 def _parse_filter(tokens):
2159 filter_parts = []
2160 for type, string_, start, _, _ in tokens:
2161 if type == tokenize.OP and string_ == ']':
2162 return ''.join(filter_parts)
2163 else:
2164 filter_parts.append(string_)
2165
2166 def _remove_unused_ops(tokens):
2167 # Remove operators that we don't use and join them with the surrounding strings.
2168 # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
2169 ALLOWED_OPS = ('/', '+', ',', '(', ')')
2170 last_string, last_start, last_end, last_line = None, None, None, None
2171 for type, string_, start, end, line in tokens:
2172 if type == tokenize.OP and string_ == '[':
2173 if last_string:
2174 yield tokenize.NAME, last_string, last_start, last_end, last_line
2175 last_string = None
2176 yield type, string_, start, end, line
2177 # everything inside brackets will be handled by _parse_filter
2178 for type, string_, start, end, line in tokens:
2179 yield type, string_, start, end, line
2180 if type == tokenize.OP and string_ == ']':
2181 break
2182 elif type == tokenize.OP and string_ in ALLOWED_OPS:
2183 if last_string:
2184 yield tokenize.NAME, last_string, last_start, last_end, last_line
2185 last_string = None
2186 yield type, string_, start, end, line
2187 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2188 if not last_string:
2189 last_string = string_
2190 last_start = start
2191 last_end = end
2192 else:
2193 last_string += string_
2194 if last_string:
2195 yield tokenize.NAME, last_string, last_start, last_end, last_line
2196
2197 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
2198 selectors = []
2199 current_selector = None
2200 for type, string_, start, _, _ in tokens:
2201 # ENCODING is only defined in Python 3.x
2202 if type == getattr(tokenize, 'ENCODING', None):
2203 continue
2204 elif type in [tokenize.NAME, tokenize.NUMBER]:
2205 current_selector = FormatSelector(SINGLE, string_, [])
2206 elif type == tokenize.OP:
2207 if string_ == ')':
2208 if not inside_group:
2209 # ')' will be handled by the parentheses group
2210 tokens.restore_last_token()
2211 break
2212 elif inside_merge and string_ in ['/', ',']:
2213 tokens.restore_last_token()
2214 break
2215 elif inside_choice and string_ == ',':
2216 tokens.restore_last_token()
2217 break
2218 elif string_ == ',':
2219 if not current_selector:
2220 raise syntax_error('"," must follow a format selector', start)
2221 selectors.append(current_selector)
2222 current_selector = None
2223 elif string_ == '/':
2224 if not current_selector:
2225 raise syntax_error('"/" must follow a format selector', start)
2226 first_choice = current_selector
2227 second_choice = _parse_format_selection(tokens, inside_choice=True)
2228 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2229 elif string_ == '[':
2230 if not current_selector:
2231 current_selector = FormatSelector(SINGLE, 'best', [])
2232 format_filter = _parse_filter(tokens)
2233 current_selector.filters.append(format_filter)
2234 elif string_ == '(':
2235 if current_selector:
2236 raise syntax_error('Unexpected "("', start)
2237 group = _parse_format_selection(tokens, inside_group=True)
2238 current_selector = FormatSelector(GROUP, group, [])
2239 elif string_ == '+':
2240 if not current_selector:
2241 raise syntax_error('Unexpected "+"', start)
2242 selector_1 = current_selector
2243 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2244 if not selector_2:
2245 raise syntax_error('Expected a selector', start)
2246 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2247 else:
2248 raise syntax_error(f'Operator not recognized: "{string_}"', start)
2249 elif type == tokenize.ENDMARKER:
2250 break
2251 if current_selector:
2252 selectors.append(current_selector)
2253 return selectors
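# Parsing sketch: 'bv*+ba/b' becomes a PICKFIRST selector whose first choice
# is a MERGE of SINGLE('bv*') and SINGLE('ba'), and whose fallback is
# SINGLE('b').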
2254
2255 def _merge(formats_pair):
2256 format_1, format_2 = formats_pair
2257
2258 formats_info = []
2259 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2260 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2261
2262 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2263 get_no_more = {'video': False, 'audio': False}
2264 for (i, fmt_info) in enumerate(formats_info):
2265 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2266 formats_info.pop(i)
2267 continue
2268 for aud_vid in ['audio', 'video']:
2269 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2270 if get_no_more[aud_vid]:
2271 formats_info.pop(i)
2272 break
2273 get_no_more[aud_vid] = True
2274
2275 if len(formats_info) == 1:
2276 return formats_info[0]
2277
2278 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2279 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2280
2281 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2282 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2283
2284 output_ext = get_compatible_ext(
2285 vcodecs=[f.get('vcodec') for f in video_fmts],
2286 acodecs=[f.get('acodec') for f in audio_fmts],
2287 vexts=[f['ext'] for f in video_fmts],
2288 aexts=[f['ext'] for f in audio_fmts],
2289 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2290 or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
2291
2292 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2293
2294 new_dict = {
2295 'requested_formats': formats_info,
2296 'format': '+'.join(filtered('format')),
2297 'format_id': '+'.join(filtered('format_id')),
2298 'ext': output_ext,
2299 'protocol': '+'.join(map(determine_protocol, formats_info)),
2300 'language': '+'.join(orderedSet(filtered('language'))) or None,
2301 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2302 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2303 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2304 }
2305
2306 if the_only_video:
2307 new_dict.update({
2308 'width': the_only_video.get('width'),
2309 'height': the_only_video.get('height'),
2310 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2311 'fps': the_only_video.get('fps'),
2312 'dynamic_range': the_only_video.get('dynamic_range'),
2313 'vcodec': the_only_video.get('vcodec'),
2314 'vbr': the_only_video.get('vbr'),
2315 'stretched_ratio': the_only_video.get('stretched_ratio'),
2316 'aspect_ratio': the_only_video.get('aspect_ratio'),
2317 })
2318
2319 if the_only_audio:
2320 new_dict.update({
2321 'acodec': the_only_audio.get('acodec'),
2322 'abr': the_only_audio.get('abr'),
2323 'asr': the_only_audio.get('asr'),
2324 'audio_channels': the_only_audio.get('audio_channels')
2325 })
2326
2327 return new_dict
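# e.g. merging a video-only format '137' with an audio-only '140'
# (hypothetical ids) yields format_id '137+140', 'requested_formats' holding
# both inputs, and an ext chosen by get_compatible_ext.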
2328
2329 def _check_formats(formats):
2330 if (self.params.get('check_formats') is not None
2331 or self.params.get('allow_unplayable_formats')):
2332 yield from formats
2333 return
2334 elif self.params.get('check_formats') == 'selected':
2335 yield from self._check_formats(formats)
2336 return
2337
2338 for f in formats:
2339 if f.get('has_drm'):
2340 yield from self._check_formats([f])
2341 else:
2342 yield f
2343
2344 def _build_selector_function(selector):
2345 if isinstance(selector, list): # ,
2346 fs = [_build_selector_function(s) for s in selector]
2347
2348 def selector_function(ctx):
2349 for f in fs:
2350 yield from f(ctx)
2351 return selector_function
2352
2353 elif selector.type == GROUP: # ()
2354 selector_function = _build_selector_function(selector.selector)
2355
2356 elif selector.type == PICKFIRST: # /
2357 fs = [_build_selector_function(s) for s in selector.selector]
2358
2359 def selector_function(ctx):
2360 for f in fs:
2361 picked_formats = list(f(ctx))
2362 if picked_formats:
2363 return picked_formats
2364 return []
2365
2366 elif selector.type == MERGE: # +
2367 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2368
2369 def selector_function(ctx):
2370 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2371 yield _merge(pair)
2372
2373 elif selector.type == SINGLE: # atom
2374 format_spec = selector.selector or 'best'
2375
2376 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2377 if format_spec == 'all':
2378 def selector_function(ctx):
2379 yield from _check_formats(ctx['formats'][::-1])
2380 elif format_spec == 'mergeall':
2381 def selector_function(ctx):
2382 formats = list(_check_formats(
2383 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2384 if not formats:
2385 return
2386 merged_format = formats[-1]
2387 for f in formats[-2::-1]:
2388 merged_format = _merge((merged_format, f))
2389 yield merged_format
2390
2391 else:
2392 format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
2393 mobj = re.match(
2394 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2395 format_spec)
2396 if mobj is not None:
2397 format_idx = int_or_none(mobj.group('n'), default=1)
2398 format_reverse = mobj.group('bw')[0] == 'b'
2399 format_type = (mobj.group('type') or [None])[0]
2400 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2401 format_modified = mobj.group('mod') is not None
2402
2403 format_fallback = not format_type and not format_modified # for b, w
2404 _filter_f = (
2405 (lambda f: f.get('%scodec' % format_type) != 'none')
2406 if format_type and format_modified # bv*, ba*, wv*, wa*
2407 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2408 if format_type # bv, ba, wv, wa
2409 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2410 if not format_modified # b, w
2411 else lambda f: True) # b*, w*
2412 filter_f = lambda f: _filter_f(f) and (
2413 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2414 else:
2415 if format_spec in self._format_selection_exts['audio']:
2416 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2417 elif format_spec in self._format_selection_exts['video']:
2418 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2419 separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2420 elif format_spec in self._format_selection_exts['storyboards']:
2421 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2422 else:
2423 filter_f = lambda f: f.get('format_id') == format_spec # id
2424
2425 def selector_function(ctx):
2426 formats = list(ctx['formats'])
2427 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2428 if not matches:
2429 if format_fallback and ctx['incomplete_formats']:
2430 # for extractors with incomplete formats (audio only (soundcloud)
2431 # or video only (imgur)) best/worst will fall back to
2432 # best/worst {video,audio}-only format
2433 matches = formats
2434 elif separate_fallback and not ctx['has_merged_format']:
2435 # for compatibility with youtube-dl when there is no pre-merged format
2436 matches = list(filter(separate_fallback, formats))
2437 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2438 try:
2439 yield matches[format_idx - 1]
2440 except LazyList.IndexError:
2441 return
2442
2443 filters = [self._build_format_filter(f) for f in selector.filters]
2444
2445 def final_selector(ctx):
2446 ctx_copy = dict(ctx)
2447 for _filter in filters:
2448 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2449 return selector_function(ctx_copy)
2450 return final_selector
2451
2452 stream = io.BytesIO(format_spec.encode())
2453 try:
2454 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2455 except tokenize.TokenError:
2456 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2457
2458 class TokenIterator:
2459 def __init__(self, tokens):
2460 self.tokens = tokens
2461 self.counter = 0
2462
2463 def __iter__(self):
2464 return self
2465
2466 def __next__(self):
2467 if self.counter >= len(self.tokens):
2468 raise StopIteration()
2469 value = self.tokens[self.counter]
2470 self.counter += 1
2471 return value
2472
2473 next = __next__
2474
2475 def restore_last_token(self):
2476 self.counter -= 1
2477
2478 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2479 return _build_selector_function(parsed_selector)
2480
2481 def _calc_headers(self, info_dict):
2482 res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
2483 clean_headers(res)
2484 cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
2485 if cookies:
2486 encoder = LenientSimpleCookie()
2487 values = []
2488 for cookie in cookies:
2489 _, value = encoder.value_encode(cookie.value)
2490 values.append(f'{cookie.name}={value}')
2491 if cookie.domain:
2492 values.append(f'Domain={cookie.domain}')
2493 if cookie.path:
2494 values.append(f'Path={cookie.path}')
2495 if cookie.secure:
2496 values.append('Secure')
2497 if cookie.expires:
2498 values.append(f'Expires={cookie.expires}')
2499 if cookie.version:
2500 values.append(f'Version={cookie.version}')
2501 info_dict['cookies'] = '; '.join(values)
2502
2503 if 'X-Forwarded-For' not in res:
2504 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2505 if x_forwarded_for_ip:
2506 res['X-Forwarded-For'] = x_forwarded_for_ip
2507
2508 return res
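# e.g. a jar cookie name=value scoped to .example.com with path '/'
# (hypothetical) serializes into info_dict['cookies'] as
# 'name=value; Domain=.example.com; Path=/'.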
2509
2510 def _calc_cookies(self, url):
2511 self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
2512 return self.cookiejar.get_cookie_header(url)
2513
2514 def _sort_thumbnails(self, thumbnails):
2515 thumbnails.sort(key=lambda t: (
2516 t.get('preference') if t.get('preference') is not None else -1,
2517 t.get('width') if t.get('width') is not None else -1,
2518 t.get('height') if t.get('height') is not None else -1,
2519 t.get('id') if t.get('id') is not None else '',
2520 t.get('url')))
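# The sort is ascending, so the last thumbnail is considered the best; this
# is why process_video_result falls back to thumbnails[-1]['url'].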
2521
2522 def _sanitize_thumbnails(self, info_dict):
2523 thumbnails = info_dict.get('thumbnails')
2524 if thumbnails is None:
2525 thumbnail = info_dict.get('thumbnail')
2526 if thumbnail:
2527 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2528 if not thumbnails:
2529 return
2530
2531 def check_thumbnails(thumbnails):
2532 for t in thumbnails:
2533 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2534 try:
2535 self.urlopen(HEADRequest(t['url']))
2536 except network_exceptions as err:
2537 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2538 continue
2539 yield t
2540
2541 self._sort_thumbnails(thumbnails)
2542 for i, t in enumerate(thumbnails):
2543 if t.get('id') is None:
2544 t['id'] = '%d' % i
2545 if t.get('width') and t.get('height'):
2546 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2547 t['url'] = sanitize_url(t['url'])
2548
2549 if self.params.get('check_formats') is True:
2550 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2551 else:
2552 info_dict['thumbnails'] = thumbnails
2553
2554 def _fill_common_fields(self, info_dict, final=True):
2555 # TODO: move sanitization here
2556 if final:
2557 title = info_dict['fulltitle'] = info_dict.get('title')
2558 if not title:
2559 if title == '':
2560 self.write_debug('Extractor gave empty title. Creating a generic title')
2561 else:
2562 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2563 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2564
2565 if info_dict.get('duration') is not None:
2566 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2567
2568 for ts_key, date_key in (
2569 ('timestamp', 'upload_date'),
2570 ('release_timestamp', 'release_date'),
2571 ('modified_timestamp', 'modified_date'),
2572 ):
2573 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2574 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2575 # see http://bugs.python.org/issue1646728)
2576 with contextlib.suppress(ValueError, OverflowError, OSError):
2577 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2578 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2579
2580 live_keys = ('is_live', 'was_live')
2581 live_status = info_dict.get('live_status')
2582 if live_status is None:
2583 for key in live_keys:
2584 if info_dict.get(key) is False:
2585 continue
2586 if info_dict.get(key):
2587 live_status = key
2588 break
2589 if all(info_dict.get(key) is False for key in live_keys):
2590 live_status = 'not_live'
2591 if live_status:
2592 info_dict['live_status'] = live_status
2593 for key in live_keys:
2594 if info_dict.get(key) is None:
2595 info_dict[key] = (live_status == key)
2596 if live_status == 'post_live':
2597 info_dict['was_live'] = True
2598
2599 # Auto-generate title fields corresponding to the *_number fields when missing
2600 # in order to always have clean titles. This is very common for TV series.
2601 for field in ('chapter', 'season', 'episode'):
2602 if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2603 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
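# e.g. an entry with episode_number=3 and no 'episode' field (hypothetical)
# gains episode='Episode 3'; likewise for 'chapter' and 'season'.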
2604
2605 def _raise_pending_errors(self, info):
2606 err = info.pop('__pending_error', None)
2607 if err:
2608 self.report_error(err, tb=False)
2609
2610 def sort_formats(self, info_dict):
2611 formats = self._get_formats(info_dict)
2612 formats.sort(key=FormatSorter(
2613 self, info_dict.get('_format_sort_fields') or []).calculate_preference)
2614
2615 def process_video_result(self, info_dict, download=True):
2616 assert info_dict.get('_type', 'video') == 'video'
2617 self._num_videos += 1
2618
2619 if 'id' not in info_dict:
2620 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2621 elif not info_dict.get('id'):
2622 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2623
2624 def report_force_conversion(field, field_not, conversion):
2625 self.report_warning(
2626 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2627 % (field, field_not, conversion))
2628
2629 def sanitize_string_field(info, string_field):
2630 field = info.get(string_field)
2631 if field is None or isinstance(field, str):
2632 return
2633 report_force_conversion(string_field, 'a string', 'string')
2634 info[string_field] = str(field)
2635
2636 def sanitize_numeric_fields(info):
2637 for numeric_field in self._NUMERIC_FIELDS:
2638 field = info.get(numeric_field)
2639 if field is None or isinstance(field, (int, float)):
2640 continue
2641 report_force_conversion(numeric_field, 'numeric', 'int')
2642 info[numeric_field] = int_or_none(field)
2643
2644 sanitize_string_field(info_dict, 'id')
2645 sanitize_numeric_fields(info_dict)
2646 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2647 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
2648 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2649 self.report_warning('"duration" field is negative, there is an error in extractor')
2650
2651 chapters = info_dict.get('chapters') or []
2652 if chapters and chapters[0].get('start_time'):
2653 chapters.insert(0, {'start_time': 0})
2654
2655 dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
2656 for idx, (prev, current, next_) in enumerate(zip(
2657 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
2658 if current.get('start_time') is None:
2659 current['start_time'] = prev.get('end_time')
2660 if not current.get('end_time'):
2661 current['end_time'] = next_.get('start_time')
2662 if not current.get('title'):
2663 current['title'] = f'<Untitled Chapter {idx}>'
2664
2665 if 'playlist' not in info_dict:
2666 # It isn't part of a playlist
2667 info_dict['playlist'] = None
2668 info_dict['playlist_index'] = None
2669
2670 self._sanitize_thumbnails(info_dict)
2671
2672 thumbnail = info_dict.get('thumbnail')
2673 thumbnails = info_dict.get('thumbnails')
2674 if thumbnail:
2675 info_dict['thumbnail'] = sanitize_url(thumbnail)
2676 elif thumbnails:
2677 info_dict['thumbnail'] = thumbnails[-1]['url']
2678
2679 if info_dict.get('display_id') is None and 'id' in info_dict:
2680 info_dict['display_id'] = info_dict['id']
2681
2682 self._fill_common_fields(info_dict)
2683
2684 for cc_kind in ('subtitles', 'automatic_captions'):
2685 cc = info_dict.get(cc_kind)
2686 if cc:
2687 for _, subtitle in cc.items():
2688 for subtitle_format in subtitle:
2689 if subtitle_format.get('url'):
2690 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2691 if subtitle_format.get('ext') is None:
2692 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2693
2694 automatic_captions = info_dict.get('automatic_captions')
2695 subtitles = info_dict.get('subtitles')
2696
2697 info_dict['requested_subtitles'] = self.process_subtitles(
2698 info_dict['id'], subtitles, automatic_captions)
2699
2700 formats = self._get_formats(info_dict)
2701
2702 # Backward compatibility with InfoExtractor._sort_formats
2703 field_preference = (formats or [{}])[0].pop('__sort_fields', None)
2704 if field_preference:
2705 info_dict['_format_sort_fields'] = field_preference
2706
2707 info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it
2708 f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
2709 if not self.params.get('allow_unplayable_formats'):
2710 formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']
2711
2712 if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2713 self.report_warning(
2714 f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
2715 'only images are available for download. Use --list-formats to see them'.capitalize())
2716
2717 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2718 if not get_from_start:
2719 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2720 if info_dict.get('is_live') and formats:
2721 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2722 if get_from_start and not formats:
2723 self.raise_no_formats(info_dict, msg=(
2724 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2725 'If you want to download from the current time, use --no-live-from-start'))
2726
2727 def is_wellformed(f):
2728 url = f.get('url')
2729 if not url:
2730 self.report_warning(
2731 '"url" field is missing or empty - skipping format, '
2732 'there is an error in extractor')
2733 return False
2734 if isinstance(url, bytes):
2735 sanitize_string_field(f, 'url')
2736 return True
2737
2738 # Filter out malformed formats for better extraction robustness
2739 formats = list(filter(is_wellformed, formats or []))
2740
2741 if not formats:
2742 self.raise_no_formats(info_dict)
2743
2744 for format in formats:
2745 sanitize_string_field(format, 'format_id')
2746 sanitize_numeric_fields(format)
2747 format['url'] = sanitize_url(format['url'])
2748 if format.get('ext') is None:
2749 format['ext'] = determine_ext(format['url']).lower()
2750 if format.get('protocol') is None:
2751 format['protocol'] = determine_protocol(format)
2752 if format.get('resolution') is None:
2753 format['resolution'] = self.format_resolution(format, default=None)
2754 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2755 format['dynamic_range'] = 'SDR'
2756 if format.get('aspect_ratio') is None:
2757 format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
2758 if (not format.get('manifest_url') # For fragmented formats, "tbr" is often max bitrate and not average
2759 and info_dict.get('duration') and format.get('tbr')
2760 and not format.get('filesize') and not format.get('filesize_approx')):
2761 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
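# e.g. duration=60 (s) at tbr=1000 (kbit/s) estimates 60 * 1000 * 128
# = 7_680_000 bytes (illustrative numbers; 1024 / 8 = 128 bytes per kbit).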
2762 format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict))
2763
2764 # This is copied to http_headers by the above _calc_headers and can now be removed
2765 if '__x_forwarded_for_ip' in info_dict:
2766 del info_dict['__x_forwarded_for_ip']
2767
2768 self.sort_formats({
2769 'formats': formats,
2770 '_format_sort_fields': info_dict.get('_format_sort_fields')
2771 })
2772
2773 # Sanitize and group by format_id
2774 formats_dict = {}
2775 for i, format in enumerate(formats):
2776 if not format.get('format_id'):
2777 format['format_id'] = str(i)
2778 else:
2779 # Sanitize format_id, replacing characters used in format selector expressions
2780 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2781 formats_dict.setdefault(format['format_id'], []).append(format)
2782
2783 # Make sure all formats have unique format_id
2784 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2785 for format_id, ambiguous_formats in formats_dict.items():
2786 ambiguous_id = len(ambiguous_formats) > 1
2787 for i, format in enumerate(ambiguous_formats):
2788 if ambiguous_id:
2789 format['format_id'] = '%s-%d' % (format_id, i)
2790 # Ensure there is no conflict between id and ext in format selection
2791 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2792 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2793 format['format_id'] = 'f%s' % format['format_id']
2794
2795 if format.get('format') is None:
2796 format['format'] = '{id} - {res}{note}'.format(
2797 id=format['format_id'],
2798 res=self.format_resolution(format),
2799 note=format_field(format, 'format_note', ' (%s)'),
2800 )
2801
2802 if self.params.get('check_formats') is True:
2803 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2804
2805 if not formats or formats[0] is not info_dict:
2806 # Only set the 'formats' field if the original info_dict lists them;
2807 # otherwise we end up with a circular reference: the first (and only)
2808 # element of the 'formats' field in info_dict would be info_dict itself,
2809 # which can't be exported to JSON
2810 info_dict['formats'] = formats
2811
2812 info_dict, _ = self.pre_process(info_dict)
2813
2814 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2815 return info_dict
2816
2817 self.post_extract(info_dict)
2818 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2819
2820 # The pre-processors may have modified the formats
2821 formats = self._get_formats(info_dict)
2822
2823 list_only = self.params.get('simulate') == 'list_only'
2824 interactive_format_selection = not list_only and self.format_selector == '-'
2825 if self.params.get('list_thumbnails'):
2826 self.list_thumbnails(info_dict)
2827 if self.params.get('listsubtitles'):
2828 if 'automatic_captions' in info_dict:
2829 self.list_subtitles(
2830 info_dict['id'], automatic_captions, 'automatic captions')
2831 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2832 if self.params.get('listformats') or interactive_format_selection:
2833 self.list_formats(info_dict)
2834 if list_only:
2835 # Without this printing, -F --print-json will not work
2836 self.__forced_printings(info_dict)
2837 return info_dict
2838
2839 format_selector = self.format_selector
2840 while True:
2841 if interactive_format_selection:
2842 req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
2843 + '(Press ENTER for default, or Ctrl+C to quit)'
2844 + self._format_screen(': ', self.Styles.EMPHASIS))
2845 try:
2846 format_selector = self.build_format_selector(req_format) if req_format else None
2847 except SyntaxError as err:
2848 self.report_error(err, tb=False, is_error=False)
2849 continue
2850
2851 if format_selector is None:
2852 req_format = self._default_format_spec(info_dict, download=download)
2853 self.write_debug(f'Default format spec: {req_format}')
2854 format_selector = self.build_format_selector(req_format)
2855
2856 formats_to_download = list(format_selector({
2857 'formats': formats,
2858 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2859 'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
2860 or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
2861 }))
2862 if interactive_format_selection and not formats_to_download:
2863 self.report_error('Requested format is not available', tb=False, is_error=False)
2864 continue
2865 break
2866
2867 if not formats_to_download:
2868 if not self.params.get('ignore_no_formats_error'):
2869 raise ExtractorError(
2870 'Requested format is not available. Use --list-formats for a list of available formats',
2871 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2872 self.report_warning('Requested format is not available')
2873 # Process what we can, even without any available formats.
2874 formats_to_download = [{}]
2875
2876 requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
2877 best_format, downloaded_formats = formats_to_download[-1], []
2878 if download:
2879 if best_format and requested_ranges:
2880 def to_screen(*msg):
2881 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2882
2883 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2884 (f['format_id'] for f in formats_to_download))
2885 if requested_ranges != ({}, ):
2886 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2887 (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
2888 max_downloads_reached = False
2889
2890 for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
2891 new_info = self._copy_infodict(info_dict)
2892 new_info.update(fmt)
2893 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
2894 end_time = offset + min(chapter.get('end_time', duration), duration)
2895 # duration may not be accurate. So allow deviations <1sec
2896 if end_time == float('inf') or end_time > offset + duration + 1:
2897 end_time = None
2898 if chapter or offset:
2899 new_info.update({
2900 'section_start': offset + chapter.get('start_time', 0),
2901 'section_end': end_time,
2902 'section_title': chapter.get('title'),
2903 'section_number': chapter.get('index'),
2904 })
2905 downloaded_formats.append(new_info)
2906 try:
2907 self.process_info(new_info)
2908 except MaxDownloadsReached:
2909 max_downloads_reached = True
2910 self._raise_pending_errors(new_info)
2911 # Remove copied info
2912 for key, val in tuple(new_info.items()):
2913 if info_dict.get(key) == val:
2914 new_info.pop(key)
2915 if max_downloads_reached:
2916 break
2917
2918 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
2919 assert write_archive.issubset({True, False, 'ignore'})
2920 if True in write_archive and False not in write_archive:
2921 self.record_download_archive(info_dict)
2922
2923 info_dict['requested_downloads'] = downloaded_formats
2924 info_dict = self.run_all_pps('after_video', info_dict)
2925 if max_downloads_reached:
2926 raise MaxDownloadsReached()
2927
2928 # We update the info dict with the selected best quality format (backwards compatibility)
2929 info_dict.update(best_format)
2930 return info_dict
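# --- Illustrative usage sketch (not part of the original source): the usual
# public entry point that drives process_video_result() end to end. The URL
# is a placeholder; any supported URL works. ---
import yt_dlp

with yt_dlp.YoutubeDL() as ydl:
    info = ydl.extract_info('https://example.com/watch?v=xyz', download=False)
    # All well-formed formats remain in info['formats']; the selected best
    # format is also merged into the top-level dict for backward compatibility
    print(info.get('format_id'), len(info.get('formats') or []))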
2931
2932 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2933 """Select the requested subtitles and their format"""
2934 available_subs, normal_sub_langs = {}, []
2935 if normal_subtitles and self.params.get('writesubtitles'):
2936 available_subs.update(normal_subtitles)
2937 normal_sub_langs = tuple(normal_subtitles.keys())
2938 if automatic_captions and self.params.get('writeautomaticsub'):
2939 for lang, cap_info in automatic_captions.items():
2940 if lang not in available_subs:
2941 available_subs[lang] = cap_info
2942
2943 if not available_subs or (
2944 not self.params.get('writesubtitles')
2945 and not self.params.get('writeautomaticsub')):
2946 return None
2947
2948 all_sub_langs = tuple(available_subs.keys())
2949 if self.params.get('allsubtitles', False):
2950 requested_langs = all_sub_langs
2951 elif self.params.get('subtitleslangs', False):
2952 try:
2953 requested_langs = orderedSet_from_options(
2954 self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
2955 except re.error as e:
2956 raise ValueError(f'Wrong regex for subtitleslangs: {e.pattern}')
2957 else:
2958 requested_langs = LazyList(itertools.chain(
2959 ['en'] if 'en' in normal_sub_langs else [],
2960 filter(lambda f: f.startswith('en'), normal_sub_langs),
2961 ['en'] if 'en' in all_sub_langs else [],
2962 filter(lambda f: f.startswith('en'), all_sub_langs),
2963 normal_sub_langs, all_sub_langs,
2964 ))[:1]
2965 if requested_langs:
2966 self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
2967
2968 formats_query = self.params.get('subtitlesformat', 'best')
2969 formats_preference = formats_query.split('/') if formats_query else []
2970 subs = {}
2971 for lang in requested_langs:
2972 formats = available_subs.get(lang)
2973 if formats is None:
2974 self.report_warning(f'{lang} subtitles not available for {video_id}')
2975 continue
2976 for ext in formats_preference:
2977 if ext == 'best':
2978 f = formats[-1]
2979 break
2980 matches = list(filter(lambda f: f['ext'] == ext, formats))
2981 if matches:
2982 f = matches[-1]
2983 break
2984 else:
2985 f = formats[-1]
2986 self.report_warning(
2987 'No subtitle format found matching "%s" for language %s, '
2988 'using %s' % (formats_query, lang, f['ext']))
2989 subs[lang] = f
2990 return subs
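# --- Illustrative sketch (not part of the original source): the default
# English-preference chain above, written as plain Python. ---
import itertools

def default_subtitle_lang(normal_sub_langs, all_sub_langs):
    # Priority: exact 'en' among normal subs, then en* among normal subs,
    # then the same two checks against all subs, then anything available
    candidates = itertools.chain(
        (lang for lang in normal_sub_langs if lang == 'en'),
        (lang for lang in normal_sub_langs if lang.startswith('en')),
        (lang for lang in all_sub_langs if lang == 'en'),
        (lang for lang in all_sub_langs if lang.startswith('en')),
        normal_sub_langs, all_sub_langs)
    return next(iter(candidates), None)

# e.g. default_subtitle_lang(('de',), ('de', 'en-US')) == 'en-US'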
2991
2992 def _forceprint(self, key, info_dict):
2993 if info_dict is None:
2994 return
2995 info_copy = info_dict.copy()
2996 info_copy.setdefault('filename', self.prepare_filename(info_dict))
2997 if info_dict.get('requested_formats') is not None:
2998 # For RTMP URLs, also include the playpath
2999 info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
3000 elif info_dict.get('url'):
3001 info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
3002 info_copy['formats_table'] = self.render_formats_table(info_dict)
3003 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
3004 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
3005 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
3006
3007 def format_tmpl(tmpl):
3008 mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
3009 if not mobj:
3010 return tmpl
3011
3012 fmt = '%({})s'
3013 if tmpl.startswith('{'):
3014 tmpl, fmt = f'.{tmpl}', '%({})j'
3015 if tmpl.endswith('='):
3016 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
3017 return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
3018
3019 for tmpl in self.params['forceprint'].get(key, []):
3020 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
3021
3022 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
3023 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
3024 tmpl = format_tmpl(tmpl)
3025 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
3026 if self._ensure_dir_exists(filename):
3027 with open(filename, 'a', encoding='utf-8', newline='') as f:
3028 f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
3029
3030 return info_copy
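# format_tmpl() above implements a small shorthand for --print arguments.
# Worked expansions (illustrative):
#   'id,title'   -> '%(id)s\n%(title)s'        (one template per field)
#   'duration='  -> 'duration = %(duration)#j' (trailing '=' labels the value)
#   '{id,title}' -> '%(.{id,title})j'          ('{...}' dumps a JSON dict)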
3031
3032 def __forced_printings(self, info_dict, filename=None, incomplete=True):
3033 if (self.params.get('forcejson')
3034 or self.params['forceprint'].get('video')
3035 or self.params['print_to_file'].get('video')):
3036 self.post_extract(info_dict)
3037 if filename:
3038 info_dict['filename'] = filename
3039 info_copy = self._forceprint('video', info_dict)
3040
3041 def print_field(field, actual_field=None, optional=False):
3042 if actual_field is None:
3043 actual_field = field
3044 if self.params.get(f'force{field}') and (
3045 info_copy.get(field) is not None or (not optional and not incomplete)):
3046 self.to_stdout(info_copy[actual_field])
3047
3048 print_field('title')
3049 print_field('id')
3050 print_field('url', 'urls')
3051 print_field('thumbnail', optional=True)
3052 print_field('description', optional=True)
3053 print_field('filename')
3054 if self.params.get('forceduration') and info_copy.get('duration') is not None:
3055 self.to_stdout(formatSeconds(info_copy['duration']))
3056 print_field('format')
3057
3058 if self.params.get('forcejson'):
3059 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
3060
3061 def dl(self, name, info, subtitle=False, test=False):
3062 if not info.get('url'):
3063 self.raise_no_formats(info, True)
3064
3065 if test:
3066 verbose = self.params.get('verbose')
3067 params = {
3068 'test': True,
3069 'quiet': self.params.get('quiet') or not verbose,
3070 'verbose': verbose,
3071 'noprogress': not verbose,
3072 'nopart': True,
3073 'skip_unavailable_fragments': False,
3074 'keep_fragments': False,
3075 'overwrites': True,
3076 '_no_ytdl_file': True,
3077 }
3078 else:
3079 params = self.params
3080 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
3081 if not test:
3082 for ph in self._progress_hooks:
3083 fd.add_progress_hook(ph)
3084 urls = '", "'.join(
3085 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
3086 for f in info.get('requested_formats', []) or [info])
3087 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
3088
3089 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
3090 # But it may contain objects that are not deep-copyable
3091 new_info = self._copy_infodict(info)
3092 if new_info.get('http_headers') is None:
3093 new_info['http_headers'] = self._calc_headers(new_info)
3094 return fd.download(name, new_info, subtitle)
3095
3096 def existing_file(self, filepaths, *, default_overwrite=True):
3097 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
3098 if existing_files and not self.params.get('overwrites', default_overwrite):
3099 return existing_files[0]
3100
3101 for file in existing_files:
3102 self.report_file_delete(file)
3103 os.remove(file)
3104 return None
3105
3106 def process_info(self, info_dict):
3107 """Process a single resolved IE result. (Modifies it in-place)"""
3108
3109 assert info_dict.get('_type', 'video') == 'video'
3110 original_infodict = info_dict
3111
3112 if 'format' not in info_dict and 'ext' in info_dict:
3113 info_dict['format'] = info_dict['ext']
3114
3115 if self._match_entry(info_dict) is not None:
3116 info_dict['__write_download_archive'] = 'ignore'
3117 return
3118
3119 # Does nothing under normal operation - for backward compatibility of process_info
3120 self.post_extract(info_dict)
3121
3122 def replace_info_dict(new_info):
3123 nonlocal info_dict
3124 if new_info == info_dict:
3125 return
3126 info_dict.clear()
3127 info_dict.update(new_info)
3128
3129 new_info, _ = self.pre_process(info_dict, 'video')
3130 replace_info_dict(new_info)
3131 self._num_downloads += 1
3132
3133 # info_dict['_filename'] needs to be set for backward compatibility
3134 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
3135 temp_filename = self.prepare_filename(info_dict, 'temp')
3136 files_to_move = {}
3137
3138 # Forced printings
3139 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
3140
3141 def check_max_downloads():
3142 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
3143 raise MaxDownloadsReached()
3144
3145 if self.params.get('simulate'):
3146 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3147 check_max_downloads()
3148 return
3149
3150 if full_filename is None:
3151 return
3152 if not self._ensure_dir_exists(encodeFilename(full_filename)):
3153 return
3154 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
3155 return
3156
3157 if self._write_description('video', info_dict,
3158 self.prepare_filename(info_dict, 'description')) is None:
3159 return
3160
3161 sub_files = self._write_subtitles(info_dict, temp_filename)
3162 if sub_files is None:
3163 return
3164 files_to_move.update(dict(sub_files))
3165
3166 thumb_files = self._write_thumbnails(
3167 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3168 if thumb_files is None:
3169 return
3170 files_to_move.update(dict(thumb_files))
3171
3172 infofn = self.prepare_filename(info_dict, 'infojson')
3173 _infojson_written = self._write_info_json('video', info_dict, infofn)
3174 if _infojson_written:
3175 info_dict['infojson_filename'] = infofn
3176 # For backward compatibility, even though it was a private field
3177 info_dict['__infojson_filename'] = infofn
3178 elif _infojson_written is None:
3179 return
3180
3181 # Note: Annotations are deprecated
3182 annofn = None
3183 if self.params.get('writeannotations', False):
3184 annofn = self.prepare_filename(info_dict, 'annotation')
3185 if annofn:
3186 if not self._ensure_dir_exists(encodeFilename(annofn)):
3187 return
3188 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
3189 self.to_screen('[info] Video annotations are already present')
3190 elif not info_dict.get('annotations'):
3191 self.report_warning('There are no annotations to write.')
3192 else:
3193 try:
3194 self.to_screen('[info] Writing video annotations to: ' + annofn)
3195 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
3196 annofile.write(info_dict['annotations'])
3197 except (KeyError, TypeError):
3198 self.report_warning('There are no annotations to write.')
3199 except OSError:
3200 self.report_error('Cannot write annotations file: ' + annofn)
3201 return
3202
3203 # Write internet shortcut files
3204 def _write_link_file(link_type):
3205 url = try_get(info_dict['webpage_url'], iri_to_uri)
3206 if not url:
3207 self.report_warning(
3208 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3209 return True
3210 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
3211 if not self._ensure_dir_exists(encodeFilename(linkfn)):
3212 return False
3213 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
3214 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3215 return True
3216 try:
3217 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3218 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3219 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
3220 template_vars = {'url': url}
3221 if link_type == 'desktop':
3222 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3223 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
3224 except OSError:
3225 self.report_error(f'Cannot write internet shortcut {linkfn}')
3226 return False
3227 return True
3228
3229 write_links = {
3230 'url': self.params.get('writeurllink'),
3231 'webloc': self.params.get('writewebloclink'),
3232 'desktop': self.params.get('writedesktoplink'),
3233 }
3234 if self.params.get('writelink'):
3235 link_type = ('webloc' if sys.platform == 'darwin'
3236 else 'desktop' if sys.platform.startswith('linux')
3237 else 'url')
3238 write_links[link_type] = True
3239
3240 if any(should_write and not _write_link_file(link_type)
3241 for link_type, should_write in write_links.items()):
3242 return
3243
3244 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3245 replace_info_dict(new_info)
3246
3247 if self.params.get('skip_download'):
3248 info_dict['filepath'] = temp_filename
3249 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3250 info_dict['__files_to_move'] = files_to_move
3251 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
3252 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3253 else:
3254 # Download
3255 info_dict.setdefault('__postprocessors', [])
3256 try:
3257
3258 def existing_video_file(*filepaths):
3259 ext = info_dict.get('ext')
3260 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3261 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3262 default_overwrite=False)
3263 if file:
3264 info_dict['ext'] = os.path.splitext(file)[1][1:]
3265 return file
3266
3267 fd, success = None, True
3268 if info_dict.get('protocol') or info_dict.get('url'):
3269 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3270 if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
3271 info_dict.get('section_start') or info_dict.get('section_end')):
3272 msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
3273 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3274 self.report_error(f'{msg}. Aborting')
3275 return
3276
3277 if info_dict.get('requested_formats') is not None:
3278 old_ext = info_dict['ext']
3279 if self.params.get('merge_output_format') is None:
3280 if (info_dict['ext'] == 'webm'
3281 and info_dict.get('thumbnails')
3282 # check with type instead of pp_key, __name__, or isinstance
3283 # since we don't want any custom PPs to trigger this
3284 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
3285 info_dict['ext'] = 'mkv'
3286 self.report_warning(
3287 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3288 new_ext = info_dict['ext']
3289
3290 def correct_ext(filename, ext=new_ext):
3291 if filename == '-':
3292 return filename
3293 filename_real_ext = os.path.splitext(filename)[1][1:]
3294 filename_wo_ext = (
3295 os.path.splitext(filename)[0]
3296 if filename_real_ext in (old_ext, new_ext)
3297 else filename)
3298 return f'{filename_wo_ext}.{ext}'
3299
3300 # Ensure filename always has a correct extension for successful merge
3301 full_filename = correct_ext(full_filename)
3302 temp_filename = correct_ext(temp_filename)
3303 dl_filename = existing_video_file(full_filename, temp_filename)
3304
3305 info_dict['__real_download'] = False
3306 # NOTE: Copy so that original format dicts are not modified
3307 info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))
3308
3309 merger = FFmpegMergerPP(self)
3310 downloaded = []
3311 if dl_filename is not None:
3312 self.report_file_already_downloaded(dl_filename)
3313 elif fd:
3314 for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
3315 f['filepath'] = fname = prepend_extension(
3316 correct_ext(temp_filename, info_dict['ext']),
3317 'f%s' % f['format_id'], info_dict['ext'])
3318 downloaded.append(fname)
3319 info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
3320 success, real_download = self.dl(temp_filename, info_dict)
3321 info_dict['__real_download'] = real_download
3322 else:
3323 if self.params.get('allow_unplayable_formats'):
3324 self.report_warning(
3325 'You have requested merging of multiple formats '
3326 'while also allowing unplayable formats to be downloaded. '
3327 'The formats won\'t be merged to prevent data corruption.')
3328 elif not merger.available:
3329 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3330 if not self.params.get('ignoreerrors'):
3331 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3332 return
3333 self.report_warning(f'{msg}. The formats won\'t be merged')
3334
3335 if temp_filename == '-':
3336 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3337 else 'but the formats are incompatible for simultaneous download' if merger.available
3338 else 'but ffmpeg is not installed')
3339 self.report_warning(
3340 f'You have requested downloading multiple formats to stdout {reason}. '
3341 'The formats will be streamed one after the other')
3342 fname = temp_filename
3343 for f in info_dict['requested_formats']:
3344 new_info = dict(info_dict)
3345 del new_info['requested_formats']
3346 new_info.update(f)
3347 if temp_filename != '-':
3348 fname = prepend_extension(
3349 correct_ext(temp_filename, new_info['ext']),
3350 'f%s' % f['format_id'], new_info['ext'])
3351 if not self._ensure_dir_exists(fname):
3352 return
3353 f['filepath'] = fname
3354 downloaded.append(fname)
3355 partial_success, real_download = self.dl(fname, new_info)
3356 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3357 success = success and partial_success
3358
3359 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3360 info_dict['__postprocessors'].append(merger)
3361 info_dict['__files_to_merge'] = downloaded
3362 # Even if nothing new was downloaded, the merge itself still happens only now
3363 info_dict['__real_download'] = True
3364 else:
3365 for file in downloaded:
3366 files_to_move[file] = None
3367 else:
3368 # Just a single file
3369 dl_filename = existing_video_file(full_filename, temp_filename)
3370 if dl_filename is None or dl_filename == temp_filename:
3371 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3372 # So we should try to resume the download
3373 success, real_download = self.dl(temp_filename, info_dict)
3374 info_dict['__real_download'] = real_download
3375 else:
3376 self.report_file_already_downloaded(dl_filename)
3377
3378 dl_filename = dl_filename or temp_filename
3379 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3380
3381 except network_exceptions as err:
3382 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3383 return
3384 except OSError as err:
3385 raise UnavailableVideoError(err)
3386 except (ContentTooShortError, ) as err:
3387 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3388 return
3389
3390 self._raise_pending_errors(info_dict)
3391 if success and full_filename != '-':
3392
3393 def fixup():
3394 do_fixup = True
3395 fixup_policy = self.params.get('fixup')
3396 vid = info_dict['id']
3397
3398 if fixup_policy in ('ignore', 'never'):
3399 return
3400 elif fixup_policy == 'warn':
3401 do_fixup = 'warn'
3402 elif fixup_policy != 'force':
3403 assert fixup_policy in ('detect_or_warn', None)
3404 if not info_dict.get('__real_download'):
3405 do_fixup = False
3406
3407 def ffmpeg_fixup(cndn, msg, cls):
3408 if not (do_fixup and cndn):
3409 return
3410 elif do_fixup == 'warn':
3411 self.report_warning(f'{vid}: {msg}')
3412 return
3413 pp = cls(self)
3414 if pp.available:
3415 info_dict['__postprocessors'].append(pp)
3416 else:
3417 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3418
3419 stretched_ratio = info_dict.get('stretched_ratio')
3420 ffmpeg_fixup(stretched_ratio not in (1, None),
3421 f'Non-uniform pixel ratio {stretched_ratio}',
3422 FFmpegFixupStretchedPP)
3423
3424 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3425 downloader = downloader.FD_NAME if downloader else None
3426
3427 ext = info_dict.get('ext')
3428 postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3429 isinstance(pp, FFmpegVideoConvertorPP)
3430 and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3431 ) for pp in self._pps['post_process'])
3432
3433 if not postprocessed_by_ffmpeg:
3434 ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
3435 'writing DASH m4a. Only some players support this container',
3436 FFmpegFixupM4aPP)
3437 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3438 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3439 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3440 FFmpegFixupM3u8PP)
3441 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
3442 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3443
3444 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3445 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3446
3447 fixup()
3448 try:
3449 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3450 except PostProcessingError as err:
3451 self.report_error('Postprocessing: %s' % str(err))
3452 return
3453 try:
3454 for ph in self._post_hooks:
3455 ph(info_dict['filepath'])
3456 except Exception as err:
3457 self.report_error('post hooks: %s' % str(err))
3458 return
3459 info_dict['__write_download_archive'] = True
3460
3461 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
3462 if self.params.get('force_write_download_archive'):
3463 info_dict['__write_download_archive'] = True
3464 check_max_downloads()
3465
3466 def __download_wrapper(self, func):
3467 @functools.wraps(func)
3468 def wrapper(*args, **kwargs):
3469 try:
3470 res = func(*args, **kwargs)
3471 except UnavailableVideoError as e:
3472 self.report_error(e)
3473 except DownloadCancelled as e:
3474 self.to_screen(f'[info] {e}')
3475 if not self.params.get('break_per_url'):
3476 raise
3477 self._num_downloads = 0
3478 else:
3479 if self.params.get('dump_single_json', False):
3480 self.post_extract(res)
3481 self.to_stdout(json.dumps(self.sanitize_info(res)))
3482 return wrapper
3483
3484 def download(self, url_list):
3485 """Download a given list of URLs."""
3486 url_list = variadic(url_list) # Passing a single URL is a common mistake
3487 outtmpl = self.params['outtmpl']['default']
3488 if (len(url_list) > 1
3489 and outtmpl != '-'
3490 and '%' not in outtmpl
3491 and self.params.get('max_downloads') != 1):
3492 raise SameFileError(outtmpl)
3493
3494 for url in url_list:
3495 self.__download_wrapper(self.extract_info)(
3496 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3497
3498 return self._download_retcode
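# --- Minimal embedding sketch for download() (not part of the original
# source); the URL and output template are placeholders. ---
import yt_dlp

with yt_dlp.YoutubeDL({'outtmpl': '%(title)s [%(id)s].%(ext)s'}) as ydl:
    retcode = ydl.download(['https://example.com/watch?v=xyz'])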
3499
3500 def download_with_info_file(self, info_filename):
3501 with contextlib.closing(fileinput.FileInput(
3502 [info_filename], mode='r',
3503 openhook=fileinput.hook_encoded('utf-8'))) as f:
3504 # FileInput doesn't have a read method, so we can't call json.load
3505 infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
3506 for info in variadic(json.loads('\n'.join(f)))]
3507 for info in infos:
3508 self._load_cookies(info.get('cookies'), from_headers=False)
3509 self._load_cookies(traverse_obj(info.get('http_headers'), 'Cookie', casesense=False)) # compat
3510 try:
3511 self.__download_wrapper(self.process_ie_result)(info, download=True)
3512 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3513 if not isinstance(e, EntryNotInPlaylist):
3514 self.to_stderr('\r')
3515 webpage_url = info.get('webpage_url')
3516 if webpage_url is None:
3517 raise
3518 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3519 self.download([webpage_url])
3520 return self._download_retcode
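# Typical --load-info-json round trip that ends up here (illustrative):
#   yt-dlp --write-info-json --skip-download URL    # writes VIDEO.info.json
#   yt-dlp --load-info-json VIDEO.info.json         # downloads from saved metadata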
3521
3522 @staticmethod
3523 def sanitize_info(info_dict, remove_private_keys=False):
3524 ''' Sanitize the infodict for converting to json '''
3525 if info_dict is None:
3526 return info_dict
3527 info_dict.setdefault('epoch', int(time.time()))
3528 info_dict.setdefault('_type', 'video')
3529 info_dict.setdefault('_version', {
3530 'version': __version__,
3531 'current_git_head': current_git_head(),
3532 'release_git_head': RELEASE_GIT_HEAD,
3533 'repository': REPOSITORY,
3534 })
3535
3536 if remove_private_keys:
3537 reject = lambda k, v: v is None or k.startswith('__') or k in {
3538 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3539 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
3540 'playlist_autonumber', '_format_sort_fields',
3541 }
3542 else:
3543 reject = lambda k, v: False
3544
3545 def filter_fn(obj):
3546 if isinstance(obj, dict):
3547 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3548 elif isinstance(obj, (list, tuple, set, LazyList)):
3549 return list(map(filter_fn, obj))
3550 elif obj is None or isinstance(obj, (str, int, float, bool)):
3551 return obj
3552 else:
3553 return repr(obj)
3554
3555 return filter_fn(info_dict)
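# Behaviour sketch (illustrative): with remove_private_keys=True, private and
# internal keys are dropped and non-JSON-serializable values are repr()'d:
#   sanitize_info({'id': 'xyz', '__postprocessors': [], 'filepath': 'a.mp4',
#                  'some_object': datetime.date(2023, 1, 1)}, True)
#   -> {'id': 'xyz', 'some_object': 'datetime.date(2023, 1, 1)',
#       'epoch': ..., '_type': 'video', '_version': {...}}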
3556
3557 @staticmethod
3558 def filter_requested_info(info_dict, actually_filter=True):
3559 ''' Alias of sanitize_info for backward compatibility '''
3560 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3561
3562 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3563 for filename in set(filter(None, files_to_delete)):
3564 if msg:
3565 self.to_screen(msg % filename)
3566 try:
3567 os.remove(filename)
3568 except OSError:
3569 self.report_warning(f'Unable to delete file {filename}')
3570 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3571 del info['__files_to_move'][filename]
3572
3573 @staticmethod
3574 def post_extract(info_dict):
3575 def actual_post_extract(info_dict):
3576 if info_dict.get('_type') in ('playlist', 'multi_video'):
3577 for video_dict in info_dict.get('entries', {}):
3578 actual_post_extract(video_dict or {})
3579 return
3580
3581 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3582 info_dict.update(post_extractor())
3583
3584 actual_post_extract(info_dict or {})
3585
3586 def run_pp(self, pp, infodict):
3587 files_to_delete = []
3588 if '__files_to_move' not in infodict:
3589 infodict['__files_to_move'] = {}
3590 try:
3591 files_to_delete, infodict = pp.run(infodict)
3592 except PostProcessingError as e:
3593 # Must be True and not 'only_download'
3594 if self.params.get('ignoreerrors') is True:
3595 self.report_error(e)
3596 return infodict
3597 raise
3598
3599 if not files_to_delete:
3600 return infodict
3601 if self.params.get('keepvideo', False):
3602 for f in files_to_delete:
3603 infodict['__files_to_move'].setdefault(f, '')
3604 else:
3605 self._delete_downloaded_files(
3606 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3607 return infodict
3608
3609 def run_all_pps(self, key, info, *, additional_pps=None):
3610 if key != 'video':
3611 self._forceprint(key, info)
3612 for pp in (additional_pps or []) + self._pps[key]:
3613 info = self.run_pp(pp, info)
3614 return info
3615
3616 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3617 info = dict(ie_info)
3618 info['__files_to_move'] = files_to_move or {}
3619 try:
3620 info = self.run_all_pps(key, info)
3621 except PostProcessingError as err:
3622 msg = f'Preprocessing: {err}'
3623 info.setdefault('__pending_error', msg)
3624 self.report_error(msg, is_error=False)
3625 return info, info.pop('__files_to_move', None)
3626
3627 def post_process(self, filename, info, files_to_move=None):
3628 """Run all the postprocessors on the given file."""
3629 info['filepath'] = filename
3630 info['__files_to_move'] = files_to_move or {}
3631 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3632 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3633 del info['__files_to_move']
3634 return self.run_all_pps('after_move', info)
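# --- Illustrative sketch (not part of the original source): hooking a custom
# postprocessor into the same chain that post_process() runs. The class and
# URL are placeholders. ---
import yt_dlp

class ExamplePP(yt_dlp.postprocessor.PostProcessor):
    def run(self, info):
        self.to_screen(f'Seen {info.get("id")}')
        return [], info  # (files_to_delete, updated info), as consumed by run_pp()

with yt_dlp.YoutubeDL() as ydl:
    ydl.add_post_processor(ExamplePP(), when='post_process')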
3635
3636 def _make_archive_id(self, info_dict):
3637 video_id = info_dict.get('id')
3638 if not video_id:
3639 return
3640 # Future-proof against any change in case
3641 # and backwards compatibility with prior versions
3642 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3643 if extractor is None:
3644 url = str_or_none(info_dict.get('url'))
3645 if not url:
3646 return
3647 # Try to find matching extractor for the URL and take its ie_key
3648 for ie_key, ie in self._ies.items():
3649 if ie.suitable(url):
3650 extractor = ie_key
3651 break
3652 else:
3653 return
3654 return make_archive_id(extractor, video_id)
3655
3656 def in_download_archive(self, info_dict):
3657 if not self.archive:
3658 return False
3659
3660 vid_ids = [self._make_archive_id(info_dict)]
3661 vid_ids.extend(info_dict.get('_old_archive_ids') or [])
3662 return any(id_ in self.archive for id_ in vid_ids)
3663
3664 def record_download_archive(self, info_dict):
3665 fn = self.params.get('download_archive')
3666 if fn is None:
3667 return
3668 vid_id = self._make_archive_id(info_dict)
3669 assert vid_id
3670
3671 self.write_debug(f'Adding to archive: {vid_id}')
3672 if is_path_like(fn):
3673 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3674 archive_file.write(vid_id + '\n')
3675 self.archive.add(vid_id)
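# Archive entries are '<extractor> <video_id>' lines built by
# make_archive_id(), e.g. (illustrative):
#   youtube dQw4w9WgXcQ
#   vimeo 123456789
# in_download_archive() checks these along with any _old_archive_ids.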
3676
3677 @staticmethod
3678 def format_resolution(format, default='unknown'):
3679 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3680 return 'audio only'
3681 if format.get('resolution') is not None:
3682 return format['resolution']
3683 if format.get('width') and format.get('height'):
3684 return '%dx%d' % (format['width'], format['height'])
3685 elif format.get('height'):
3686 return '%sp' % format['height']
3687 elif format.get('width'):
3688 return '%dx?' % format['width']
3689 return default
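# Worked examples for format_resolution() (illustrative):
#   {'vcodec': 'none', 'acodec': 'mp4a'} -> 'audio only'
#   {'width': 1920, 'height': 1080}      -> '1920x1080'
#   {'height': 720}                      -> '720p'
#   {'width': 640}                       -> '640x?'
#   {}                                   -> 'unknown' (the default)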
3690
3691 def _list_format_headers(self, *headers):
3692 if self.params.get('listformats_table', True) is not False:
3693 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3694 return headers
3695
3696 def _format_note(self, fdict):
3697 res = ''
3698 if fdict.get('ext') in ['f4f', 'f4m']:
3699 res += '(unsupported)'
3700 if fdict.get('language'):
3701 if res:
3702 res += ' '
3703 res += '[%s]' % fdict['language']
3704 if fdict.get('format_note') is not None:
3705 if res:
3706 res += ' '
3707 res += fdict['format_note']
3708 if fdict.get('tbr') is not None:
3709 if res:
3710 res += ', '
3711 res += '%4dk' % fdict['tbr']
3712 if fdict.get('container') is not None:
3713 if res:
3714 res += ', '
3715 res += '%s container' % fdict['container']
3716 if (fdict.get('vcodec') is not None
3717 and fdict.get('vcodec') != 'none'):
3718 if res:
3719 res += ', '
3720 res += fdict['vcodec']
3721 if fdict.get('vbr') is not None:
3722 res += '@'
3723 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3724 res += 'video@'
3725 if fdict.get('vbr') is not None:
3726 res += '%4dk' % fdict['vbr']
3727 if fdict.get('fps') is not None:
3728 if res:
3729 res += ', '
3730 res += '%sfps' % fdict['fps']
3731 if fdict.get('acodec') is not None:
3732 if res:
3733 res += ', '
3734 if fdict['acodec'] == 'none':
3735 res += 'video only'
3736 else:
3737 res += '%-5s' % fdict['acodec']
3738 elif fdict.get('abr') is not None:
3739 if res:
3740 res += ', '
3741 res += 'audio'
3742 if fdict.get('abr') is not None:
3743 res += '@%3dk' % fdict['abr']
3744 if fdict.get('asr') is not None:
3745 res += ' (%5dHz)' % fdict['asr']
3746 if fdict.get('filesize') is not None:
3747 if res:
3748 res += ', '
3749 res += format_bytes(fdict['filesize'])
3750 elif fdict.get('filesize_approx') is not None:
3751 if res:
3752 res += ', '
3753 res += '~' + format_bytes(fdict['filesize_approx'])
3754 return res
3755
3756 def _get_formats(self, info_dict):
3757 if info_dict.get('formats') is None:
3758 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3759 return [info_dict]
3760 return []
3761 return info_dict['formats']
3762
3763 def render_formats_table(self, info_dict):
3764 formats = self._get_formats(info_dict)
3765 if not formats:
3766 return
3767 if self.params.get('listformats_table', True) is False:
3768 table = [
3769 [
3770 format_field(f, 'format_id'),
3771 format_field(f, 'ext'),
3772 self.format_resolution(f),
3773 self._format_note(f)
3774 ] for f in formats if (f.get('preference') or 0) >= -1000]
3775 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3776
3777 def simplified_codec(f, field):
3778 assert field in ('acodec', 'vcodec')
3779 codec = f.get(field)
3780 if not codec:
3781 return 'unknown'
3782 elif codec != 'none':
3783 return '.'.join(codec.split('.')[:4])
3784
3785 if field == 'vcodec' and f.get('acodec') == 'none':
3786 return 'images'
3787 elif field == 'acodec' and f.get('vcodec') == 'none':
3788 return ''
3789 return self._format_out('audio only' if field == 'vcodec' else 'video only',
3790 self.Styles.SUPPRESS)
3791
3792 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3793 table = [
3794 [
3795 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3796 format_field(f, 'ext'),
3797 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3798 format_field(f, 'fps', '\t%d', func=round),
3799 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3800 format_field(f, 'audio_channels', '\t%s'),
3801 delim, (
3802 format_field(f, 'filesize', ' \t%s', func=format_bytes)
3803 or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
3804 or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
3805 None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
3806 format_field(f, 'tbr', '\t%dk', func=round),
3807 shorten_protocol_name(f.get('protocol', '')),
3808 delim,
3809 simplified_codec(f, 'vcodec'),
3810 format_field(f, 'vbr', '\t%dk', func=round),
3811 simplified_codec(f, 'acodec'),
3812 format_field(f, 'abr', '\t%dk', func=round),
3813 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
3814 join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
3815 self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
3816 (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
3817 else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
3818 format_field(f, 'format_note'),
3819 format_field(f, 'container', ignore=(None, f.get('ext'))),
3820 delim=', '), delim=' '),
3821 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3822 header_line = self._list_format_headers(
3823 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3824 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3825
3826 return render_table(
3827 header_line, table, hide_empty=True,
3828 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3829
3830 def render_thumbnails_table(self, info_dict):
3831 thumbnails = list(info_dict.get('thumbnails') or [])
3832 if not thumbnails:
3833 return None
3834 return render_table(
3835 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3836 [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
3837
3838 def render_subtitles_table(self, video_id, subtitles):
3839 def _row(lang, formats):
3840 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3841 if len(set(names)) == 1:
3842 names = [] if names[0] == 'unknown' else names[:1]
3843 return [lang, ', '.join(names), ', '.join(exts)]
3844
3845 if not subtitles:
3846 return None
3847 return render_table(
3848 self._list_format_headers('Language', 'Name', 'Formats'),
3849 [_row(lang, formats) for lang, formats in subtitles.items()],
3850 hide_empty=True)
3851
3852 def __list_table(self, video_id, name, func, *args):
3853 table = func(*args)
3854 if not table:
3855 self.to_screen(f'{video_id} has no {name}')
3856 return
3857 self.to_screen(f'[info] Available {name} for {video_id}:')
3858 self.to_stdout(table)
3859
3860 def list_formats(self, info_dict):
3861 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3862
3863 def list_thumbnails(self, info_dict):
3864 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3865
3866 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3867 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3868
3869 def print_debug_header(self):
3870 if not self.params.get('verbose'):
3871 return
3872
3873 from . import _IN_CLI # Must be delayed import
3874
3875 # These imports can be slow. So import them only as needed
3876 from .extractor.extractors import _LAZY_LOADER
3877 from .extractor.extractors import (
3878 _PLUGIN_CLASSES as plugin_ies,
3879 _PLUGIN_OVERRIDES as plugin_ie_overrides
3880 )
3881
3882 def get_encoding(stream):
3883 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3884 additional_info = []
3885 if os.environ.get('TERM', '').lower() == 'dumb':
3886 additional_info.append('dumb')
3887 if not supports_terminal_sequences(stream):
3888 from .utils import WINDOWS_VT_MODE # Must be imported locally
3889 additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
3890 if additional_info:
3891 ret = f'{ret} ({",".join(additional_info)})'
3892 return ret
3893
3894 encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
3895 locale.getpreferredencoding(),
3896 sys.getfilesystemencoding(),
3897 self.get_encoding(),
3898 ', '.join(
3899 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
3900 if stream is not None and key != 'console')
3901 )
3902
3903 logger = self.params.get('logger')
3904 if logger:
3905 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3906 write_debug(encoding_str)
3907 else:
3908 write_string(f'[debug] {encoding_str}\n', encoding=None)
3909 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3910
3911 source = detect_variant()
3912 if VARIANT not in (None, 'pip'):
3913 source += '*'
3914 klass = type(self)
3915 write_debug(join_nonempty(
3916 f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
3917 f'{CHANNEL}@{__version__}',
3918 f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
3919 '' if source == 'unknown' else f'({source})',
3920 '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
3921 delim=' '))
3922
3923 if not _IN_CLI:
3924 write_debug(f'params: {self.params}')
3925
3926 if not _LAZY_LOADER:
3927 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3928 write_debug('Lazy loading extractors is forcibly disabled')
3929 else:
3930 write_debug('Lazy loading extractors is disabled')
3931 if self.params['compat_opts']:
3932 write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
3933
3934 if current_git_head():
3935 write_debug(f'Git HEAD: {current_git_head()}')
3936 write_debug(system_identifier())
3937
3938 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3939 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3940 if ffmpeg_features:
3941 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
3942
3943 exe_versions['rtmpdump'] = rtmpdump_version()
3944 exe_versions['phantomjs'] = PhantomJSwrapper._version()
3945 exe_str = ', '.join(
3946 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3947 ) or 'none'
3948 write_debug('exe versions: %s' % exe_str)
3949
3950 from .compat.compat_utils import get_package_info
3951 from .dependencies import available_dependencies
3952
3953 write_debug('Optional libraries: %s' % (', '.join(sorted({
3954 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
3955 })) or 'none'))
3956
3957 write_debug(f'Proxy map: {self.proxies}')
3958 # write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers)}')
3959 for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
3960 display_list = ['%s%s' % (
3961 klass.__name__, '' if klass.__name__ == name else f' as {name}')
3962 for name, klass in plugins.items()]
3963 if plugin_type == 'Extractor':
3964 display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
3965 for parent, plugins in plugin_ie_overrides.items())
3966 if not display_list:
3967 continue
3968 write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')
3969
3970 plugin_dirs = plugin_directories()
3971 if plugin_dirs:
3972 write_debug(f'Plugin directories: {plugin_dirs}')
3973
3974 # Not implemented
3975 if False and self.params.get('call_home'):
3976 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
3977 write_debug('Public IP address: %s' % ipaddr)
3978 latest_version = self.urlopen(
3979 'https://yt-dl.org/latest/version').read().decode()
3980 if version_tuple(latest_version) > version_tuple(__version__):
3981 self.report_warning(
3982 'You are using an outdated version (newest version: %s)! '
3983 'See https://yt-dl.org/update if you need help updating.' %
3984 latest_version)
3985
3986 @functools.cached_property
3987 def proxies(self):
3988 """Global proxy configuration"""
3989 opts_proxy = self.params.get('proxy')
3990 if opts_proxy is not None:
3991 if opts_proxy == '':
3992 opts_proxy = '__noproxy__'
3993 proxies = {'all': opts_proxy}
3994 else:
3995 proxies = urllib.request.getproxies()
3996 # compat: fall back to the HTTP proxy for HTTPS. Set HTTPS_PROXY to __noproxy__ to revert
3997 if 'http' in proxies and 'https' not in proxies:
3998 proxies['https'] = proxies['http']
3999
4000 return proxies
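# Resolution order (illustrative): an explicit --proxy wins over environment
# variables, and an empty string disables proxies entirely:
#   params={'proxy': 'socks5://127.0.0.1:1080'} -> {'all': 'socks5://127.0.0.1:1080'}
#   params={'proxy': ''}                        -> {'all': '__noproxy__'}
#   no 'proxy' param, HTTP_PROXY=http://p:3128  -> {'http': ..., 'https': ...}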
4001
4002 @functools.cached_property
4003 def cookiejar(self):
4004 """Global cookiejar instance"""
4005 return load_cookies(
4006 self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
4007
4008 @property
4009 def _opener(self):
4010 """
4011 Get a urllib OpenerDirector from the Urllib handler (deprecated).
4012 """
4013 self.deprecation_warning('YoutubeDL._opener() is deprecated, use YoutubeDL.urlopen()')
4014 handler = self._request_director.handlers['Urllib']
4015 return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
4016
4017 def urlopen(self, req):
4018 """ Start an HTTP download """
4019 if isinstance(req, str):
4020 req = Request(req)
4021 elif isinstance(req, urllib.request.Request):
4022 req = urllib_req_to_req(req)
4023 assert isinstance(req, Request)
4024
4025 # compat: Assume user:pass in the URL is basic auth
4026 url, basic_auth_header = extract_basic_auth(req.url)
4027 if basic_auth_header:
4028 req.headers['Authorization'] = basic_auth_header
4029 req.url = sanitize_url(url)
4030
4031 clean_proxies(proxies=req.proxies, headers=req.headers)
4032 clean_headers(req.headers)
4033
4034 try:
4035 return self._request_director.send(req)
4036 except NoSupportingHandlers as e:
4037 for ue in e.unsupported_errors:
4038 if not (ue.handler and ue.msg):
4039 continue
4040 if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
4041 raise RequestError(
4042 'file:// URLs are disabled by default in yt-dlp for security reasons. '
4043 'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
4044 raise
4045 except SSLError as e:
4046 if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
4047 raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
4048 elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
4049 raise RequestError(
4050 'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
4051 'Try using --legacy-server-connect', cause=e) from e
4052 raise
4053 except HTTPError as e: # TODO: Remove in a future release
4054 raise _CompatHTTPError(e) from e
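# --- Illustrative usage sketch (not part of the original source); the URL is
# a placeholder. Strings and urllib requests are upgraded to Request first. ---
import yt_dlp
from yt_dlp.networking import Request

with yt_dlp.YoutubeDL() as ydl:
    response = ydl.urlopen(Request('https://example.com/api', headers={'Accept': 'application/json'}))
    body = response.read().decode()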
4055
4056 def build_request_director(self, handlers):
4057 logger = _YDLLogger(self)
4058 headers = self.params.get('http_headers').copy()
4059 proxies = self.proxies.copy()
4060 clean_headers(headers)
4061 clean_proxies(proxies, headers)
4062
4063 director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
4064 for handler in handlers:
4065 director.add_handler(handler(
4066 logger=logger,
4067 headers=headers,
4068 cookiejar=self.cookiejar,
4069 proxies=proxies,
4070 prefer_system_certs='no-certifi' in self.params['compat_opts'],
4071 verify=not self.params.get('nocheckcertificate'),
4072 **traverse_obj(self.params, {
4073 'verbose': 'debug_printtraffic',
4074 'source_address': 'source_address',
4075 'timeout': 'socket_timeout',
4076 'legacy_ssl_support': 'legacy_server_connect',
4077 'enable_file_urls': 'enable_file_urls',
4078 'client_cert': {
4079 'client_certificate': 'client_certificate',
4080 'client_certificate_key': 'client_certificate_key',
4081 'client_certificate_password': 'client_certificate_password',
4082 },
4083 }),
4084 ))
4085 return director
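# --- Illustrative sketch (not part of the original source): for the scalar
# mappings, the traverse_obj() call above behaves roughly like this plain-dict
# renaming, dropping options that are unset. ---
params = {'socket_timeout': 20, 'legacy_server_connect': True}
mapping = {'timeout': 'socket_timeout', 'legacy_ssl_support': 'legacy_server_connect'}
kwargs = {new: params[old] for new, old in mapping.items() if params.get(old) is not None}
# kwargs == {'timeout': 20, 'legacy_ssl_support': True}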
4086
4087 def encode(self, s):
4088 if isinstance(s, bytes):
4089 return s # Already encoded
4090
4091 try:
4092 return s.encode(self.get_encoding())
4093 except UnicodeEncodeError as err:
4094 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
4095 raise
4096
4097 def get_encoding(self):
4098 encoding = self.params.get('encoding')
4099 if encoding is None:
4100 encoding = preferredencoding()
4101 return encoding
4102
4103 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
4104 ''' Write infojson and return True = written, 'exists' = already exists, False = skip, None = error '''
4105 if overwrite is None:
4106 overwrite = self.params.get('overwrites', True)
4107 if not self.params.get('writeinfojson'):
4108 return False
4109 elif not infofn:
4110 self.write_debug(f'Skipping writing {label} infojson')
4111 return False
4112 elif not self._ensure_dir_exists(infofn):
4113 return None
4114 elif not overwrite and os.path.exists(infofn):
4115 self.to_screen(f'[info] {label.title()} metadata is already present')
4116 return 'exists'
4117
4118 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
4119 try:
4120 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
4121 return True
4122 except OSError:
4123 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
4124 return None
4125
4126 def _write_description(self, label, ie_result, descfn):
4127 ''' Write description and return True = written, False = skip, None = error '''
4128 if not self.params.get('writedescription'):
4129 return False
4130 elif not descfn:
4131 self.write_debug(f'Skipping writing {label} description')
4132 return False
4133 elif not self._ensure_dir_exists(descfn):
4134 return None
4135 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
4136 self.to_screen(f'[info] {label.title()} description is already present')
4137 elif ie_result.get('description') is None:
4138 self.to_screen(f'[info] There\'s no {label} description to write')
4139 return False
4140 else:
4141 try:
4142 self.to_screen(f'[info] Writing {label} description to: {descfn}')
4143 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
4144 descfile.write(ie_result['description'])
4145 except OSError:
4146 self.report_error(f'Cannot write {label} description file {descfn}')
4147 return None
4148 return True
4149
4150 def _write_subtitles(self, info_dict, filename):
4151 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
4152 ret = []
4153 subtitles = info_dict.get('requested_subtitles')
4154 if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
4155 # Subtitle download errors are already handled in the relevant IE,
4156 # so this silently continues when used with an IE lacking subtitle support
4157 return ret
4158 elif not subtitles:
4159 self.to_screen('[info] There are no subtitles for the requested languages')
4160 return ret
4161 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
4162 if not sub_filename_base:
4163 self.to_screen('[info] Skipping writing video subtitles')
4164 return ret
4165
4166 for sub_lang, sub_info in subtitles.items():
4167 sub_format = sub_info['ext']
4168 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
4169 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
4170 existing_sub = self.existing_file((sub_filename_final, sub_filename))
4171 if existing_sub:
4172 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
4173 sub_info['filepath'] = existing_sub
4174 ret.append((existing_sub, sub_filename_final))
4175 continue
4176
4177 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
4178 if sub_info.get('data') is not None:
4179 try:
4180 # Use newline='' to prevent conversion of newline characters
4181 # See https://github.com/ytdl-org/youtube-dl/issues/10268
4182 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
4183 subfile.write(sub_info['data'])
4184 sub_info['filepath'] = sub_filename
4185 ret.append((sub_filename, sub_filename_final))
4186 continue
4187 except OSError:
4188 self.report_error(f'Cannot write video subtitles file {sub_filename}')
4189 return None
4190
4191 try:
4192 sub_copy = sub_info.copy()
4193 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
4194 self.dl(sub_filename, sub_copy, subtitle=True)
4195 sub_info['filepath'] = sub_filename
4196 ret.append((sub_filename, sub_filename_final))
4197 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
4198 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
4199 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
4200 if not self.params.get('ignoreerrors'):
4201 self.report_error(msg)
4202 raise DownloadError(msg)
4203 self.report_warning(msg)
4204 return ret
4205
4206 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
4207 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
4208 write_all = self.params.get('write_all_thumbnails', False)
4209 thumbnails, ret = [], []
4210 if write_all or self.params.get('writethumbnail', False):
4211 thumbnails = info_dict.get('thumbnails') or []
4212 if not thumbnails:
4213 self.to_screen(f'[info] There are no {label} thumbnails to download')
4214 return ret
4215 multiple = write_all and len(thumbnails) > 1
4216
4217 if thumb_filename_base is None:
4218 thumb_filename_base = filename
4219 if thumbnails and not thumb_filename_base:
4220 self.write_debug(f'Skipping writing {label} thumbnail')
4221 return ret
4222
4223 for idx, t in list(enumerate(thumbnails))[::-1]:
4224 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
4225 thumb_display_id = f'{label} thumbnail {t["id"]}'
4226 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
4227 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
4228
4229 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
4230 if existing_thumb:
4231 self.to_screen('[info] %s is already present' % (
4232 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
4233 t['filepath'] = existing_thumb
4234 ret.append((existing_thumb, thumb_filename_final))
4235 else:
4236 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
4237 try:
4238 uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
4239 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
4240 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
4241 shutil.copyfileobj(uf, thumbf)
4242 ret.append((thumb_filename, thumb_filename_final))
4243 t['filepath'] = thumb_filename
4244 except network_exceptions as err:
4245 if isinstance(err, urllib.error.HTTPError) and err.code == 404:
4246 self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
4247 else:
4248 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
4249 thumbnails.pop(idx)
4250 if ret and not write_all:
4251 break
4252 return ret