import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import http.cookiejar
import io
import itertools
import json
import locale
import operator
import os
import random
import re
import shutil
import string
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata

from .cache import Cache
from .compat import functools, urllib  # isort: split
from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
from .cookies import LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .networking import HEADRequest, Request, RequestDirector
from .networking.common import _REQUEST_HANDLERS
from .networking.exceptions import (
    HTTPError,
    NoSupportingHandlers,
    RequestError,
    SSLError,
    _CompatHTTPError,
    network_exceptions,
)
from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    FFmpegVideoConvertorPP,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import REPOSITORY, current_git_head, detect_variant
from .utils import (
    DEFAULT_OUTTMPL,
    IDENTITY,
    LINK_TEMPLATES,
    MEDIA_EXTENSIONS,
    NO_DEFAULT,
    NUMBER_RE,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    FormatSorter,
    GeoRestrictedError,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PlaylistEntries,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    UserNotLive,
    age_restricted,
    args_to_str,
    bug_reports_message,
    date_from_str,
    deprecation_warning,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    escapeHTML,
    expand_path,
    extract_basic_auth,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_compatible_ext,
    get_domain,
    int_or_none,
    iri_to_uri,
    is_path_like,
    join_nonempty,
    locked_file,
    make_archive_id,
    make_dir,
    number_of_digits,
    orderedSet,
    orderedSet_from_options,
    parse_filesize,
    preferredencoding,
    prepend_extension,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    system_identifier,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_call,
    try_get,
    url_basename,
    variadic,
    version_tuple,
    windows_enable_vt_mode,
    write_json_file,
    write_string,
)
from .utils._utils import _YDLLogger
from .utils.networking import (
    HTTPHeaderDict,
    clean_headers,
    clean_proxies,
    std_headers,
)
from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how
    to extract all the needed information (a task that InfoExtractors
    do), it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it over to the first InfoExtractor
    it finds that reports being able to handle it. The InfoExtractor
    extracts all the information about the video or videos the URL
    refers to, and YoutubeDL processes the extracted information,
    possibly using a File Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge of the InfoExtractors
    that are added to it, so this is a "mutual registration".
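
    A minimal usage sketch (the URL is a placeholder):

        from yt_dlp import YoutubeDL

        with YoutubeDL({'format': 'bestvideo+bestaudio/best'}) as ydl:
            ydl.download(['https://www.example.com/watch?v=xxxxxxxxxxx'])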

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead of username/password.
    netrc_location:    Location of the netrc file. Defaults to ~/.netrc.
    netrc_cmd:         Use a shell command to get credentials.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are 'video' or any of
                       the items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted.
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename).
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or
                       list_thumbnails is used.
    format:            Video format code. See "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation.
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental).
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of the same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file.
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file.
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor).
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py).
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used.
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names.
    trim_file_name:    Limit length of filename (extension excluded).
    windowsfilenames:  Force the filenames to be Windows-compatible.
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API.
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped.
    allowed_extractors: List of regexes to match against extractor names that are allowed.
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None,
                       and don't overwrite any file if False.
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead.
    playlist_items:    Specific indices of playlist to download.
    playlistrandom:    Download playlist items in random order.
    lazy_playlist:     Process playlist entries as they are received.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Print everything to stderr instead of stdout.
    consoletitle:      Display progress in the console window's titlebar.
    writedescription:  Write the video description to a .description file.
    writeinfojson:     Write the video metadata to a .info.json file.
    clean_infojson:    Remove internal metadata from the infojson.
    getcomments:       Extract video comments. These will not be written to disk
                       unless writeinfojson is also given.
    writeannotations:  Write the video annotations to a .annotations.xml file.
    writethumbnail:    Write the thumbnail image to a file.
    allow_playlist_files: Whether to write playlists' description, infojson etc.
                       also to disk when using the 'write*' options.
    write_all_thumbnails: Write all thumbnail formats to files.
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop).
    writeurllink:      Write a Windows internet shortcut file (.url).
    writewebloclink:   Write a macOS internet shortcut file (.webloc).
    writedesktoplink:  Write a Linux internet shortcut file (.desktop).
    writesubtitles:    Write the video subtitles to a file.
    writeautomaticsub: Write the automatically generated subtitles to a file.
    listsubtitles:     Lists all available subtitles for the video.
    subtitlesformat:   The format code for subtitles.
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages, e.g. ['all', '-live_chat'].
    keepvideo:         Keep the video file after post-processing.
    daterange:         A utils.DateRange object; download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file.
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Videos unsuitable for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  A set, or the name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to the entire queue.
    cookiefile:        File name or text stream from where cookies should be read and dumped to.
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, the name of the keyring,
                       and the container name, e.g. ('chrome', ) or
                       ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta').
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation.
    nocheckcertificate: Do not verify SSL certificates.
    client_certificate: Path to client certificate file in PEM format. May include the private key.
    client_certificate_key: Path to private key file for client certificate.
    client_certificate_password: Password for client certificate private key, if encrypted.
                       If not provided and the key is encrypted, yt-dlp will ask interactively.
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       (Only supported by some extractors)
    enable_file_urls:  Enable file:// URLs. This is disabled by default for security reasons.
    http_headers:      A dictionary of custom headers to be used for all requests.
    proxy:             URL of the proxy server to use.
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds.
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi.
    debug_printtraffic: Print out sent and received HTTP traffic.
    default_search:    Prepend this string if an input URL is not valid.
                       'auto' for elaborate guessing.
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Whether to resolve and process url_results further
                       * False: Always process. Default for API
                       * True: Never process
                       * 'in_playlist': Do not process inside playlist/multi_video
                       * 'discard': Always process, but don't return the result
                         from inside playlist/multi_video
                       * 'discard_in_playlist': Same as "discard", but only for
                         playlists (not multi_video). Default for CLI
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries.
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                         the entries of utils.POSTPROCESS_WHEN.
                         Assumed to be 'post_process' if not given.
                       A configuration sketch is given after the post-processor
                       options below.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                         Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                         None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                         unknown
                       * fragment_index: The counter of the currently
                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
                       A sketch of a hook is given after this options list.
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                         Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: "/" separated list of extensions to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted.
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction.
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone, or the lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from the range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download.
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       - Raise utils.DownloadCancelled(msg) to abort remaining
                         downloads when a video is rejected.
                       match_filter_func in utils.py is one example for this;
                       another sketch is given right after this options list.
    color:             A dictionary with output stream names as keys
                       and their respective color policy as values.
                       Can also just be a single color policy,
                       in which case it applies to all outputs.
                       Valid stream names are 'stdout' and 'stderr'.
                       Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header.
    geo_bypass_country:
                       Two-letter ISO 3166-1 alpha-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header.
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country.
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader.
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort,
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation.
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'.
    retry_sleep_functions: Dictionary of functions that take the number of attempts
                       as argument and return the time to sleep in seconds.
                       Allowed keys are 'http', 'fragment', 'file_access'.
    download_ranges:   A callback function that gets called for every video with
                       the signature (info_dict, ydl) -> Iterable[Section].
                       Only the returned sections will be downloaded.
                       Each Section is a dict with the following keys:
                       * start_time: Start time of the section in seconds
                       * end_time: End time of the section in seconds
                       * title: Section title (Optional)
                       * index: Section number (Optional)
    force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts.
    noprogress:        Do not print the progress bar.
    live_from_start:   Whether to download livestream videos from the start.

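    Illustrative sketches of a match_filter callback and a progress hook,
    following the contracts described above (names and the threshold are
    arbitrary):

        def skip_short_videos(info_dict, *, incomplete=False):
            duration = info_dict.get('duration')
            if duration and duration < 60:
                return 'Skipping: video is shorter than a minute'
            return None  # None means "download"

        def on_progress(d):
            if d['status'] == 'finished':
                print('Done downloading', d['filename'])
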
    The following parameters are not used by YoutubeDL itself, they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PPs.
                       For compatibility with youtube-dl, a single list of args
                       can also be used.

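    An illustrative postprocessors/postprocessor_args configuration (the codec
    choice and the ffmpeg argument are arbitrary):

        params = {
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
            }],
            'postprocessor_args': {'ffmpeg': ['-threads', '1']},
        }
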
    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors (default: 3)
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       E.g. {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
                       - `raise DownloadCancelled(msg)` in match_filter instead
    force_generic_extractor: Force downloader to use the generic extractor
                       - Use allowed_extractors = ['generic', 'default']
    playliststart:     - Use playlist_items
                       Playlist item to start at.
    playlistend:       - Use playlist_items
                       Playlist item to end at.
    playlistreverse:   - Use playlist_items
                       Download playlist items in reverse order.
    forceurl:          - Use forceprint
                       Force printing final URL.
    forcetitle:        - Use forceprint
                       Force printing title.
    forceid:           - Use forceprint
                       Force printing ID.
    forcethumbnail:    - Use forceprint
                       Force printing thumbnail URL.
    forcedescription:  - Use forceprint
                       Force printing description.
    forcefilename:     - Use forceprint
                       Force printing final filename.
    forceduration:     - Use forceprint
                       Force printing duration.
    allsubtitles:      - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    include_ads:       - Doesn't work
                       Download ads as well
    call_home:         - Not implemented
                       Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging.
    post_hooks:        - Register a custom postprocessor
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    prefer_ffmpeg:     - avconv support is deprecated
                       If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    no_color:          Same as `color='no_color'`
    """

    _NUMERIC_FIELDS = {
        'width', 'height', 'asr', 'audio_channels', 'fps',
        'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
    }
    _format_selection_exts = {
        'audio': set(MEDIA_EXTENSIONS.common_audio),
        'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
        'storyboards': set(MEDIA_EXTENSIONS.storyboards),
    }

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )

        try:
            windows_enable_vt_mode()
        except Exception as e:
            self.write_debug(f'Failed to enable VT mode: {e}')

        if self.params.get('no_color'):
            if self.params.get('color') is not None:
                self.report_warning('Overwriting params from "color" with "no_color"')
            self.params['color'] = 'no_color'

        term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'

        def process_color_policy(stream):
            stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
            policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
            if policy in ('auto', None):
                return term_allow_color and supports_terminal_sequences(stream)
            assert policy in ('always', 'never', 'no_color')
            return {'always': True, 'never': False}.get(policy, policy)

        self._allow_colors = Namespace(**{
            name: process_color_policy(stream)
            for name, stream in self._out_files.items_ if name != 'console'
        })

        # The code is left like this to be reused for future deprecations
        MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
        current_version = sys.version_info[:2]
        if current_version < MIN_RECOMMENDED:
            msg = ('Support for Python version %d.%d has been deprecated. '
                   'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
                   '\n You will no longer receive updates on this version')
            if current_version < MIN_SUPPORTED:
                msg = 'Python version %d.%d is no longer supported'
            self.deprecated_feature(
                f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
        self._request_director = self.build_request_director(
            sorted(_REQUEST_HANDLERS.values(), key=lambda rh: rh.RH_NAME.lower()))
        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()

        self.__header_cookies = []
        self._load_cookies(traverse_obj(self.params.get('http_headers'), 'cookie', casesense=False))  # compat

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if self.params.get('simulate') is None and any((
            self.params.get('list_thumbnails'),
            self.params.get('listformats'),
            self.params.get('listsubtitles'),
        )):
            self.params['simulate'] = 'list_only'

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if auto_init:
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            archive = set()
            if fn is None:
                return archive
            elif not is_path_like(fn):
                return fn

            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
            return archive

        self.archive = preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
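        # e.g. `yt-dlp -wNyEUrxzFU` would be misread as options; the suggested
        # invocation is `yt-dlp -- -wNyEUrxzFU` (the ID here is illustrative)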
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key. It will try to get one from
        the _ies list; if there is no instance, it will create a new one and
        add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
        all_ies['end'] = UnsupportedURLIE()
        try:
            ie_names = orderedSet_from_options(
                self.params.get('allowed_extractors', ['default']), {
                    'all': list(all_ies),
                    'default': [name for name, ie in all_ies.items() if ie._ENABLED],
                }, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
        for name in ie_names:
            self.add_info_extractor(all_ies[name])
        self.write_debug(f'Loaded {len(ie_names)} extractors')

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                     'Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                     'Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen, only_once=only_once)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def save_cookies(self):
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def __exit__(self, *args):
        self.restore_console_title()
        self.close()

    def close(self):
        self.save_cookies()
        self._request_director.close()

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        BAD_FORMAT='light red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors is True else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message, *, stacklevel=0):
        deprecation_warning(
            message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)

    def deprecated_feature(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'Deprecated Feature: {message}')
        self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefix the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message or print message to stderr'''
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        sanitize = IDENTITY
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join(random.choices(string.ascii_letters, k=32))
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
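        # e.g. a stray '%d' becomes '%%d', while '%(title)s' is left intact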
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
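        # e.g. validate_outtmpl('%(title)s.%(ext)s') should return None, while
        # a malformed template like '%(title)' should return the ValueError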
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        info_dict.pop('__pending_error', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int, slice or "{field, ...}"
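        # e.g. "duration", "formats.0.height", "id.3:7" (a slice) or
        # "formats.:.{format_id,height}" (illustrative examples of these forms)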
        FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
        FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
            'inner': FIELD_INNER_RE,
            'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
        }
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')

        def _traverse_infodict(fields):
            fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                      for f in ([x] if x.startswith('{') else x.split('.'))]
            for i in (0, -1):
                if fields and not fields[i]:
                    fields.pop(i)

            for i, f in enumerate(fields):
                if not f.startswith('{'):
                    continue
                assert f.endswith('}'), f'No closing brace for {f} in {fields}'
                fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}

            return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
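            # e.g. in '%(playlist_index+10)03d' the maths part is '+10'; field
            # operands also work, as in '%(n_entries+1-playlist_index)d'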
1232 offset_key = mdict['maths']
1233 if offset_key:
1234 value = float_or_none(value)
1235 operator = None
1236 while offset_key:
1237 item = re.match(
1238 MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1239 offset_key).group(0)
1240 offset_key = offset_key[len(item):]
1241 if operator is None:
1242 operator = MATH_FUNCTIONS[item]
1243 continue
1244 item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1245 offset = float_or_none(item)
1246 if offset is None:
1247 offset = float_or_none(_traverse_infodict(item))
1248 try:
1249 value = operator(value, multiplier * offset)
1250 except (TypeError, ZeroDivisionError):
1251 return None
1252 operator = None
1253 # Datetime formatting
1254 if mdict['strf_format']:
1255 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1256
1257 # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
1258 if sanitize and value == '':
1259 value = None
1260 return value
1261
1262 na = self.params.get('outtmpl_na_placeholder', 'NA')
1263
1264 def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
1265 return sanitize_filename(str(value), restricted=restricted, is_id=(
1266 bool(re.search(r'(^|[_.])id(\.|$)', key))
1267 if 'filename-sanitization' in self.params['compat_opts']
1268 else NO_DEFAULT))
1269
1270 sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1271 sanitize = bool(sanitize)
1272
1273 def _dumpjson_default(obj):
1274 if isinstance(obj, (set, LazyList)):
1275 return list(obj)
1276 return repr(obj)
1277
1278 class _ReplacementFormatter(string.Formatter):
1279 def get_field(self, field_name, args, kwargs):
1280 if field_name.isdigit():
1281 return args[0], -1
1282 raise ValueError('Unsupported field')
1283
1284 replacement_formatter = _ReplacementFormatter()
1285
1286 def create_key(outer_mobj):
1287 if not outer_mobj.group('has_key'):
1288 return outer_mobj.group(0)
1289 key = outer_mobj.group('key')
1290 mobj = re.match(INTERNAL_FORMAT_RE, key)
1291 value, replacement, default, last_field = None, None, na, ''
1292 while mobj:
1293 mobj = mobj.groupdict()
1294 default = mobj['default'] if mobj['default'] is not None else default
1295 value = get_value(mobj)
1296 last_field, replacement = mobj['fields'], mobj['replacement']
1297 if value is None and mobj['alternate']:
1298 mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
1299 else:
1300 break
1301
1302 fmt = outer_mobj.group('format')
1303 if fmt == 's' and value is not None and last_field in field_size_compat_map.keys():
1304 fmt = f'0{field_size_compat_map[last_field]:d}d'
1305
1306 if None not in (value, replacement):
1307 try:
1308 value = replacement_formatter.format(replacement, value)
1309 except ValueError:
1310 value, default = None, na
1311
1312 flags = outer_mobj.group('conversion') or ''
1313 str_fmt = f'{fmt[:-1]}s'
1314 if value is None:
1315 value, fmt = default, 's'
1316 elif fmt[-1] == 'l': # list
1317 delim = '\n' if '#' in flags else ', '
1318 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
1319 elif fmt[-1] == 'j': # json
1320 value, fmt = json.dumps(
1321 value, default=_dumpjson_default,
1322 indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
1323 elif fmt[-1] == 'h': # html
1324 value, fmt = escapeHTML(str(value)), str_fmt
1325 elif fmt[-1] == 'q': # quoted
1326 value = map(str, variadic(value) if '#' in flags else [value])
1327 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
1328 elif fmt[-1] == 'B': # bytes
1329 value = f'%{str_fmt}'.encode() % str(value).encode()
1330 value, fmt = value.decode('utf-8', 'ignore'), 's'
1331 elif fmt[-1] == 'U': # unicode normalized
1332 value, fmt = unicodedata.normalize(
1333 # "+" = compatibility equivalence, "#" = NFD
1334 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1335 value), str_fmt
1336 elif fmt[-1] == 'D': # decimal suffix
1337 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1338 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1339 factor=1024 if '#' in flags else 1000)
1340 elif fmt[-1] == 'S': # filename sanitization
1341 value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
1342 elif fmt[-1] == 'c':
1343 if value:
1344 value = str(value)[0]
1345 else:
1346 fmt = str_fmt
1347 elif fmt[-1] not in 'rsa': # numeric
1348 value = float_or_none(value)
1349 if value is None:
1350 value, fmt = default, 's'
1351
1352 if sanitize:
1353 # If value is an object, sanitize might convert it to a string
1354 # So we convert it to repr first
1355 if fmt[-1] == 'r':
1356 value, fmt = repr(value), str_fmt
1357 elif fmt[-1] == 'a':
1358 value, fmt = ascii(value), str_fmt
1359 if fmt[-1] in 'csra':
1360 value = sanitizer(last_field, value)
1361
1362 key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1363 TMPL_DICT[key] = value
1364 return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1365
1366 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1367
1368 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1369 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1370 return self.escape_outtmpl(outtmpl) % info_dict
1371
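# A minimal usage sketch of the template machinery above (illustrative only;
# `ydl` stands for a hypothetical YoutubeDL instance and the info dict is an
# assumption, not data from this module):
#
#   info = {'title': 'My Video', 'id': 'abc123', 'ext': 'mp4'}
#   tmpl, tmpl_dict = ydl.prepare_outtmpl('%(title).20s [%(id)s].%(ext)s', info)
#   filename = ydl.escape_outtmpl(tmpl) % tmpl_dict
#   # ...which is what the one-step helper does:
#   filename = ydl.evaluate_outtmpl('%(title).20s [%(id)s].%(ext)s', info)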
1372 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1373 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1374 if outtmpl is None:
1375 outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
1376 try:
1377 outtmpl = self._outtmpl_expandpath(outtmpl)
1378 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1379 if not filename:
1380 return None
1381
1382 if tmpl_type in ('', 'temp'):
1383 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1384 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1385 filename = replace_extension(filename, ext, final_ext)
1386 elif tmpl_type:
1387 force_ext = OUTTMPL_TYPES[tmpl_type]
1388 if force_ext:
1389 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1390
1391 # https://github.com/blackjack4494/youtube-dlc/issues/85
1392 trim_file_name = self.params.get('trim_file_name', False)
1393 if trim_file_name:
1394 no_ext, *ext = filename.rsplit('.', 2)
1395 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1396
1397 return filename
1398 except ValueError as err:
1399 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1400 return None
1401
1402 def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1403 """Generate the output filename"""
1404 if outtmpl:
1405 assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1406 dir_type = None
1407 filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
1408 if not filename and dir_type not in ('', 'temp'):
1409 return ''
1410
1411 if warn:
1412 if not self.params.get('paths'):
1413 pass
1414 elif filename == '-':
1415 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
1416 elif os.path.isabs(filename):
1417 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1418 if filename == '-' or not filename:
1419 return filename
1420
1421 return self.get_output_path(dir_type, filename)
1422
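# A hedged sketch of calling the public wrapper above (the info dict and paths
# are assumptions for illustration):
#
#   path = ydl.prepare_filename(info)                  # uses the default outtmpl
#   tmp = ydl.prepare_filename(info, dir_type='temp')  # respects the 'temp' path
#   # '-' is passed through unchanged for stdout; a falsy result means
#   # no file will be written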
1423 def _match_entry(self, info_dict, incomplete=False, silent=False):
1424 """Returns None if the file should be downloaded"""
1425 _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
1426 assert incomplete or _type == 'video', 'Only a video result can be considered complete'
1427
1428 video_title = info_dict.get('title', info_dict.get('id', 'entry'))
1429
1430 def check_filter():
1431 if _type in ('playlist', 'multi_video'):
1432 return
1433 elif _type in ('url', 'url_transparent') and not try_call(
1434 lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
1435 return
1436
1437 if 'title' in info_dict:
1438 # This can happen when we're just evaluating the playlist
1439 title = info_dict['title']
1440 matchtitle = self.params.get('matchtitle', False)
1441 if matchtitle:
1442 if not re.search(matchtitle, title, re.IGNORECASE):
1443 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1444 rejecttitle = self.params.get('rejecttitle', False)
1445 if rejecttitle:
1446 if re.search(rejecttitle, title, re.IGNORECASE):
1447 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1448
1449 date = info_dict.get('upload_date')
1450 if date is not None:
1451 dateRange = self.params.get('daterange', DateRange())
1452 if date not in dateRange:
1453 return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
1454 view_count = info_dict.get('view_count')
1455 if view_count is not None:
1456 min_views = self.params.get('min_views')
1457 if min_views is not None and view_count < min_views:
1458 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1459 max_views = self.params.get('max_views')
1460 if max_views is not None and view_count > max_views:
1461 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1462 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1463 return 'Skipping "%s" because it is age restricted' % video_title
1464
1465 match_filter = self.params.get('match_filter')
1466 if match_filter is None:
1467 return None
1468
1469 cancelled = None
1470 try:
1471 try:
1472 ret = match_filter(info_dict, incomplete=incomplete)
1473 except TypeError:
1474 # For backward compatibility
1475 ret = None if incomplete else match_filter(info_dict)
1476 except DownloadCancelled as err:
1477 if err.msg is not NO_DEFAULT:
1478 raise
1479 ret, cancelled = err.msg, err
1480
1481 if ret is NO_DEFAULT:
1482 while True:
1483 filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1484 reply = input(self._format_screen(
1485 f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1486 if reply in {'y', ''}:
1487 return None
1488 elif reply == 'n':
1489 if cancelled:
1490 raise type(cancelled)(f'Skipping {video_title}')
1491 return f'Skipping {video_title}'
1492 return ret
1493
1494 if self.in_download_archive(info_dict):
1495 reason = '%s has already been recorded in the archive' % video_title
1496 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1497 else:
1498 try:
1499 reason = check_filter()
1500 except DownloadCancelled as e:
1501 reason, break_opt, break_err = e.msg, 'match_filter', type(e)
1502 else:
1503 break_opt, break_err = 'break_on_reject', RejectedVideoReached
1504 if reason is not None:
1505 if not silent:
1506 self.to_screen('[download] ' + reason)
1507 if self.params.get(break_opt, False):
1508 raise break_err()
1509 return reason
1510
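# A sketch of a 'match_filter' callable compatible with both call shapes tried
# above (the function itself is hypothetical, not part of this module):
#
#   def longer_than_an_hour(info_dict, *, incomplete=False):
#       """Return None to download, or a string reason to skip"""
#       if not incomplete and (info_dict.get('duration') or 0) > 3600:
#           return 'Skipping: longer than an hour'
#       return None
#
#   ydl_opts = {'match_filter': longer_than_an_hour}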
1511 @staticmethod
1512 def add_extra_info(info_dict, extra_info):
1513 """Set the keys from extra_info in info dict if they are missing"""
1514 for key, value in extra_info.items():
1515 info_dict.setdefault(key, value)
1516
1517 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1518 process=True, force_generic_extractor=False):
1519 """
1520 Extract and return the information dictionary of the URL
1521
1522 Arguments:
1523 @param url URL to extract
1524
1525 Keyword arguments:
1526 @param download Whether to download videos
1527 @param process Whether to resolve all unresolved references (URLs, playlist items).
1528 Must be True for download to work
1529 @param ie_key Use only the extractor with this key
1530
1531 @param extra_info Dictionary containing the extra values to add to the info (For internal use only)
1532 @param force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic')
1533 """
1534
1535 if extra_info is None:
1536 extra_info = {}
1537
1538 if not ie_key and force_generic_extractor:
1539 ie_key = 'Generic'
1540
1541 if ie_key:
1542 ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
1543 else:
1544 ies = self._ies
1545
1546 for key, ie in ies.items():
1547 if not ie.suitable(url):
1548 continue
1549
1550 if not ie.working():
1551 self.report_warning('Support for this site has been marked as broken '
1552 'and will probably not work.')
1553
1554 temp_id = ie.get_temp_id(url)
1555 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1556 self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
1557 if self.params.get('break_on_existing', False):
1558 raise ExistingVideoReached()
1559 break
1560 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1561 else:
1562 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1563 self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1564 tb=False if extractors_restricted else None)
1565
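# Hedged usage sketch for extract_info (the URL is a placeholder):
#
#   info = ydl.extract_info('https://example.com/video/123', download=False)
#   # or pin a single extractor, skipping the suitability scan above:
#   info = ydl.extract_info(url, ie_key='Generic', download=False)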
1566 def _handle_extraction_exceptions(func):
1567 @functools.wraps(func)
1568 def wrapper(self, *args, **kwargs):
1569 while True:
1570 try:
1571 return func(self, *args, **kwargs)
1572 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1573 raise
1574 except ReExtractInfo as e:
1575 if e.expected:
1576 self.to_screen(f'{e}; Re-extracting data')
1577 else:
1578 self.to_stderr('\r')
1579 self.report_warning(f'{e}; Re-extracting data')
1580 continue
1581 except GeoRestrictedError as e:
1582 msg = e.msg
1583 if e.countries:
1584 msg += '\nThis video is available in %s.' % ', '.join(
1585 map(ISO3166Utils.short2full, e.countries))
1586 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1587 self.report_error(msg)
1588 except ExtractorError as e: # An error we somewhat expected
1589 self.report_error(str(e), e.format_traceback())
1590 except Exception as e:
1591 if self.params.get('ignoreerrors'):
1592 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1593 else:
1594 raise
1595 break
1596 return wrapper
1597
1598 def _wait_for_video(self, ie_result={}):
1599 if (not self.params.get('wait_for_video')
1600 or ie_result.get('_type', 'video') != 'video'
1601 or ie_result.get('formats') or ie_result.get('url')):
1602 return
1603
1604 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1605 last_msg = ''
1606
1607 def progress(msg):
1608 nonlocal last_msg
1609 full_msg = f'{msg}\n'
1610 if not self.params.get('noprogress'):
1611 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1612 elif last_msg:
1613 return
1614 self.to_screen(full_msg, skip_eol=True)
1615 last_msg = msg
1616
1617 min_wait, max_wait = self.params.get('wait_for_video')
1618 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1619 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1620 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1621 self.report_warning('Release time of video is not known')
1622 elif ie_result and (diff or 0) <= 0:
1623 self.report_warning('Video should already be available according to extracted info')
1624 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1625 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1626
1627 wait_till = time.time() + diff
1628 try:
1629 while True:
1630 diff = wait_till - time.time()
1631 if diff <= 0:
1632 progress('')
1633 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1634 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1635 time.sleep(1)
1636 except KeyboardInterrupt:
1637 progress('')
1638 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1639 except BaseException as e:
1640 if not isinstance(e, ReExtractInfo):
1641 self.to_screen('')
1642 raise
1643
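# The 'wait_for_video' parameter consumed above is a (min_sec, max_sec) pair;
# a sketch (the numbers are arbitrary):
#
#   ydl_opts = {'wait_for_video': (60, 600)}
#   # the wait is clamped to max(release_timestamp - now, 60), capped at 600
#   # seconds, after which ReExtractInfo triggers a fresh extraction attempt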
1644 def _load_cookies(self, data, *, from_headers=True):
1645 """Loads cookies from a `Cookie` header
1646
1647 This tries to work around the security vulnerability of passing cookies to every domain.
1648 See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
1649 The unscoped cookies are saved for later to be stored in the jar with a limited scope.
1650
1651 @param data The Cookie header as string to load the cookies from
1652 @param from_headers If `False`, allows Set-Cookie syntax in the cookie string (at least a domain will be required)
1653 """
1654 for cookie in LenientSimpleCookie(data).values():
1655 if from_headers and any(cookie.values()):
1656 raise ValueError('Invalid syntax in Cookie Header')
1657
1658 domain = cookie.get('domain') or ''
1659 expiry = cookie.get('expires')
1660 if expiry == '': # 0 is valid
1661 expiry = None
1662 prepared_cookie = http.cookiejar.Cookie(
1663 cookie.get('version') or 0, cookie.key, cookie.value, None, False,
1664 domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
1665 cookie.get('secure') or False, expiry, False, None, None, {})
1666
1667 if domain:
1668 self.cookiejar.set_cookie(prepared_cookie)
1669 elif from_headers:
1670 self.deprecated_feature(
1671 'Passing cookies as a header is a potential security risk; '
1672 'they will be scoped to the domain of the downloaded URLs. '
1673 'Please consider loading cookies from a file or browser instead.')
1674 self.__header_cookies.append(prepared_cookie)
1675 else:
1676 self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
1677 tb=False, is_error=False)
1678
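# A minimal sketch of feeding a raw Cookie header through the loader above
# (the header value is made up):
#
#   ydl._load_cookies('SID=abc123; HSID=def456', from_headers=True)
#   # unscoped cookies are parked in __header_cookies and only later bound
#   # to each download URL's domain by _apply_header_cookies()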
1679 def _apply_header_cookies(self, url):
1680 """Applies stray header cookies to the provided url
1681
1682 This loads header cookies and scopes them to the domain provided in `url`.
1683 While this is not ideal, it helps reduce the risk of them being sent
1684 to an unintended destination while mostly maintaining compatibility.
1685 """
1686 parsed = urllib.parse.urlparse(url)
1687 if not parsed.hostname:
1688 return
1689
1690 for cookie in map(copy.copy, self.__header_cookies):
1691 cookie.domain = f'.{parsed.hostname}'
1692 self.cookiejar.set_cookie(cookie)
1693
1694 @_handle_extraction_exceptions
1695 def __extract_info(self, url, ie, download, extra_info, process):
1696 self._apply_header_cookies(url)
1697
1698 try:
1699 ie_result = ie.extract(url)
1700 except UserNotLive as e:
1701 if process:
1702 if self.params.get('wait_for_video'):
1703 self.report_warning(e)
1704 self._wait_for_video()
1705 raise
1706 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1707 self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1708 return
1709 if isinstance(ie_result, list):
1710 # Backwards compatibility: old IE result format
1711 ie_result = {
1712 '_type': 'compat_list',
1713 'entries': ie_result,
1714 }
1715 if extra_info.get('original_url'):
1716 ie_result.setdefault('original_url', extra_info['original_url'])
1717 self.add_default_extra_info(ie_result, ie, url)
1718 if process:
1719 self._wait_for_video(ie_result)
1720 return self.process_ie_result(ie_result, download, extra_info)
1721 else:
1722 return ie_result
1723
1724 def add_default_extra_info(self, ie_result, ie, url):
1725 if url is not None:
1726 self.add_extra_info(ie_result, {
1727 'webpage_url': url,
1728 'original_url': url,
1729 })
1730 webpage_url = ie_result.get('webpage_url')
1731 if webpage_url:
1732 self.add_extra_info(ie_result, {
1733 'webpage_url_basename': url_basename(webpage_url),
1734 'webpage_url_domain': get_domain(webpage_url),
1735 })
1736 if ie is not None:
1737 self.add_extra_info(ie_result, {
1738 'extractor': ie.IE_NAME,
1739 'extractor_key': ie.ie_key(),
1740 })
1741
1742 def process_ie_result(self, ie_result, download=True, extra_info=None):
1743 """
1744 Take the result of the ie (may be modified) and resolve all unresolved
1745 references (URLs, playlist items).
1746
1747 It will also download the videos if 'download' is True.
1748 Returns the resolved ie_result.
1749 """
1750 if extra_info is None:
1751 extra_info = {}
1752 result_type = ie_result.get('_type', 'video')
1753
1754 if result_type in ('url', 'url_transparent'):
1755 ie_result['url'] = sanitize_url(
1756 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
1757 if ie_result.get('original_url') and not extra_info.get('original_url'):
1758 extra_info = {'original_url': ie_result['original_url'], **extra_info}
1759
1760 extract_flat = self.params.get('extract_flat', False)
1761 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1762 or extract_flat is True):
1763 info_copy = ie_result.copy()
1764 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1765 if ie and not ie_result.get('id'):
1766 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1767 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1768 self.add_extra_info(info_copy, extra_info)
1769 info_copy, _ = self.pre_process(info_copy)
1770 self._fill_common_fields(info_copy, False)
1771 self.__forced_printings(info_copy)
1772 self._raise_pending_errors(info_copy)
1773 if self.params.get('force_write_download_archive', False):
1774 self.record_download_archive(info_copy)
1775 return ie_result
1776
1777 if result_type == 'video':
1778 self.add_extra_info(ie_result, extra_info)
1779 ie_result = self.process_video_result(ie_result, download=download)
1780 self._raise_pending_errors(ie_result)
1781 additional_urls = (ie_result or {}).get('additional_urls')
1782 if additional_urls:
1783 # TODO: Improve MetadataParserPP to allow setting a list
1784 if isinstance(additional_urls, str):
1785 additional_urls = [additional_urls]
1786 self.to_screen(
1787 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1788 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1789 ie_result['additional_entries'] = [
1790 self.extract_info(
1791 url, download, extra_info=extra_info,
1792 force_generic_extractor=self.params.get('force_generic_extractor'))
1793 for url in additional_urls
1794 ]
1795 return ie_result
1796 elif result_type == 'url':
1797 # We have to add extra_info to the results because it may be
1798 # contained in a playlist
1799 return self.extract_info(
1800 ie_result['url'], download,
1801 ie_key=ie_result.get('ie_key'),
1802 extra_info=extra_info)
1803 elif result_type == 'url_transparent':
1804 # Use the information from the embedding page
1805 info = self.extract_info(
1806 ie_result['url'], ie_key=ie_result.get('ie_key'),
1807 extra_info=extra_info, download=False, process=False)
1808
1809 # extract_info may return None when ignoreerrors is enabled and
1810 # extraction failed with an error, don't crash and return early
1811 # in this case
1812 if not info:
1813 return info
1814
1815 exempted_fields = {'_type', 'url', 'ie_key'}
1816 if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1817 # For video clips, the id etc of the clip extractor should be used
1818 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1819
1820 new_result = info.copy()
1821 new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1822
1823 # Extracted info may not be a video result (i.e.
1824 # info.get('_type', 'video') != 'video') but rather a url or
1825 # url_transparent. In such cases, outer metadata (from ie_result)
1826 # should be propagated to inner one (info). For this to happen
1827 # _type of info should be overridden with url_transparent. This
1828 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1829 if new_result.get('_type') == 'url':
1830 new_result['_type'] = 'url_transparent'
1831
1832 return self.process_ie_result(
1833 new_result, download=download, extra_info=extra_info)
1834 elif result_type in ('playlist', 'multi_video'):
1835 # Protect from infinite recursion due to recursively nested playlists
1836 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1837 webpage_url = ie_result.get('webpage_url') # Playlists may not have a webpage_url
1838 if webpage_url and webpage_url in self._playlist_urls:
1839 self.to_screen(
1840 '[download] Skipping already downloaded playlist: %s'
1841 % (ie_result.get('title') or ie_result.get('id')))
1842 return
1843
1844 self._playlist_level += 1
1845 self._playlist_urls.add(webpage_url)
1846 self._fill_common_fields(ie_result, False)
1847 self._sanitize_thumbnails(ie_result)
1848 try:
1849 return self.__process_playlist(ie_result, download)
1850 finally:
1851 self._playlist_level -= 1
1852 if not self._playlist_level:
1853 self._playlist_urls.clear()
1854 elif result_type == 'compat_list':
1855 self.report_warning(
1856 'Extractor %s returned a compat_list result. '
1857 'It needs to be updated.' % ie_result.get('extractor'))
1858
1859 def _fixup(r):
1860 self.add_extra_info(r, {
1861 'extractor': ie_result['extractor'],
1862 'webpage_url': ie_result['webpage_url'],
1863 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1864 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1865 'extractor_key': ie_result['extractor_key'],
1866 })
1867 return r
1868 ie_result['entries'] = [
1869 self.process_ie_result(_fixup(r), download, extra_info)
1870 for r in ie_result['entries']
1871 ]
1872 return ie_result
1873 else:
1874 raise Exception('Invalid result type: %s' % result_type)
1875
1876 def _ensure_dir_exists(self, path):
1877 return make_dir(path, self.report_error)
1878
1879 @staticmethod
1880 def _playlist_infodict(ie_result, strict=False, **kwargs):
1881 info = {
1882 'playlist_count': ie_result.get('playlist_count'),
1883 'playlist': ie_result.get('title') or ie_result.get('id'),
1884 'playlist_id': ie_result.get('id'),
1885 'playlist_title': ie_result.get('title'),
1886 'playlist_uploader': ie_result.get('uploader'),
1887 'playlist_uploader_id': ie_result.get('uploader_id'),
1888 **kwargs,
1889 }
1890 if strict:
1891 return info
1892 if ie_result.get('webpage_url'):
1893 info.update({
1894 'webpage_url': ie_result['webpage_url'],
1895 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1896 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1897 })
1898 return {
1899 **info,
1900 'playlist_index': 0,
1901 '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
1902 'extractor': ie_result['extractor'],
1903 'extractor_key': ie_result['extractor_key'],
1904 }
1905
1906 def __process_playlist(self, ie_result, download):
1907 """Process each entry in the playlist"""
1908 assert ie_result['_type'] in ('playlist', 'multi_video')
1909
1910 common_info = self._playlist_infodict(ie_result, strict=True)
1911 title = common_info.get('playlist') or '<Untitled>'
1912 if self._match_entry(common_info, incomplete=True) is not None:
1913 return
1914 self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
1915
1916 all_entries = PlaylistEntries(self, ie_result)
1917 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1918
1919 lazy = self.params.get('lazy_playlist')
1920 if lazy:
1921 resolved_entries, n_entries = [], 'N/A'
1922 ie_result['requested_entries'], ie_result['entries'] = None, None
1923 else:
1924 entries = resolved_entries = list(entries)
1925 n_entries = len(resolved_entries)
1926 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1927 if not ie_result.get('playlist_count'):
1928 # Better to do this after potentially exhausting entries
1929 ie_result['playlist_count'] = all_entries.get_full_count()
1930
1931 extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1932 ie_copy = collections.ChainMap(ie_result, extra)
1933
1934 _infojson_written = False
1935 write_playlist_files = self.params.get('allow_playlist_files', True)
1936 if write_playlist_files and self.params.get('list_thumbnails'):
1937 self.list_thumbnails(ie_result)
1938 if write_playlist_files and not self.params.get('simulate'):
1939 _infojson_written = self._write_info_json(
1940 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1941 if _infojson_written is None:
1942 return
1943 if self._write_description('playlist', ie_result,
1944 self.prepare_filename(ie_copy, 'pl_description')) is None:
1945 return
1946 # TODO: This should be passed to ThumbnailsConvertor if necessary
1947 self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1948
1949 if lazy:
1950 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1951 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1952 elif self.params.get('playlistreverse'):
1953 entries.reverse()
1954 elif self.params.get('playlistrandom'):
1955 random.shuffle(entries)
1956
1957 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
1958 f'{format_field(ie_result, "playlist_count", " of %s")}')
1959
1960 keep_resolved_entries = self.params.get('extract_flat') != 'discard'
1961 if self.params.get('extract_flat') == 'discard_in_playlist':
1962 keep_resolved_entries = ie_result['_type'] != 'playlist'
1963 if keep_resolved_entries:
1964 self.write_debug('The information of all playlist entries will be held in memory')
1965
1966 failures = 0
1967 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1968 for i, (playlist_index, entry) in enumerate(entries):
1969 if lazy:
1970 resolved_entries.append((playlist_index, entry))
1971 if not entry:
1972 continue
1973
1974 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
1975 if not lazy and 'playlist-index' in self.params['compat_opts']:
1976 playlist_index = ie_result['requested_entries'][i]
1977
1978 entry_copy = collections.ChainMap(entry, {
1979 **common_info,
1980 'n_entries': int_or_none(n_entries),
1981 'playlist_index': playlist_index,
1982 'playlist_autonumber': i + 1,
1983 })
1984
1985 if self._match_entry(entry_copy, incomplete=True) is not None:
1986 # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
1987 resolved_entries[i] = (playlist_index, NO_DEFAULT)
1988 continue
1989
1990 self.to_screen('[download] Downloading item %s of %s' % (
1991 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
1992
1993 entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
1994 'playlist_index': playlist_index,
1995 'playlist_autonumber': i + 1,
1996 }, extra))
1997 if not entry_result:
1998 failures += 1
1999 if failures >= max_failures:
2000 self.report_error(
2001 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
2002 break
2003 if keep_resolved_entries:
2004 resolved_entries[i] = (playlist_index, entry_result)
2005
2006 # Update with processed data
2007 ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
2008 ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
2009 if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
2010 # Do not set for full playlist
2011 ie_result.pop('requested_entries')
2012
2013 # Write the updated info to json
2014 if _infojson_written is True and self._write_info_json(
2015 'updated playlist', ie_result,
2016 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
2017 return
2018
2019 ie_result = self.run_all_pps('playlist', ie_result)
2020 self.to_screen(f'[download] Finished downloading playlist: {title}')
2021 return ie_result
2022
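# Playlist-processing options exercised by the loop above, sketched together
# (values are arbitrary examples):
#
#   ydl_opts = {
#       'lazy_playlist': True,                  # resolve entries on demand
#       'skip_playlist_after_errors': 3,        # give up after 3 failed items
#       'extract_flat': 'discard_in_playlist',  # drop resolved entries for true playlists
#   }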
2023 @_handle_extraction_exceptions
2024 def __process_iterable_entry(self, entry, download, extra_info):
2025 return self.process_ie_result(
2026 entry, download=download, extra_info=extra_info)
2027
2028 def _build_format_filter(self, filter_spec):
2029 " Returns a function to filter the formats according to the filter_spec "
2030
2031 OPERATORS = {
2032 '<': operator.lt,
2033 '<=': operator.le,
2034 '>': operator.gt,
2035 '>=': operator.ge,
2036 '=': operator.eq,
2037 '!=': operator.ne,
2038 }
2039 operator_rex = re.compile(r'''(?x)\s*
2040 (?P<key>[\w.-]+)\s*
2041 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
2042 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
2043 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
2044 m = operator_rex.fullmatch(filter_spec)
2045 if m:
2046 try:
2047 comparison_value = int(m.group('value'))
2048 except ValueError:
2049 comparison_value = parse_filesize(m.group('value'))
2050 if comparison_value is None:
2051 comparison_value = parse_filesize(m.group('value') + 'B')
2052 if comparison_value is None:
2053 raise ValueError(
2054 'Invalid value %r in format specification %r' % (
2055 m.group('value'), filter_spec))
2056 op = OPERATORS[m.group('op')]
2057
2058 if not m:
2059 STR_OPERATORS = {
2060 '=': operator.eq,
2061 '^=': lambda attr, value: attr.startswith(value),
2062 '$=': lambda attr, value: attr.endswith(value),
2063 '*=': lambda attr, value: value in attr,
2064 '~=': lambda attr, value: value.search(attr) is not None
2065 }
2066 str_operator_rex = re.compile(r'''(?x)\s*
2067 (?P<key>[a-zA-Z0-9._-]+)\s*
2068 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
2069 (?P<quote>["'])?
2070 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
2071 (?(quote)(?P=quote))\s*
2072 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
2073 m = str_operator_rex.fullmatch(filter_spec)
2074 if m:
2075 if m.group('op') == '~=':
2076 comparison_value = re.compile(m.group('value'))
2077 else:
2078 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2079 str_op = STR_OPERATORS[m.group('op')]
2080 if m.group('negation'):
2081 op = lambda attr, value: not str_op(attr, value)
2082 else:
2083 op = str_op
2084
2085 if not m:
2086 raise SyntaxError('Invalid filter specification %r' % filter_spec)
2087
2088 def _filter(f):
2089 actual_value = f.get(m.group('key'))
2090 if actual_value is None:
2091 return m.group('none_inclusive')
2092 return op(actual_value, comparison_value)
2093 return _filter
2094
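# Examples of filter specs the parser above accepts (a sketch; `formats` is a
# hypothetical list of format dicts):
#
#   keep = self._build_format_filter('height>=720')          # numeric comparison
#   small = self._build_format_filter('filesize<50M')        # filesize suffix
#   mp4 = self._build_format_filter('ext=mp4')               # string equality
#   no_dash = self._build_format_filter('format_id!*=dash')  # negated substring
#   selected = [f for f in formats if keep(f)]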
2095 def _check_formats(self, formats):
2096 for f in formats:
2097 self.to_screen('[info] Testing format %s' % f['format_id'])
2098 path = self.get_output_path('temp')
2099 if not self._ensure_dir_exists(f'{path}/'):
2100 continue
2101 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
2102 temp_file.close()
2103 try:
2104 success, _ = self.dl(temp_file.name, f, test=True)
2105 except (DownloadError, OSError, ValueError) + network_exceptions:
2106 success = False
2107 finally:
2108 if os.path.exists(temp_file.name):
2109 try:
2110 os.remove(temp_file.name)
2111 except OSError:
2112 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
2113 if success:
2114 yield f
2115 else:
2116 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
2117
2118 def _default_format_spec(self, info_dict, download=True):
2119
2120 def can_merge():
2121 merger = FFmpegMergerPP(self)
2122 return merger.available and merger.can_merge()
2123
2124 prefer_best = (
2125 not self.params.get('simulate')
2126 and download
2127 and (
2128 not can_merge()
2129 or info_dict.get('is_live') and not self.params.get('live_from_start')
2130 or self.params['outtmpl']['default'] == '-'))
2131 compat = (
2132 prefer_best
2133 or self.params.get('allow_multiple_audio_streams', False)
2134 or 'format-spec' in self.params['compat_opts'])
2135
2136 return (
2137 'best/bestvideo+bestaudio' if prefer_best
2138 else 'bestvideo*+bestaudio/best' if not compat
2139 else 'bestvideo+bestaudio/best')
2140
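# Restating the three possible defaults returned above (a summary, not new
# behaviour):
#
#   'best/bestvideo+bestaudio'   - merging unavailable, live without
#                                  --live-from-start, or output is '-'
#   'bestvideo*+bestaudio/best'  - the normal default
#   'bestvideo+bestaudio/best'   - format-spec compat / multiple audio streams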
2141 def build_format_selector(self, format_spec):
2142 def syntax_error(note, start):
2143 message = (
2144 'Invalid format specification: '
2145 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
2146 return SyntaxError(message)
2147
2148 PICKFIRST = 'PICKFIRST'
2149 MERGE = 'MERGE'
2150 SINGLE = 'SINGLE'
2151 GROUP = 'GROUP'
2152 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2153
2154 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2155 'video': self.params.get('allow_multiple_video_streams', False)}
2156
2157 def _parse_filter(tokens):
2158 filter_parts = []
2159 for type, string_, start, _, _ in tokens:
2160 if type == tokenize.OP and string_ == ']':
2161 return ''.join(filter_parts)
2162 else:
2163 filter_parts.append(string_)
2164
2165 def _remove_unused_ops(tokens):
2166 # Remove operators that we don't use and join them with the surrounding strings.
2167 # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
2168 ALLOWED_OPS = ('/', '+', ',', '(', ')')
2169 last_string, last_start, last_end, last_line = None, None, None, None
2170 for type, string_, start, end, line in tokens:
2171 if type == tokenize.OP and string_ == '[':
2172 if last_string:
2173 yield tokenize.NAME, last_string, last_start, last_end, last_line
2174 last_string = None
2175 yield type, string_, start, end, line
2176 # everything inside brackets will be handled by _parse_filter
2177 for type, string_, start, end, line in tokens:
2178 yield type, string_, start, end, line
2179 if type == tokenize.OP and string_ == ']':
2180 break
2181 elif type == tokenize.OP and string_ in ALLOWED_OPS:
2182 if last_string:
2183 yield tokenize.NAME, last_string, last_start, last_end, last_line
2184 last_string = None
2185 yield type, string_, start, end, line
2186 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2187 if not last_string:
2188 last_string = string_
2189 last_start = start
2190 last_end = end
2191 else:
2192 last_string += string_
2193 if last_string:
2194 yield tokenize.NAME, last_string, last_start, last_end, last_line
2195
2196 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
2197 selectors = []
2198 current_selector = None
2199 for type, string_, start, _, _ in tokens:
2200 # ENCODING is only defined in Python 3.x
2201 if type == getattr(tokenize, 'ENCODING', None):
2202 continue
2203 elif type in [tokenize.NAME, tokenize.NUMBER]:
2204 current_selector = FormatSelector(SINGLE, string_, [])
2205 elif type == tokenize.OP:
2206 if string_ == ')':
2207 if not inside_group:
2208 # ')' will be handled by the parentheses group
2209 tokens.restore_last_token()
2210 break
2211 elif inside_merge and string_ in ['/', ',']:
2212 tokens.restore_last_token()
2213 break
2214 elif inside_choice and string_ == ',':
2215 tokens.restore_last_token()
2216 break
2217 elif string_ == ',':
2218 if not current_selector:
2219 raise syntax_error('"," must follow a format selector', start)
2220 selectors.append(current_selector)
2221 current_selector = None
2222 elif string_ == '/':
2223 if not current_selector:
2224 raise syntax_error('"/" must follow a format selector', start)
2225 first_choice = current_selector
2226 second_choice = _parse_format_selection(tokens, inside_choice=True)
2227 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2228 elif string_ == '[':
2229 if not current_selector:
2230 current_selector = FormatSelector(SINGLE, 'best', [])
2231 format_filter = _parse_filter(tokens)
2232 current_selector.filters.append(format_filter)
2233 elif string_ == '(':
2234 if current_selector:
2235 raise syntax_error('Unexpected "("', start)
2236 group = _parse_format_selection(tokens, inside_group=True)
2237 current_selector = FormatSelector(GROUP, group, [])
2238 elif string_ == '+':
2239 if not current_selector:
2240 raise syntax_error('Unexpected "+"', start)
2241 selector_1 = current_selector
2242 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2243 if not selector_2:
2244 raise syntax_error('Expected a selector', start)
2245 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2246 else:
2247 raise syntax_error(f'Operator not recognized: "{string_}"', start)
2248 elif type == tokenize.ENDMARKER:
2249 break
2250 if current_selector:
2251 selectors.append(current_selector)
2252 return selectors
2253
2254 def _merge(formats_pair):
2255 format_1, format_2 = formats_pair
2256
2257 formats_info = []
2258 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2259 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2260
2261 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2262 get_no_more = {'video': False, 'audio': False}
2263 for (i, fmt_info) in enumerate(formats_info):
2264 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2265 formats_info.pop(i)
2266 continue
2267 for aud_vid in ['audio', 'video']:
2268 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2269 if get_no_more[aud_vid]:
2270 formats_info.pop(i)
2271 break
2272 get_no_more[aud_vid] = True
2273
2274 if len(formats_info) == 1:
2275 return formats_info[0]
2276
2277 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2278 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2279
2280 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2281 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2282
2283 output_ext = get_compatible_ext(
2284 vcodecs=[f.get('vcodec') for f in video_fmts],
2285 acodecs=[f.get('acodec') for f in audio_fmts],
2286 vexts=[f['ext'] for f in video_fmts],
2287 aexts=[f['ext'] for f in audio_fmts],
2288 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2289 or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
2290
2291 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2292
2293 new_dict = {
2294 'requested_formats': formats_info,
2295 'format': '+'.join(filtered('format')),
2296 'format_id': '+'.join(filtered('format_id')),
2297 'ext': output_ext,
2298 'protocol': '+'.join(map(determine_protocol, formats_info)),
2299 'language': '+'.join(orderedSet(filtered('language'))) or None,
2300 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2301 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2302 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2303 }
2304
2305 if the_only_video:
2306 new_dict.update({
2307 'width': the_only_video.get('width'),
2308 'height': the_only_video.get('height'),
2309 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2310 'fps': the_only_video.get('fps'),
2311 'dynamic_range': the_only_video.get('dynamic_range'),
2312 'vcodec': the_only_video.get('vcodec'),
2313 'vbr': the_only_video.get('vbr'),
2314 'stretched_ratio': the_only_video.get('stretched_ratio'),
2315 'aspect_ratio': the_only_video.get('aspect_ratio'),
2316 })
2317
2318 if the_only_audio:
2319 new_dict.update({
2320 'acodec': the_only_audio.get('acodec'),
2321 'abr': the_only_audio.get('abr'),
2322 'asr': the_only_audio.get('asr'),
2323 'audio_channels': the_only_audio.get('audio_channels')
2324 })
2325
2326 return new_dict
2327
2328 def _check_formats(formats):
2329 if (self.params.get('check_formats') is not None
2330 or self.params.get('allow_unplayable_formats')):
2331 yield from formats
2332 return
2333 elif self.params.get('check_formats') == 'selected':
2334 yield from self._check_formats(formats)
2335 return
2336
2337 for f in formats:
2338 if f.get('has_drm'):
2339 yield from self._check_formats([f])
2340 else:
2341 yield f
2342
2343 def _build_selector_function(selector):
2344 if isinstance(selector, list): # ,
2345 fs = [_build_selector_function(s) for s in selector]
2346
2347 def selector_function(ctx):
2348 for f in fs:
2349 yield from f(ctx)
2350 return selector_function
2351
2352 elif selector.type == GROUP: # ()
2353 selector_function = _build_selector_function(selector.selector)
2354
2355 elif selector.type == PICKFIRST: # /
2356 fs = [_build_selector_function(s) for s in selector.selector]
2357
2358 def selector_function(ctx):
2359 for f in fs:
2360 picked_formats = list(f(ctx))
2361 if picked_formats:
2362 return picked_formats
2363 return []
2364
2365 elif selector.type == MERGE: # +
2366 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2367
2368 def selector_function(ctx):
2369 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2370 yield _merge(pair)
2371
2372 elif selector.type == SINGLE: # atom
2373 format_spec = selector.selector or 'best'
2374
2375 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2376 if format_spec == 'all':
2377 def selector_function(ctx):
2378 yield from _check_formats(ctx['formats'][::-1])
2379 elif format_spec == 'mergeall':
2380 def selector_function(ctx):
2381 formats = list(_check_formats(
2382 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2383 if not formats:
2384 return
2385 merged_format = formats[-1]
2386 for f in formats[-2::-1]:
2387 merged_format = _merge((merged_format, f))
2388 yield merged_format
2389
2390 else:
2391 format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
2392 mobj = re.match(
2393 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2394 format_spec)
2395 if mobj is not None:
2396 format_idx = int_or_none(mobj.group('n'), default=1)
2397 format_reverse = mobj.group('bw')[0] == 'b'
2398 format_type = (mobj.group('type') or [None])[0]
2399 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2400 format_modified = mobj.group('mod') is not None
2401
2402 format_fallback = not format_type and not format_modified # for b, w
2403 _filter_f = (
2404 (lambda f: f.get('%scodec' % format_type) != 'none')
2405 if format_type and format_modified # bv*, ba*, wv*, wa*
2406 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2407 if format_type # bv, ba, wv, wa
2408 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2409 if not format_modified # b, w
2410 else lambda f: True) # b*, w*
2411 filter_f = lambda f: _filter_f(f) and (
2412 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2413 else:
2414 if format_spec in self._format_selection_exts['audio']:
2415 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2416 elif format_spec in self._format_selection_exts['video']:
2417 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2418 separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2419 elif format_spec in self._format_selection_exts['storyboards']:
2420 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2421 else:
2422 filter_f = lambda f: f.get('format_id') == format_spec # id
2423
2424 def selector_function(ctx):
2425 formats = list(ctx['formats'])
2426 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2427 if not matches:
2428 if format_fallback and ctx['incomplete_formats']:
2429 # for extractors with incomplete formats (audio-only (soundcloud)
2430 # or video-only (imgur)), best/worst will fall back to the
2431 # best/worst {video,audio}-only format
2432 matches = formats
2433 elif separate_fallback and not ctx['has_merged_format']:
2434 # for compatibility with youtube-dl when there is no pre-merged format
2435 matches = list(filter(separate_fallback, formats))
2436 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2437 try:
2438 yield matches[format_idx - 1]
2439 except LazyList.IndexError:
2440 return
2441
2442 filters = [self._build_format_filter(f) for f in selector.filters]
2443
2444 def final_selector(ctx):
2445 ctx_copy = dict(ctx)
2446 for _filter in filters:
2447 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2448 return selector_function(ctx_copy)
2449 return final_selector
2450
2451 stream = io.BytesIO(format_spec.encode())
2452 try:
2453 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2454 except tokenize.TokenError:
2455 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2456
2457 class TokenIterator:
2458 def __init__(self, tokens):
2459 self.tokens = tokens
2460 self.counter = 0
2461
2462 def __iter__(self):
2463 return self
2464
2465 def __next__(self):
2466 if self.counter >= len(self.tokens):
2467 raise StopIteration()
2468 value = self.tokens[self.counter]
2469 self.counter += 1
2470 return value
2471
2472 next = __next__
2473
2474 def restore_last_token(self):
2475 self.counter -= 1
2476
2477 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2478 return _build_selector_function(parsed_selector)
2479
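# A hedged sketch of driving a built selector; the ctx keys mirror those
# consumed by selector_function above (values are assumptions):
#
#   selector = self.build_format_selector('bv[height<=1080]+ba/b')
#   chosen = list(selector({
#       'formats': formats,
#       'has_merged_format': any(
#           f.get('vcodec') != 'none' and f.get('acodec') != 'none' for f in formats),
#       'incomplete_formats': False,
#   }))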
2480 def _calc_headers(self, info_dict):
2481 res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
2482 clean_headers(res)
2483 cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
2484 if cookies:
2485 encoder = LenientSimpleCookie()
2486 values = []
2487 for cookie in cookies:
2488 _, value = encoder.value_encode(cookie.value)
2489 values.append(f'{cookie.name}={value}')
2490 if cookie.domain:
2491 values.append(f'Domain={cookie.domain}')
2492 if cookie.path:
2493 values.append(f'Path={cookie.path}')
2494 if cookie.secure:
2495 values.append('Secure')
2496 if cookie.expires:
2497 values.append(f'Expires={cookie.expires}')
2498 if cookie.version:
2499 values.append(f'Version={cookie.version}')
2500 info_dict['cookies'] = '; '.join(values)
2501
2502 if 'X-Forwarded-For' not in res:
2503 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2504 if x_forwarded_for_ip:
2505 res['X-Forwarded-For'] = x_forwarded_for_ip
2506
2507 return res
2508
2509 def _calc_cookies(self, url):
2510 self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
2511 return self.cookiejar.get_cookie_header(url)
2512
2513 def _sort_thumbnails(self, thumbnails):
2514 thumbnails.sort(key=lambda t: (
2515 t.get('preference') if t.get('preference') is not None else -1,
2516 t.get('width') if t.get('width') is not None else -1,
2517 t.get('height') if t.get('height') is not None else -1,
2518 t.get('id') if t.get('id') is not None else '',
2519 t.get('url')))
2520
2521 def _sanitize_thumbnails(self, info_dict):
2522 thumbnails = info_dict.get('thumbnails')
2523 if thumbnails is None:
2524 thumbnail = info_dict.get('thumbnail')
2525 if thumbnail:
2526 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2527 if not thumbnails:
2528 return
2529
2530 def check_thumbnails(thumbnails):
2531 for t in thumbnails:
2532 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2533 try:
2534 self.urlopen(HEADRequest(t['url']))
2535 except network_exceptions as err:
2536 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2537 continue
2538 yield t
2539
2540 self._sort_thumbnails(thumbnails)
2541 for i, t in enumerate(thumbnails):
2542 if t.get('id') is None:
2543 t['id'] = '%d' % i
2544 if t.get('width') and t.get('height'):
2545 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2546 t['url'] = sanitize_url(t['url'])
2547
2548 if self.params.get('check_formats') is True:
2549 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2550 else:
2551 info_dict['thumbnails'] = thumbnails
2552
2553 def _fill_common_fields(self, info_dict, final=True):
2554 # TODO: move sanitization here
2555 if final:
2556 title = info_dict['fulltitle'] = info_dict.get('title')
2557 if not title:
2558 if title == '':
2559 self.write_debug('Extractor gave empty title. Creating a generic title')
2560 else:
2561 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2562 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2563
2564 if info_dict.get('duration') is not None:
2565 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2566
2567 for ts_key, date_key in (
2568 ('timestamp', 'upload_date'),
2569 ('release_timestamp', 'release_date'),
2570 ('modified_timestamp', 'modified_date'),
2571 ):
2572 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2573 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2574 # see http://bugs.python.org/issue1646728)
2575 with contextlib.suppress(ValueError, OverflowError, OSError):
2576 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2577 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2578
2579 live_keys = ('is_live', 'was_live')
2580 live_status = info_dict.get('live_status')
2581 if live_status is None:
2582 for key in live_keys:
2583 if info_dict.get(key) is False:
2584 continue
2585 if info_dict.get(key):
2586 live_status = key
2587 break
2588 if all(info_dict.get(key) is False for key in live_keys):
2589 live_status = 'not_live'
2590 if live_status:
2591 info_dict['live_status'] = live_status
2592 for key in live_keys:
2593 if info_dict.get(key) is None:
2594 info_dict[key] = (live_status == key)
2595 if live_status == 'post_live':
2596 info_dict['was_live'] = True
2597
2598 # Auto generate title fields corresponding to the *_number fields when missing
2599 # in order to always have clean titles. This is very common for TV series.
2600 for field in ('chapter', 'season', 'episode'):
2601 if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2602 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2603
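# Sketch of the timestamp-to-date derivation performed above (the timestamp
# is an arbitrary example):
#
#   info = {'timestamp': 1577836800}   # 2020-01-01T00:00:00Z
#   self._fill_common_fields(info, final=False)
#   assert info['upload_date'] == '20200101'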
2604 def _raise_pending_errors(self, info):
2605 err = info.pop('__pending_error', None)
2606 if err:
2607 self.report_error(err, tb=False)
2608
2609 def sort_formats(self, info_dict):
2610 formats = self._get_formats(info_dict)
2611 formats.sort(key=FormatSorter(
2612 self, info_dict.get('_format_sort_fields') or []).calculate_preference)
2613
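# '_format_sort_fields' feeds the FormatSorter above; a sketch of supplying a
# sort order (field names follow the --format-sort syntax; 'res' and 'fps'
# are examples):
#
#   info_dict['_format_sort_fields'] = ('res', 'fps')
#   self.sort_formats(info_dict)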
2614 def process_video_result(self, info_dict, download=True):
2615 assert info_dict.get('_type', 'video') == 'video'
2616 self._num_videos += 1
2617
2618 if 'id' not in info_dict:
2619 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2620 elif not info_dict.get('id'):
2621 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2622
2623 def report_force_conversion(field, field_not, conversion):
2624 self.report_warning(
2625 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2626 % (field, field_not, conversion))
2627
2628 def sanitize_string_field(info, string_field):
2629 field = info.get(string_field)
2630 if field is None or isinstance(field, str):
2631 return
2632 report_force_conversion(string_field, 'a string', 'string')
2633 info[string_field] = str(field)
2634
2635 def sanitize_numeric_fields(info):
2636 for numeric_field in self._NUMERIC_FIELDS:
2637 field = info.get(numeric_field)
2638 if field is None or isinstance(field, (int, float)):
2639 continue
2640 report_force_conversion(numeric_field, 'numeric', 'int')
2641 info[numeric_field] = int_or_none(field)
2642
2643 sanitize_string_field(info_dict, 'id')
2644 sanitize_numeric_fields(info_dict)
2645 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2646 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
2647 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2648 self.report_warning('"duration" field is negative, there is an error in extractor')
2649
2650 chapters = info_dict.get('chapters') or []
2651 if chapters and chapters[0].get('start_time'):
2652 chapters.insert(0, {'start_time': 0})
2653
2654 dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
2655 for idx, (prev, current, next_) in enumerate(zip(
2656 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
2657 if current.get('start_time') is None:
2658 current['start_time'] = prev.get('end_time')
2659 if not current.get('end_time'):
2660 current['end_time'] = next_.get('start_time')
2661 if not current.get('title'):
2662 current['title'] = f'<Untitled Chapter {idx}>'
2663
2664 if 'playlist' not in info_dict:
2665 # It isn't part of a playlist
2666 info_dict['playlist'] = None
2667 info_dict['playlist_index'] = None
2668
2669 self._sanitize_thumbnails(info_dict)
2670
2671 thumbnail = info_dict.get('thumbnail')
2672 thumbnails = info_dict.get('thumbnails')
2673 if thumbnail:
2674 info_dict['thumbnail'] = sanitize_url(thumbnail)
2675 elif thumbnails:
2676 info_dict['thumbnail'] = thumbnails[-1]['url']
2677
2678 if info_dict.get('display_id') is None and 'id' in info_dict:
2679 info_dict['display_id'] = info_dict['id']
2680
2681 self._fill_common_fields(info_dict)
2682
2683 for cc_kind in ('subtitles', 'automatic_captions'):
2684 cc = info_dict.get(cc_kind)
2685 if cc:
2686 for _, subtitle in cc.items():
2687 for subtitle_format in subtitle:
2688 if subtitle_format.get('url'):
2689 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2690 if subtitle_format.get('ext') is None:
2691 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2692
2693 automatic_captions = info_dict.get('automatic_captions')
2694 subtitles = info_dict.get('subtitles')
2695
2696 info_dict['requested_subtitles'] = self.process_subtitles(
2697 info_dict['id'], subtitles, automatic_captions)
2698
2699 formats = self._get_formats(info_dict)
2700
2701 # Backward compatibility with InfoExtractor._sort_formats
2702 field_preference = (formats or [{}])[0].pop('__sort_fields', None)
2703 if field_preference:
2704 info_dict['_format_sort_fields'] = field_preference
2705
2706 info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it
2707 f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
2708 if not self.params.get('allow_unplayable_formats'):
2709 formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']
2710
2711 if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2712 self.report_warning(
2713 'This video is DRM protected and only images are available for download. Use --list-formats to see them'
2714 if info_dict['_has_drm'] else 'Only images are available for download. Use --list-formats to see them')
2715
2716 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2717 if not get_from_start:
2718 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2719 if info_dict.get('is_live') and formats:
2720 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2721 if get_from_start and not formats:
2722 self.raise_no_formats(info_dict, msg=(
2723 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2724 'If you want to download from the current time, use --no-live-from-start'))
2725
2726 def is_wellformed(f):
2727 url = f.get('url')
2728 if not url:
2729 self.report_warning(
2730 '"url" field is missing or empty - skipping format, '
2731 'there is an error in extractor')
2732 return False
2733 if isinstance(url, bytes):
2734 sanitize_string_field(f, 'url')
2735 return True
2736
2737 # Filter out malformed formats for better extraction robustness
2738 formats = list(filter(is_wellformed, formats or []))
2739
2740 if not formats:
2741 self.raise_no_formats(info_dict)
2742
2743 for format in formats:
2744 sanitize_string_field(format, 'format_id')
2745 sanitize_numeric_fields(format)
2746 format['url'] = sanitize_url(format['url'])
2747 if format.get('ext') is None:
2748 format['ext'] = determine_ext(format['url']).lower()
2749 if format.get('protocol') is None:
2750 format['protocol'] = determine_protocol(format)
2751 if format.get('resolution') is None:
2752 format['resolution'] = self.format_resolution(format, default=None)
2753 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2754 format['dynamic_range'] = 'SDR'
2755 if format.get('aspect_ratio') is None:
2756 format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
2757 if (not format.get('manifest_url') # For fragmented formats, "tbr" is often max bitrate and not average
2758 and info_dict.get('duration') and format.get('tbr')
2759 and not format.get('filesize') and not format.get('filesize_approx')):
2760 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
2761 format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict))
2762
2763 # This is copied to http_headers by the above _calc_headers and can now be removed
2764 if '__x_forwarded_for_ip' in info_dict:
2765 del info_dict['__x_forwarded_for_ip']
2766
2767 self.sort_formats({
2768 'formats': formats,
2769 '_format_sort_fields': info_dict.get('_format_sort_fields')
2770 })
2771
2772 # Sanitize and group by format_id
2773 formats_dict = {}
2774 for i, format in enumerate(formats):
2775 if not format.get('format_id'):
2776 format['format_id'] = str(i)
2777 else:
2778 # Sanitize format_id from characters used in format selector expression
2779 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2780 formats_dict.setdefault(format['format_id'], []).append(format)
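# For example, the sanitization above turns a (hypothetical) extractor-supplied
# format_id like 'hls 720p/(fr)' into 'hls_720p__fr_', so that it cannot clash
# with the ',', '/', '+', '[]', '()' and whitespace operators of the format
# selector syntax.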
2781
2782 # Make sure all formats have unique format_id
2783 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2784 for format_id, ambiguous_formats in formats_dict.items():
2785 ambiguous_id = len(ambiguous_formats) > 1
2786 for i, format in enumerate(ambiguous_formats):
2787 if ambiguous_id:
2788 format['format_id'] = '%s-%d' % (format_id, i)
2789 # Ensure there is no conflict between id and ext in format selection
2790 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2791 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2792 format['format_id'] = 'f%s' % format['format_id']
2793
2794 if format.get('format') is None:
2795 format['format'] = '{id} - {res}{note}'.format(
2796 id=format['format_id'],
2797 res=self.format_resolution(format),
2798 note=format_field(format, 'format_note', ' (%s)'),
2799 )
2800
2801 if self.params.get('check_formats') is True:
2802 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2803
2804 if not formats or formats[0] is not info_dict:
2805 # Only set the 'formats' field if the original info_dict lists them;
2806 # otherwise we end up with a circular reference: the first (and only)
2807 # element of info_dict['formats'] is info_dict itself,
2808 # which can't be exported to JSON
2809 info_dict['formats'] = formats
2810
2811 info_dict, _ = self.pre_process(info_dict)
2812
2813 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2814 return info_dict
2815
2816 self.post_extract(info_dict)
2817 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2818
2819 # The pre-processors may have modified the formats
2820 formats = self._get_formats(info_dict)
2821
2822 list_only = self.params.get('simulate') == 'list_only'
2823 interactive_format_selection = not list_only and self.format_selector == '-'
2824 if self.params.get('list_thumbnails'):
2825 self.list_thumbnails(info_dict)
2826 if self.params.get('listsubtitles'):
2827 if 'automatic_captions' in info_dict:
2828 self.list_subtitles(
2829 info_dict['id'], automatic_captions, 'automatic captions')
2830 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2831 if self.params.get('listformats') or interactive_format_selection:
2832 self.list_formats(info_dict)
2833 if list_only:
2834 # Without this printing, -F --print-json will not work
2835 self.__forced_printings(info_dict)
2836 return info_dict
2837
2838 format_selector = self.format_selector
2839 while True:
2840 if interactive_format_selection:
2841 req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
2842 + '(Press ENTER for default, or Ctrl+C to quit)'
2843 + self._format_screen(': ', self.Styles.EMPHASIS))
2844 try:
2845 format_selector = self.build_format_selector(req_format) if req_format else None
2846 except SyntaxError as err:
2847 self.report_error(err, tb=False, is_error=False)
2848 continue
2849
2850 if format_selector is None:
2851 req_format = self._default_format_spec(info_dict, download=download)
2852 self.write_debug(f'Default format spec: {req_format}')
2853 format_selector = self.build_format_selector(req_format)
2854
2855 formats_to_download = list(format_selector({
2856 'formats': formats,
2857 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2858 'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
2859 or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
2860 }))
2861 if interactive_format_selection and not formats_to_download:
2862 self.report_error('Requested format is not available', tb=False, is_error=False)
2863 continue
2864 break
2865
2866 if not formats_to_download:
2867 if not self.params.get('ignore_no_formats_error'):
2868 raise ExtractorError(
2869 'Requested format is not available. Use --list-formats for a list of available formats',
2870 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2871 self.report_warning('Requested format is not available')
2872 # Process what we can, even without any available formats.
2873 formats_to_download = [{}]
2874
2875 requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
2876 best_format, downloaded_formats = formats_to_download[-1], []
2877 if download:
2878 if best_format and requested_ranges:
2879 def to_screen(*msg):
2880 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2881
2882 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2883 (f['format_id'] for f in formats_to_download))
2884 if requested_ranges != ({}, ):
2885 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2886 (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
2887 max_downloads_reached = False
2888
2889 for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
2890 new_info = self._copy_infodict(info_dict)
2891 new_info.update(fmt)
2892 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
2893 end_time = offset + min(chapter.get('end_time', duration), duration)
2894 # duration may not be accurate, so allow deviations of <1sec
2895 if end_time == float('inf') or end_time > offset + duration + 1:
2896 end_time = None
2897 if chapter or offset:
2898 new_info.update({
2899 'section_start': offset + chapter.get('start_time', 0),
2900 'section_end': end_time,
2901 'section_title': chapter.get('title'),
2902 'section_number': chapter.get('index'),
2903 })
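# A worked example with hypothetical values: for section_start=10 and a
# requested range {'start_time': 5, 'end_time': 20}, the section downloaded
# is 15..30 on the original timeline. end_time is dropped (None) above when
# it exceeds offset + duration + 1, since extracted durations can be off by ~1s.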
2904 downloaded_formats.append(new_info)
2905 try:
2906 self.process_info(new_info)
2907 except MaxDownloadsReached:
2908 max_downloads_reached = True
2909 self._raise_pending_errors(new_info)
2910 # Remove copied info
2911 for key, val in tuple(new_info.items()):
2912 if info_dict.get(key) == val:
2913 new_info.pop(key)
2914 if max_downloads_reached:
2915 break
2916
2917 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
2918 assert write_archive.issubset({True, False, 'ignore'})
2919 if True in write_archive and False not in write_archive:
2920 self.record_download_archive(info_dict)
2921
2922 info_dict['requested_downloads'] = downloaded_formats
2923 info_dict = self.run_all_pps('after_video', info_dict)
2924 if max_downloads_reached:
2925 raise MaxDownloadsReached()
2926
2927 # We update the info dict with the selected best quality format (backwards compatibility)
2928 info_dict.update(best_format)
2929 return info_dict
2930
2931 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2932 """Select the requested subtitles and their format"""
2933 available_subs, normal_sub_langs = {}, []
2934 if normal_subtitles and self.params.get('writesubtitles'):
2935 available_subs.update(normal_subtitles)
2936 normal_sub_langs = tuple(normal_subtitles.keys())
2937 if automatic_captions and self.params.get('writeautomaticsub'):
2938 for lang, cap_info in automatic_captions.items():
2939 if lang not in available_subs:
2940 available_subs[lang] = cap_info
2941
2942 if not available_subs or (
2943 not self.params.get('writesubtitles')
2944 and not self.params.get('writeautomaticsub')):
2945 return None
2946
2947 all_sub_langs = tuple(available_subs.keys())
2948 if self.params.get('allsubtitles', False):
2949 requested_langs = all_sub_langs
2950 elif self.params.get('subtitleslangs', False):
2951 try:
2952 requested_langs = orderedSet_from_options(
2953 self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
2954 except re.error as e:
2955 raise ValueError(f'Wrong regex for subtitleslangs: {e.pattern}')
2956 else:
2957 requested_langs = LazyList(itertools.chain(
2958 ['en'] if 'en' in normal_sub_langs else [],
2959 filter(lambda f: f.startswith('en'), normal_sub_langs),
2960 ['en'] if 'en' in all_sub_langs else [],
2961 filter(lambda f: f.startswith('en'), all_sub_langs),
2962 normal_sub_langs, all_sub_langs,
2963 ))[:1]
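# When no languages are requested explicitly, the fallback above picks at most
# one language, preferring in order: 'en' among normal subs, any 'en*' normal
# sub, 'en' among all subs (incl. auto captions), any 'en*' sub, then the first
# available language. E.g. (hypothetical) normal subs {'fr', 'en-GB'} -> 'en-GB'.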
2964 if requested_langs:
2965 self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
2966
2967 formats_query = self.params.get('subtitlesformat', 'best')
2968 formats_preference = formats_query.split('/') if formats_query else []
2969 subs = {}
2970 for lang in requested_langs:
2971 formats = available_subs.get(lang)
2972 if formats is None:
2973 self.report_warning(f'{lang} subtitles not available for {video_id}')
2974 continue
2975 for ext in formats_preference:
2976 if ext == 'best':
2977 f = formats[-1]
2978 break
2979 matches = list(filter(lambda f: f['ext'] == ext, formats))
2980 if matches:
2981 f = matches[-1]
2982 break
2983 else:
2984 f = formats[-1]
2985 self.report_warning(
2986 'No subtitle format found matching "%s" for language %s, '
2987 'using %s' % (formats_query, lang, f['ext']))
2988 subs[lang] = f
2989 return subs
2990
2991 def _forceprint(self, key, info_dict):
2992 if info_dict is None:
2993 return
2994 info_copy = info_dict.copy()
2995 info_copy.setdefault('filename', self.prepare_filename(info_dict))
2996 if info_dict.get('requested_formats') is not None:
2997 # For RTMP URLs, also include the playpath
2998 info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2999 elif info_dict.get('url'):
3000 info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
3001 info_copy['formats_table'] = self.render_formats_table(info_dict)
3002 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
3003 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
3004 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
3005
3006 def format_tmpl(tmpl):
3007 mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
3008 if not mobj:
3009 return tmpl
3010
3011 fmt = '%({})s'
3012 if tmpl.startswith('{'):
3013 tmpl, fmt = f'.{tmpl}', '%({})j'
3014 if tmpl.endswith('='):
3015 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
3016 return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
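# A sketch of what format_tmpl produces (field names are hypothetical):
#   'title,id'   ->  '%(title)s\n%(id)s'    (one field per line)
#   'tags='      ->  'tags = %(tags)#j'     (field name, ' = ', a json dump)
#   '{id,title}' ->  '%(.{id,title})j'      (a json dict of the listed fields)
# Anything that does not fullmatch the field pattern is passed through
# unchanged as a regular output template.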
3017
3018 for tmpl in self.params['forceprint'].get(key, []):
3019 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
3020
3021 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
3022 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
3023 tmpl = format_tmpl(tmpl)
3024 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
3025 if self._ensure_dir_exists(filename):
3026 with open(filename, 'a', encoding='utf-8', newline='') as f:
3027 f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
3028
3029 return info_copy
3030
3031 def __forced_printings(self, info_dict, filename=None, incomplete=True):
3032 if (self.params.get('forcejson')
3033 or self.params['forceprint'].get('video')
3034 or self.params['print_to_file'].get('video')):
3035 self.post_extract(info_dict)
3036 if filename:
3037 info_dict['filename'] = filename
3038 info_copy = self._forceprint('video', info_dict)
3039
3040 def print_field(field, actual_field=None, optional=False):
3041 if actual_field is None:
3042 actual_field = field
3043 if self.params.get(f'force{field}') and (
3044 info_copy.get(field) is not None or (not optional and not incomplete)):
3045 self.to_stdout(info_copy[actual_field])
3046
3047 print_field('title')
3048 print_field('id')
3049 print_field('url', 'urls')
3050 print_field('thumbnail', optional=True)
3051 print_field('description', optional=True)
3052 print_field('filename')
3053 if self.params.get('forceduration') and info_copy.get('duration') is not None:
3054 self.to_stdout(formatSeconds(info_copy['duration']))
3055 print_field('format')
3056
3057 if self.params.get('forcejson'):
3058 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
3059
3060 def dl(self, name, info, subtitle=False, test=False):
3061 if not info.get('url'):
3062 self.raise_no_formats(info, True)
3063
3064 if test:
3065 verbose = self.params.get('verbose')
3066 params = {
3067 'test': True,
3068 'quiet': self.params.get('quiet') or not verbose,
3069 'verbose': verbose,
3070 'noprogress': not verbose,
3071 'nopart': True,
3072 'skip_unavailable_fragments': False,
3073 'keep_fragments': False,
3074 'overwrites': True,
3075 '_no_ytdl_file': True,
3076 }
3077 else:
3078 params = self.params
3079 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
3080 if not test:
3081 for ph in self._progress_hooks:
3082 fd.add_progress_hook(ph)
3083 urls = '", "'.join(
3084 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
3085 for f in info.get('requested_formats', []) or [info])
3086 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
3087
3088 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
3089 # But it may contain objects that are not deep-copyable
3090 new_info = self._copy_infodict(info)
3091 if new_info.get('http_headers') is None:
3092 new_info['http_headers'] = self._calc_headers(new_info)
3093 return fd.download(name, new_info, subtitle)
3094
3095 def existing_file(self, filepaths, *, default_overwrite=True):
3096 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
3097 if existing_files and not self.params.get('overwrites', default_overwrite):
3098 return existing_files[0]
3099
3100 for file in existing_files:
3101 self.report_file_delete(file)
3102 os.remove(file)
3103 return None
3104
3105 def process_info(self, info_dict):
3106 """Process a single resolved IE result. (Modifies it in-place)"""
3107
3108 assert info_dict.get('_type', 'video') == 'video'
3109 original_infodict = info_dict
3110
3111 if 'format' not in info_dict and 'ext' in info_dict:
3112 info_dict['format'] = info_dict['ext']
3113
3114 if self._match_entry(info_dict) is not None:
3115 info_dict['__write_download_archive'] = 'ignore'
3116 return
3117
3118 # Does nothing under normal operation - for backward compatibility of process_info
3119 self.post_extract(info_dict)
3120
3121 def replace_info_dict(new_info):
3122 nonlocal info_dict
3123 if new_info == info_dict:
3124 return
3125 info_dict.clear()
3126 info_dict.update(new_info)
3127
3128 new_info, _ = self.pre_process(info_dict, 'video')
3129 replace_info_dict(new_info)
3130 self._num_downloads += 1
3131
3132 # info_dict['_filename'] needs to be set for backward compatibility
3133 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
3134 temp_filename = self.prepare_filename(info_dict, 'temp')
3135 files_to_move = {}
3136
3137 # Forced printings
3138 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
3139
3140 def check_max_downloads():
3141 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
3142 raise MaxDownloadsReached()
3143
3144 if self.params.get('simulate'):
3145 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3146 check_max_downloads()
3147 return
3148
3149 if full_filename is None:
3150 return
3151 if not self._ensure_dir_exists(encodeFilename(full_filename)):
3152 return
3153 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
3154 return
3155
3156 if self._write_description('video', info_dict,
3157 self.prepare_filename(info_dict, 'description')) is None:
3158 return
3159
3160 sub_files = self._write_subtitles(info_dict, temp_filename)
3161 if sub_files is None:
3162 return
3163 files_to_move.update(dict(sub_files))
3164
3165 thumb_files = self._write_thumbnails(
3166 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3167 if thumb_files is None:
3168 return
3169 files_to_move.update(dict(thumb_files))
3170
3171 infofn = self.prepare_filename(info_dict, 'infojson')
3172 _infojson_written = self._write_info_json('video', info_dict, infofn)
3173 if _infojson_written:
3174 info_dict['infojson_filename'] = infofn
3175 # For backward compatibility, even though it was a private field
3176 info_dict['__infojson_filename'] = infofn
3177 elif _infojson_written is None:
3178 return
3179
3180 # Note: Annotations are deprecated
3181 annofn = None
3182 if self.params.get('writeannotations', False):
3183 annofn = self.prepare_filename(info_dict, 'annotation')
3184 if annofn:
3185 if not self._ensure_dir_exists(encodeFilename(annofn)):
3186 return
3187 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
3188 self.to_screen('[info] Video annotations are already present')
3189 elif not info_dict.get('annotations'):
3190 self.report_warning('There are no annotations to write.')
3191 else:
3192 try:
3193 self.to_screen('[info] Writing video annotations to: ' + annofn)
3194 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
3195 annofile.write(info_dict['annotations'])
3196 except (KeyError, TypeError):
3197 self.report_warning('There are no annotations to write.')
3198 except OSError:
3199 self.report_error('Cannot write annotations file: ' + annofn)
3200 return
3201
3202 # Write internet shortcut files
3203 def _write_link_file(link_type):
3204 url = try_get(info_dict['webpage_url'], iri_to_uri)
3205 if not url:
3206 self.report_warning(
3207 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3208 return True
3209 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
3210 if not self._ensure_dir_exists(encodeFilename(linkfn)):
3211 return False
3212 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
3213 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3214 return True
3215 try:
3216 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3217 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3218 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
3219 template_vars = {'url': url}
3220 if link_type == 'desktop':
3221 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3222 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
3223 except OSError:
3224 self.report_error(f'Cannot write internet shortcut {linkfn}')
3225 return False
3226 return True
3227
3228 write_links = {
3229 'url': self.params.get('writeurllink'),
3230 'webloc': self.params.get('writewebloclink'),
3231 'desktop': self.params.get('writedesktoplink'),
3232 }
3233 if self.params.get('writelink'):
3234 link_type = ('webloc' if sys.platform == 'darwin'
3235 else 'desktop' if sys.platform.startswith('linux')
3236 else 'url')
3237 write_links[link_type] = True
3238
3239 if any(should_write and not _write_link_file(link_type)
3240 for link_type, should_write in write_links.items()):
3241 return
3242
3243 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3244 replace_info_dict(new_info)
3245
3246 if self.params.get('skip_download'):
3247 info_dict['filepath'] = temp_filename
3248 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3249 info_dict['__files_to_move'] = files_to_move
3250 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
3251 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3252 else:
3253 # Download
3254 info_dict.setdefault('__postprocessors', [])
3255 try:
3256
3257 def existing_video_file(*filepaths):
3258 ext = info_dict.get('ext')
3259 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3260 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3261 default_overwrite=False)
3262 if file:
3263 info_dict['ext'] = os.path.splitext(file)[1][1:]
3264 return file
3265
3266 fd, success = None, True
3267 if info_dict.get('protocol') or info_dict.get('url'):
3268 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3269 if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
3270 info_dict.get('section_start') or info_dict.get('section_end')):
3271 msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
3272 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3273 self.report_error(f'{msg}. Aborting')
3274 return
3275
3276 if info_dict.get('requested_formats') is not None:
3277 old_ext = info_dict['ext']
3278 if self.params.get('merge_output_format') is None:
3279 if (info_dict['ext'] == 'webm'
3280 and info_dict.get('thumbnails')
3281 # check with type instead of pp_key, __name__, or isinstance
3282 # since we don't want any custom PPs to trigger this
3283 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
3284 info_dict['ext'] = 'mkv'
3285 self.report_warning(
3286 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3287 new_ext = info_dict['ext']
3288
3289 def correct_ext(filename, ext=new_ext):
3290 if filename == '-':
3291 return filename
3292 filename_real_ext = os.path.splitext(filename)[1][1:]
3293 filename_wo_ext = (
3294 os.path.splitext(filename)[0]
3295 if filename_real_ext in (old_ext, new_ext)
3296 else filename)
3297 return f'{filename_wo_ext}.{ext}'
3298
3299 # Ensure filename always has a correct extension for successful merge
3300 full_filename = correct_ext(full_filename)
3301 temp_filename = correct_ext(temp_filename)
3302 dl_filename = existing_video_file(full_filename, temp_filename)
3303
3304 info_dict['__real_download'] = False
3305 # NOTE: Copy so that original format dicts are not modified
3306 info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))
3307
3308 merger = FFmpegMergerPP(self)
3309 downloaded = []
3310 if dl_filename is not None:
3311 self.report_file_already_downloaded(dl_filename)
3312 elif fd:
3313 for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
3314 f['filepath'] = fname = prepend_extension(
3315 correct_ext(temp_filename, info_dict['ext']),
3316 'f%s' % f['format_id'], info_dict['ext'])
3317 downloaded.append(fname)
3318 info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
3319 success, real_download = self.dl(temp_filename, info_dict)
3320 info_dict['__real_download'] = real_download
3321 else:
3322 if self.params.get('allow_unplayable_formats'):
3323 self.report_warning(
3324 'You have requested merging of multiple formats '
3325 'while also allowing unplayable formats to be downloaded. '
3326 'The formats won\'t be merged to prevent data corruption.')
3327 elif not merger.available:
3328 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3329 if not self.params.get('ignoreerrors'):
3330 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3331 return
3332 self.report_warning(f'{msg}. The formats won\'t be merged')
3333
3334 if temp_filename == '-':
3335 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3336 else 'but the formats are incompatible for simultaneous download' if merger.available
3337 else 'but ffmpeg is not installed')
3338 self.report_warning(
3339 f'You have requested downloading multiple formats to stdout {reason}. '
3340 'The formats will be streamed one after the other')
3341 fname = temp_filename
3342 for f in info_dict['requested_formats']:
3343 new_info = dict(info_dict)
3344 del new_info['requested_formats']
3345 new_info.update(f)
3346 if temp_filename != '-':
3347 fname = prepend_extension(
3348 correct_ext(temp_filename, new_info['ext']),
3349 'f%s' % f['format_id'], new_info['ext'])
3350 if not self._ensure_dir_exists(fname):
3351 return
3352 f['filepath'] = fname
3353 downloaded.append(fname)
3354 partial_success, real_download = self.dl(fname, new_info)
3355 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3356 success = success and partial_success
3357
3358 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3359 info_dict['__postprocessors'].append(merger)
3360 info_dict['__files_to_merge'] = downloaded
3361 # Even if there were no new downloads, the file is only being merged now
3362 info_dict['__real_download'] = True
3363 else:
3364 for file in downloaded:
3365 files_to_move[file] = None
3366 else:
3367 # Just a single file
3368 dl_filename = existing_video_file(full_filename, temp_filename)
3369 if dl_filename is None or dl_filename == temp_filename:
3370 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3371 # So we should try to resume the download
3372 success, real_download = self.dl(temp_filename, info_dict)
3373 info_dict['__real_download'] = real_download
3374 else:
3375 self.report_file_already_downloaded(dl_filename)
3376
3377 dl_filename = dl_filename or temp_filename
3378 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3379
3380 except network_exceptions as err:
3381 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3382 return
3383 except OSError as err:
3384 raise UnavailableVideoError(err)
3385 except (ContentTooShortError, ) as err:
3386 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3387 return
3388
3389 self._raise_pending_errors(info_dict)
3390 if success and full_filename != '-':
3391
3392 def fixup():
3393 do_fixup = True
3394 fixup_policy = self.params.get('fixup')
3395 vid = info_dict['id']
3396
3397 if fixup_policy in ('ignore', 'never'):
3398 return
3399 elif fixup_policy == 'warn':
3400 do_fixup = 'warn'
3401 elif fixup_policy != 'force':
3402 assert fixup_policy in ('detect_or_warn', None)
3403 if not info_dict.get('__real_download'):
3404 do_fixup = False
3405
3406 def ffmpeg_fixup(cndn, msg, cls):
3407 if not (do_fixup and cndn):
3408 return
3409 elif do_fixup == 'warn':
3410 self.report_warning(f'{vid}: {msg}')
3411 return
3412 pp = cls(self)
3413 if pp.available:
3414 info_dict['__postprocessors'].append(pp)
3415 else:
3416 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3417
3418 stretched_ratio = info_dict.get('stretched_ratio')
3419 ffmpeg_fixup(stretched_ratio not in (1, None),
3420 f'Non-uniform pixel ratio {stretched_ratio}',
3421 FFmpegFixupStretchedPP)
3422
3423 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3424 downloader = downloader.FD_NAME if downloader else None
3425
3426 ext = info_dict.get('ext')
3427 postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3428 isinstance(pp, FFmpegVideoConvertorPP)
3429 and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3430 ) for pp in self._pps['post_process'])
3431
3432 if not postprocessed_by_ffmpeg:
3433 ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
3434 'writing DASH m4a. Only some players support this container',
3435 FFmpegFixupM4aPP)
3436 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3437 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3438 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3439 FFmpegFixupM3u8PP)
3440 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
3441 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3442
3443 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3444 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3445
3446 fixup()
3447 try:
3448 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3449 except PostProcessingError as err:
3450 self.report_error('Postprocessing: %s' % str(err))
3451 return
3452 try:
3453 for ph in self._post_hooks:
3454 ph(info_dict['filepath'])
3455 except Exception as err:
3456 self.report_error('post hooks: %s' % str(err))
3457 return
3458 info_dict['__write_download_archive'] = True
3459
3460 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
3461 if self.params.get('force_write_download_archive'):
3462 info_dict['__write_download_archive'] = True
3463 check_max_downloads()
3464
3465 def __download_wrapper(self, func):
3466 @functools.wraps(func)
3467 def wrapper(*args, **kwargs):
3468 try:
3469 res = func(*args, **kwargs)
3470 except UnavailableVideoError as e:
3471 self.report_error(e)
3472 except DownloadCancelled as e:
3473 self.to_screen(f'[info] {e}')
3474 if not self.params.get('break_per_url'):
3475 raise
3476 self._num_downloads = 0
3477 else:
3478 if self.params.get('dump_single_json', False):
3479 self.post_extract(res)
3480 self.to_stdout(json.dumps(self.sanitize_info(res)))
3481 return wrapper
3482
3483 def download(self, url_list):
3484 """Download a given list of URLs."""
3485 url_list = variadic(url_list) # Passing a single URL is a common mistake
3486 outtmpl = self.params['outtmpl']['default']
3487 if (len(url_list) > 1
3488 and outtmpl != '-'
3489 and '%' not in outtmpl
3490 and self.params.get('max_downloads') != 1):
3491 raise SameFileError(outtmpl)
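# E.g. (hypothetical) ydl.download(['URL1', 'URL2']) with outtmpl 'video.mp4'
# would make both downloads target the same file, hence the SameFileError;
# a template containing '%' fields, or '-' (stdout), is exempt.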
3492
3493 for url in url_list:
3494 self.__download_wrapper(self.extract_info)(
3495 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3496
3497 return self._download_retcode
3498
3499 def download_with_info_file(self, info_filename):
3500 with contextlib.closing(fileinput.FileInput(
3501 [info_filename], mode='r',
3502 openhook=fileinput.hook_encoded('utf-8'))) as f:
3503 # FileInput doesn't have a read method, so we can't call json.load
3504 infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
3505 for info in variadic(json.loads('\n'.join(f)))]
3506 for info in infos:
3507 self._load_cookies(info.get('cookies'), from_headers=False)
3508 self._load_cookies(traverse_obj(info.get('http_headers'), 'Cookie', casesense=False)) # compat
3509 try:
3510 self.__download_wrapper(self.process_ie_result)(info, download=True)
3511 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3512 if not isinstance(e, EntryNotInPlaylist):
3513 self.to_stderr('\r')
3514 webpage_url = info.get('webpage_url')
3515 if webpage_url is None:
3516 raise
3517 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3518 self.download([webpage_url])
3519 return self._download_retcode
3520
3521 @staticmethod
3522 def sanitize_info(info_dict, remove_private_keys=False):
3523 ''' Sanitize the infodict for converting to JSON '''
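# A minimal usage sketch (values illustrative):
#   clean = YoutubeDL.sanitize_info(info, remove_private_keys=True)
#   json.dumps(clean)  # safe: non-serializable values become repr() strings
# With remove_private_keys=True, None values, '__'-prefixed keys and
# bookkeeping fields such as 'filepath' or 'requested_formats' are dropped.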
3524 if info_dict is None:
3525 return info_dict
3526 info_dict.setdefault('epoch', int(time.time()))
3527 info_dict.setdefault('_type', 'video')
3528 info_dict.setdefault('_version', {
3529 'version': __version__,
3530 'current_git_head': current_git_head(),
3531 'release_git_head': RELEASE_GIT_HEAD,
3532 'repository': REPOSITORY,
3533 })
3534
3535 if remove_private_keys:
3536 reject = lambda k, v: v is None or k.startswith('__') or k in {
3537 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3538 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
3539 'playlist_autonumber', '_format_sort_fields',
3540 }
3541 else:
3542 reject = lambda k, v: False
3543
3544 def filter_fn(obj):
3545 if isinstance(obj, dict):
3546 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3547 elif isinstance(obj, (list, tuple, set, LazyList)):
3548 return list(map(filter_fn, obj))
3549 elif obj is None or isinstance(obj, (str, int, float, bool)):
3550 return obj
3551 else:
3552 return repr(obj)
3553
3554 return filter_fn(info_dict)
3555
3556 @staticmethod
3557 def filter_requested_info(info_dict, actually_filter=True):
3558 ''' Alias of sanitize_info for backward compatibility '''
3559 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3560
3561 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3562 for filename in set(filter(None, files_to_delete)):
3563 if msg:
3564 self.to_screen(msg % filename)
3565 try:
3566 os.remove(filename)
3567 except OSError:
3568 self.report_warning(f'Unable to delete file {filename}')
3569 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3570 del info['__files_to_move'][filename]
3571
3572 @staticmethod
3573 def post_extract(info_dict):
3574 def actual_post_extract(info_dict):
3575 if info_dict.get('_type') in ('playlist', 'multi_video'):
3576 for video_dict in info_dict.get('entries', {}):
3577 actual_post_extract(video_dict or {})
3578 return
3579
3580 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3581 info_dict.update(post_extractor())
3582
3583 actual_post_extract(info_dict or {})
3584
3585 def run_pp(self, pp, infodict):
3586 files_to_delete = []
3587 if '__files_to_move' not in infodict:
3588 infodict['__files_to_move'] = {}
3589 try:
3590 files_to_delete, infodict = pp.run(infodict)
3591 except PostProcessingError as e:
3592 # Must be True and not 'only_download'
3593 if self.params.get('ignoreerrors') is True:
3594 self.report_error(e)
3595 return infodict
3596 raise
3597
3598 if not files_to_delete:
3599 return infodict
3600 if self.params.get('keepvideo', False):
3601 for f in files_to_delete:
3602 infodict['__files_to_move'].setdefault(f, '')
3603 else:
3604 self._delete_downloaded_files(
3605 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3606 return infodict
3607
3608 def run_all_pps(self, key, info, *, additional_pps=None):
3609 if key != 'video':
3610 self._forceprint(key, info)
3611 for pp in (additional_pps or []) + self._pps[key]:
3612 info = self.run_pp(pp, info)
3613 return info
3614
3615 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3616 info = dict(ie_info)
3617 info['__files_to_move'] = files_to_move or {}
3618 try:
3619 info = self.run_all_pps(key, info)
3620 except PostProcessingError as err:
3621 msg = f'Preprocessing: {err}'
3622 info.setdefault('__pending_error', msg)
3623 self.report_error(msg, is_error=False)
3624 return info, info.pop('__files_to_move', None)
3625
3626 def post_process(self, filename, info, files_to_move=None):
3627 """Run all the postprocessors on the given file."""
3628 info['filepath'] = filename
3629 info['__files_to_move'] = files_to_move or {}
3630 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3631 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3632 del info['__files_to_move']
3633 return self.run_all_pps('after_move', info)
3634
3635 def _make_archive_id(self, info_dict):
3636 video_id = info_dict.get('id')
3637 if not video_id:
3638 return
3639 # Future-proof against any change in case
3640 # and for backwards compatibility with prior versions
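# Sketch of the resulting entry (id illustrative): the lowercased extractor
# key followed by the video id, e.g. 'youtube abc123' - the line format
# used in the --download-archive file.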
3641 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3642 if extractor is None:
3643 url = str_or_none(info_dict.get('url'))
3644 if not url:
3645 return
3646 # Try to find matching extractor for the URL and take its ie_key
3647 for ie_key, ie in self._ies.items():
3648 if ie.suitable(url):
3649 extractor = ie_key
3650 break
3651 else:
3652 return
3653 return make_archive_id(extractor, video_id)
3654
3655 def in_download_archive(self, info_dict):
3656 if not self.archive:
3657 return False
3658
3659 vid_ids = [self._make_archive_id(info_dict)]
3660 vid_ids.extend(info_dict.get('_old_archive_ids') or [])
3661 return any(id_ in self.archive for id_ in vid_ids)
3662
3663 def record_download_archive(self, info_dict):
3664 fn = self.params.get('download_archive')
3665 if fn is None:
3666 return
3667 vid_id = self._make_archive_id(info_dict)
3668 assert vid_id
3669
3670 self.write_debug(f'Adding to archive: {vid_id}')
3671 if is_path_like(fn):
3672 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3673 archive_file.write(vid_id + '\n')
3674 self.archive.add(vid_id)
3675
3676 @staticmethod
3677 def format_resolution(format, default='unknown'):
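# Resolution fallbacks, shown with hypothetical field values:
#   {'width': 1920, 'height': 1080} -> '1920x1080'
#   {'height': 720}                 -> '720p'
#   {'width': 640}                  -> '640x?'
#   {}                              -> default ('unknown')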
3678 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3679 return 'audio only'
3680 if format.get('resolution') is not None:
3681 return format['resolution']
3682 if format.get('width') and format.get('height'):
3683 return '%dx%d' % (format['width'], format['height'])
3684 elif format.get('height'):
3685 return '%sp' % format['height']
3686 elif format.get('width'):
3687 return '%dx?' % format['width']
3688 return default
3689
3690 def _list_format_headers(self, *headers):
3691 if self.params.get('listformats_table', True) is not False:
3692 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3693 return headers
3694
3695 def _format_note(self, fdict):
3696 res = ''
3697 if fdict.get('ext') in ['f4f', 'f4m']:
3698 res += '(unsupported)'
3699 if fdict.get('language'):
3700 if res:
3701 res += ' '
3702 res += '[%s]' % fdict['language']
3703 if fdict.get('format_note') is not None:
3704 if res:
3705 res += ' '
3706 res += fdict['format_note']
3707 if fdict.get('tbr') is not None:
3708 if res:
3709 res += ', '
3710 res += '%4dk' % fdict['tbr']
3711 if fdict.get('container') is not None:
3712 if res:
3713 res += ', '
3714 res += '%s container' % fdict['container']
3715 if (fdict.get('vcodec') is not None
3716 and fdict.get('vcodec') != 'none'):
3717 if res:
3718 res += ', '
3719 res += fdict['vcodec']
3720 if fdict.get('vbr') is not None:
3721 res += '@'
3722 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3723 res += 'video@'
3724 if fdict.get('vbr') is not None:
3725 res += '%4dk' % fdict['vbr']
3726 if fdict.get('fps') is not None:
3727 if res:
3728 res += ', '
3729 res += '%sfps' % fdict['fps']
3730 if fdict.get('acodec') is not None:
3731 if res:
3732 res += ', '
3733 if fdict['acodec'] == 'none':
3734 res += 'video only'
3735 else:
3736 res += '%-5s' % fdict['acodec']
3737 elif fdict.get('abr') is not None:
3738 if res:
3739 res += ', '
3740 res += 'audio'
3741 if fdict.get('abr') is not None:
3742 res += '@%3dk' % fdict['abr']
3743 if fdict.get('asr') is not None:
3744 res += ' (%5dHz)' % fdict['asr']
3745 if fdict.get('filesize') is not None:
3746 if res:
3747 res += ', '
3748 res += format_bytes(fdict['filesize'])
3749 elif fdict.get('filesize_approx') is not None:
3750 if res:
3751 res += ', '
3752 res += '~' + format_bytes(fdict['filesize_approx'])
3753 return res
3754
3755 def _get_formats(self, info_dict):
3756 if info_dict.get('formats') is None:
3757 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3758 return [info_dict]
3759 return []
3760 return info_dict['formats']
3761
3762 def render_formats_table(self, info_dict):
3763 formats = self._get_formats(info_dict)
3764 if not formats:
3765 return
3766 if self.params.get('listformats_table', True) is False:
3767 table = [
3768 [
3769 format_field(f, 'format_id'),
3770 format_field(f, 'ext'),
3771 self.format_resolution(f),
3772 self._format_note(f)
3773 ] for f in formats if (f.get('preference') or 0) >= -1000]
3774 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3775
3776 def simplified_codec(f, field):
3777 assert field in ('acodec', 'vcodec')
3778 codec = f.get(field)
3779 if not codec:
3780 return 'unknown'
3781 elif codec != 'none':
3782 return '.'.join(codec.split('.')[:4])
3783
3784 if field == 'vcodec' and f.get('acodec') == 'none':
3785 return 'images'
3786 elif field == 'acodec' and f.get('vcodec') == 'none':
3787 return ''
3788 return self._format_out('audio only' if field == 'vcodec' else 'video only',
3789 self.Styles.SUPPRESS)
3790
3791 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3792 table = [
3793 [
3794 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3795 format_field(f, 'ext'),
3796 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3797 format_field(f, 'fps', '\t%d', func=round),
3798 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3799 format_field(f, 'audio_channels', '\t%s'),
3800 delim, (
3801 format_field(f, 'filesize', ' \t%s', func=format_bytes)
3802 or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
3803 or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
3804 None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
3805 format_field(f, 'tbr', '\t%dk', func=round),
3806 shorten_protocol_name(f.get('protocol', '')),
3807 delim,
3808 simplified_codec(f, 'vcodec'),
3809 format_field(f, 'vbr', '\t%dk', func=round),
3810 simplified_codec(f, 'acodec'),
3811 format_field(f, 'abr', '\t%dk', func=round),
3812 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
3813 join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
3814 self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
3815 (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
3816 else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
3817 format_field(f, 'format_note'),
3818 format_field(f, 'container', ignore=(None, f.get('ext'))),
3819 delim=', '), delim=' '),
3820 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3821 header_line = self._list_format_headers(
3822 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3823 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3824
3825 return render_table(
3826 header_line, table, hide_empty=True,
3827 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3828
3829 def render_thumbnails_table(self, info_dict):
3830 thumbnails = list(info_dict.get('thumbnails') or [])
3831 if not thumbnails:
3832 return None
3833 return render_table(
3834 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3835 [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
3836
3837 def render_subtitles_table(self, video_id, subtitles):
3838 def _row(lang, formats):
3839 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3840 if len(set(names)) == 1:
3841 names = [] if names[0] == 'unknown' else names[:1]
3842 return [lang, ', '.join(names), ', '.join(exts)]
3843
3844 if not subtitles:
3845 return None
3846 return render_table(
3847 self._list_format_headers('Language', 'Name', 'Formats'),
3848 [_row(lang, formats) for lang, formats in subtitles.items()],
3849 hide_empty=True)
3850
3851 def __list_table(self, video_id, name, func, *args):
3852 table = func(*args)
3853 if not table:
3854 self.to_screen(f'{video_id} has no {name}')
3855 return
3856 self.to_screen(f'[info] Available {name} for {video_id}:')
3857 self.to_stdout(table)
3858
3859 def list_formats(self, info_dict):
3860 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3861
3862 def list_thumbnails(self, info_dict):
3863 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3864
3865 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3866 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3867
3868 def print_debug_header(self):
3869 if not self.params.get('verbose'):
3870 return
3871
3872 from . import _IN_CLI # Must be delayed import
3873
3874 # These imports can be slow, so import them only as needed
3875 from .extractor.extractors import _LAZY_LOADER
3876 from .extractor.extractors import (
3877 _PLUGIN_CLASSES as plugin_ies,
3878 _PLUGIN_OVERRIDES as plugin_ie_overrides
3879 )
3880
3881 def get_encoding(stream):
3882 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3883 additional_info = []
3884 if os.environ.get('TERM', '').lower() == 'dumb':
3885 additional_info.append('dumb')
3886 if not supports_terminal_sequences(stream):
3887 from .utils import WINDOWS_VT_MODE # Must be imported locally
3888 additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
3889 if additional_info:
3890 ret = f'{ret} ({",".join(additional_info)})'
3891 return ret
3892
3893 encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
3894 locale.getpreferredencoding(),
3895 sys.getfilesystemencoding(),
3896 self.get_encoding(),
3897 ', '.join(
3898 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
3899 if stream is not None and key != 'console')
3900 )
3901
3902 logger = self.params.get('logger')
3903 if logger:
3904 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3905 write_debug(encoding_str)
3906 else:
3907 write_string(f'[debug] {encoding_str}\n', encoding=None)
3908 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3909
3910 source = detect_variant()
3911 if VARIANT not in (None, 'pip'):
3912 source += '*'
3913 klass = type(self)
3914 write_debug(join_nonempty(
3915 f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
3916 f'{CHANNEL}@{__version__}',
3917 f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
3918 '' if source == 'unknown' else f'({source})',
3919 '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
3920 delim=' '))
3921
3922 if not _IN_CLI:
3923 write_debug(f'params: {self.params}')
3924
3925 if not _LAZY_LOADER:
3926 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3927 write_debug('Lazy loading extractors is forcibly disabled')
3928 else:
3929 write_debug('Lazy loading extractors is disabled')
3930 if self.params['compat_opts']:
3931 write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
3932
3933 if current_git_head():
3934 write_debug(f'Git HEAD: {current_git_head()}')
3935 write_debug(system_identifier())
3936
3937 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3938 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3939 if ffmpeg_features:
3940 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
3941
3942 exe_versions['rtmpdump'] = rtmpdump_version()
3943 exe_versions['phantomjs'] = PhantomJSwrapper._version()
3944 exe_str = ', '.join(
3945 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3946 ) or 'none'
3947 write_debug('exe versions: %s' % exe_str)
3948
3949 from .compat.compat_utils import get_package_info
3950 from .dependencies import available_dependencies
3951
3952 write_debug('Optional libraries: %s' % (', '.join(sorted({
3953 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
3954 })) or 'none'))
3955
3956 write_debug(f'Proxy map: {self.proxies}')
3957 # write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers)}')
3958 for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
3959 display_list = ['%s%s' % (
3960 klass.__name__, '' if klass.__name__ == name else f' as {name}')
3961 for name, klass in plugins.items()]
3962 if plugin_type == 'Extractor':
3963 display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
3964 for parent, plugins in plugin_ie_overrides.items())
3965 if not display_list:
3966 continue
3967 write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')
3968
3969 plugin_dirs = plugin_directories()
3970 if plugin_dirs:
3971 write_debug(f'Plugin directories: {plugin_dirs}')
3972
3973 # Not implemented
3974 if False and self.params.get('call_home'):
3975 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
3976 write_debug('Public IP address: %s' % ipaddr)
3977 latest_version = self.urlopen(
3978 'https://yt-dl.org/latest/version').read().decode()
3979 if version_tuple(latest_version) > version_tuple(__version__):
3980 self.report_warning(
3981 'You are using an outdated version (newest version: %s)! '
3982 'See https://yt-dl.org/update if you need help updating.' %
3983 latest_version)
3984
3985 @functools.cached_property
3986 def proxies(self):
3987 """Global proxy configuration"""
3988 opts_proxy = self.params.get('proxy')
3989 if opts_proxy is not None:
3990 if opts_proxy == '':
3991 opts_proxy = '__noproxy__'
3992 proxies = {'all': opts_proxy}
3993 else:
3994 proxies = urllib.request.getproxies()
3995 # compat. Set HTTPS_PROXY to __noproxy__ to revert
3996 if 'http' in proxies and 'https' not in proxies:
3997 proxies['https'] = proxies['http']
3998
3999 return proxies
4000
4001 @functools.cached_property
4002 def cookiejar(self):
4003 """Global cookiejar instance"""
4004 return load_cookies(
4005 self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
4006
4007 @property
4008 def _opener(self):
4009 """
4010 Get a urllib OpenerDirector from the Urllib handler (deprecated).
4011 """
4012 self.deprecation_warning('YoutubeDL._opener() is deprecated, use YoutubeDL.urlopen()')
4013 handler = self._request_director.handlers['Urllib']
4014 return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
4015
4016 def urlopen(self, req):
4017 """ Start an HTTP download """
4018 if isinstance(req, str):
4019 req = Request(req)
4020 elif isinstance(req, urllib.request.Request):
4021 self.deprecation_warning(
4022 'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
4023 'Use yt_dlp.networking.common.Request instead.')
4024 req = urllib_req_to_req(req)
4025 assert isinstance(req, Request)
4026
4027 # compat: Assume user:pass in the URL is basic auth
4028 url, basic_auth_header = extract_basic_auth(req.url)
4029 if basic_auth_header:
4030 req.headers['Authorization'] = basic_auth_header
4031 req.url = sanitize_url(url)
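# E.g. a (made-up) URL like 'https://user:pass@host/x' is rewritten to
# 'https://host/x' and a corresponding 'Authorization: Basic ...' header is set.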
4032
4033 clean_proxies(proxies=req.proxies, headers=req.headers)
4034 clean_headers(req.headers)
4035
4036 try:
4037 return self._request_director.send(req)
4038 except NoSupportingHandlers as e:
4039 for ue in e.unsupported_errors:
4040 if not (ue.handler and ue.msg):
4041 continue
4042 if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
4043 raise RequestError(
4044 'file:// URLs are disabled by default in yt-dlp for security reasons. '
4045 'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
4046 raise
4047 except SSLError as e:
4048 if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
4049 raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
4050 elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
4051 raise RequestError(
4052 'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
4053 'Try using --legacy-server-connect', cause=e) from e
4054 raise
4055 except HTTPError as e: # TODO: Remove in a future release
4056 raise _CompatHTTPError(e) from e
4057
4058 def build_request_director(self, handlers):
4059 logger = _YDLLogger(self)
4060 headers = self.params.get('http_headers').copy()
4061 proxies = self.proxies.copy()
4062 clean_headers(headers)
4063 clean_proxies(proxies, headers)
4064
4065 director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
4066 for handler in handlers:
4067 director.add_handler(handler(
4068 logger=logger,
4069 headers=headers,
4070 cookiejar=self.cookiejar,
4071 proxies=proxies,
4072 prefer_system_certs='no-certifi' in self.params['compat_opts'],
4073 verify=not self.params.get('nocheckcertificate'),
4074 **traverse_obj(self.params, {
4075 'verbose': 'debug_printtraffic',
4076 'source_address': 'source_address',
4077 'timeout': 'socket_timeout',
4078 'legacy_ssl_support': 'legacy_server_connect',
4079 'enable_file_urls': 'enable_file_urls',
4080 'client_cert': {
4081 'client_certificate': 'client_certificate',
4082 'client_certificate_key': 'client_certificate_key',
4083 'client_certificate_password': 'client_certificate_password',
4084 },
4085 }),
4086 ))
4087 return director
4088
4089 def encode(self, s):
4090 if isinstance(s, bytes):
4091 return s # Already encoded
4092
4093 try:
4094 return s.encode(self.get_encoding())
4095 except UnicodeEncodeError as err:
4096 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
4097 raise
4098
4099 def get_encoding(self):
4100 encoding = self.params.get('encoding')
4101 if encoding is None:
4102 encoding = preferredencoding()
4103 return encoding
4104
4105 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
4106 ''' Write infojson and return True = written, 'exists' = already exists, False = skip, None = error '''
4107 if overwrite is None:
4108 overwrite = self.params.get('overwrites', True)
4109 if not self.params.get('writeinfojson'):
4110 return False
4111 elif not infofn:
4112 self.write_debug(f'Skipping writing {label} infojson')
4113 return False
4114 elif not self._ensure_dir_exists(infofn):
4115 return None
4116 elif not overwrite and os.path.exists(infofn):
4117 self.to_screen(f'[info] {label.title()} metadata is already present')
4118 return 'exists'
4119
4120 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
4121 try:
4122 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
4123 return True
4124 except OSError:
4125 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
4126 return None
4127
4128 def _write_description(self, label, ie_result, descfn):
4129 ''' Write description and return True = written, False = skip, None = error '''
4130 if not self.params.get('writedescription'):
4131 return False
4132 elif not descfn:
4133 self.write_debug(f'Skipping writing {label} description')
4134 return False
4135 elif not self._ensure_dir_exists(descfn):
4136 return None
4137 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
4138 self.to_screen(f'[info] {label.title()} description is already present')
4139 elif ie_result.get('description') is None:
4140 self.to_screen(f'[info] There\'s no {label} description to write')
4141 return False
4142 else:
4143 try:
4144 self.to_screen(f'[info] Writing {label} description to: {descfn}')
4145 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
4146 descfile.write(ie_result['description'])
4147 except OSError:
4148 self.report_error(f'Cannot write {label} description file {descfn}')
4149 return None
4150 return True
4151
4152 def _write_subtitles(self, info_dict, filename):
4153 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
4154 ret = []
4155 subtitles = info_dict.get('requested_subtitles')
4156 if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
4157 # Subtitle download errors are already handled in the relevant IE,
4158 # so this silently continues when used with an IE that lacks subtitle support
4159 return ret
4160 elif not subtitles:
4161 self.to_screen('[info] There are no subtitles for the requested languages')
4162 return ret
4163 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
4164 if not sub_filename_base:
4165 self.to_screen('[info] Skipping writing video subtitles')
4166 return ret
4167
4168 for sub_lang, sub_info in subtitles.items():
4169 sub_format = sub_info['ext']
4170 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
4171 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
4172 existing_sub = self.existing_file((sub_filename_final, sub_filename))
4173 if existing_sub:
4174 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
4175 sub_info['filepath'] = existing_sub
4176 ret.append((existing_sub, sub_filename_final))
4177 continue
4178
4179 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
4180 if sub_info.get('data') is not None:
4181 try:
4182 # Use newline='' to prevent conversion of newline characters
4183 # See https://github.com/ytdl-org/youtube-dl/issues/10268
4184 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
4185 subfile.write(sub_info['data'])
4186 sub_info['filepath'] = sub_filename
4187 ret.append((sub_filename, sub_filename_final))
4188 continue
4189 except OSError:
4190 self.report_error(f'Cannot write video subtitles file {sub_filename}')
4191 return None
4192
4193 try:
4194 sub_copy = sub_info.copy()
4195 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
4196 self.dl(sub_filename, sub_copy, subtitle=True)
4197 sub_info['filepath'] = sub_filename
4198 ret.append((sub_filename, sub_filename_final))
4199 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
4200 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
4201 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
4202 if not self.params.get('ignoreerrors'):
4203 self.report_error(msg)
4204 raise DownloadError(msg)
4205 self.report_warning(msg)
4206 return ret
4207
4208 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
4209 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
4210 write_all = self.params.get('write_all_thumbnails', False)
4211 thumbnails, ret = [], []
4212 if write_all or self.params.get('writethumbnail', False):
4213 thumbnails = info_dict.get('thumbnails') or []
4214 if not thumbnails:
4215 self.to_screen(f'[info] There are no {label} thumbnails to download')
4216 return ret
4217 multiple = write_all and len(thumbnails) > 1
4218
4219 if thumb_filename_base is None:
4220 thumb_filename_base = filename
4221 if thumbnails and not thumb_filename_base:
4222 self.write_debug(f'Skipping writing {label} thumbnail')
4223 return ret
4224
4225 for idx, t in list(enumerate(thumbnails))[::-1]:
4226 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
4227 thumb_display_id = f'{label} thumbnail {t["id"]}'
4228 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
4229 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
4230
4231 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
4232 if existing_thumb:
4233 self.to_screen('[info] %s is already present' % (
4234 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
4235 t['filepath'] = existing_thumb
4236 ret.append((existing_thumb, thumb_filename_final))
4237 else:
4238 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
4239 try:
4240 uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
4241 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
4242 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
4243 shutil.copyfileobj(uf, thumbf)
4244 ret.append((thumb_filename, thumb_filename_final))
4245 t['filepath'] = thumb_filename
4246 except network_exceptions as err:
4247 if isinstance(err, HTTPError) and err.status == 404:
4248 self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
4249 else:
4250 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
4251 thumbnails.pop(idx)
4252 if ret and not write_all:
4253 break
4254 return ret