import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import functools
import http.cookiejar
import io
import itertools
import json
import locale
import operator
import os
import random
import re
import shutil
import string
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata

from .cache import Cache
from .compat import urllib  # isort: split
from .compat import compat_os_name, compat_shlex_quote
from .cookies import LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    FFmpegVideoConvertorPP,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import REPOSITORY, current_git_head, detect_variant
from .utils import (
    DEFAULT_OUTTMPL,
    IDENTITY,
    LINK_TEMPLATES,
    MEDIA_EXTENSIONS,
    NO_DEFAULT,
    NUMBER_RE,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    FormatSorter,
    GeoRestrictedError,
    HEADRequest,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PerRequestProxyHandler,
    PlaylistEntries,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    UserNotLive,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
    age_restricted,
    args_to_str,
    bug_reports_message,
    date_from_str,
    deprecation_warning,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    escapeHTML,
    expand_path,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_compatible_ext,
    get_domain,
    int_or_none,
    iri_to_uri,
    is_path_like,
    join_nonempty,
    locked_file,
    make_archive_id,
    make_dir,
    make_HTTPS_handler,
    merge_headers,
    network_exceptions,
    number_of_digits,
    orderedSet,
    orderedSet_from_options,
    parse_filesize,
    preferredencoding,
    prepend_extension,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    system_identifier,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_call,
    try_get,
    url_basename,
    variadic,
    version_tuple,
    windows_enable_vt_mode,
    write_json_file,
    write_string,
)
from .utils.networking import clean_headers
from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are responsible for downloading the actual video
    file and writing it to disk if the user has requested it, among
    other tasks. In most cases there should be one per program. Since,
    given a video URL, the downloader doesn't know how to extract all
    the needed information (a task InfoExtractors do), it has to pass
    the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor
    that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to,
    and YoutubeDL processes the extracted information, possibly using a
    File Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to
    saturate the object constructor with arguments, it receives a
    dictionary of options instead. These options are available through
    the params attribute for the InfoExtractors to use. The YoutubeDL
    object also registers itself as the downloader in charge for the
    InfoExtractors that are added to it, so this is a "mutual
    registration".

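    A minimal usage sketch (the URL and the option values are
    illustrative; download() is the high-level entry point of this
    class):

        from yt_dlp import YoutubeDL

        with YoutubeDL({'format': 'best', 'outtmpl': '%(title)s.%(ext)s'}) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
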
    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    netrc_location:    Location of the netrc file. Defaults to ~/.netrc.
    netrc_cmd:         Use a shell command to get credentials
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are video or any of the
                       items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. See "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of the same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
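                       E.g. (illustrative): {'default': '%(title)s [%(id)s].%(ext)s',
                       'infojson': '%(title)s.%(ext)s'}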
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be Windows-compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    allowed_extractors: List of regexes to match against extractor names that are allowed
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playlist_items:    Specific indices of playlist to download.
    playlistrandom:    Download playlist items in random order.
    lazy_playlist:     Process playlist entries as they are received.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Print everything to stderr instead of stdout.
    consoletitle:      Display progress in the console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove internal metadata from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc.
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages, e.g. ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A utils.DateRange object; download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  A set, or the name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to for the entire queue
    cookiefile:        File name or text stream that cookies should be read from and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, the name of the keyring,
                       and the container name, e.g. ('chrome', ) or
                       ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    client_certificate: Path to client certificate file in PEM format. May include the private key
    client_certificate_key: Path to private key file for client certificate
    client_certificate_password: Password for client certificate private key, if encrypted.
                       If not provided and the key is encrypted, yt-dlp will ask interactively
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       (Only supported by some extractors)
    enable_file_urls:  Enable file:// URLs. This is disabled by default for security reasons.
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    default_search:    Prepend this string if an input URL is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Whether to resolve and process url_results further
                       * False: Always process. Default for API
                       * True: Never process
                       * 'in_playlist': Do not process inside playlist/multi_video
                       * 'discard': Always process, but don't return the result
                         from inside playlist/multi_video
                       * 'discard_in_playlist': Same as "discard", but only for
                         playlists (not multi_video). Default for CLI
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                         the entries of utils.POSTPROCESS_WHEN
                         Assumed to be 'post_process' if not given
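                       E.g. (illustrative; arguments depend on the PP):
                       [{'key': 'EmbedThumbnail', 'already_have_thumbnail': True}]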
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                         Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading" or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                         None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                         unknown
                       * fragment_index: The counter of the currently
                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
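                       E.g. (illustrative): lambda d: print(d['status'], d.get('filename'))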
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                         Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: "/" separated list of extensions to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       - Raise utils.DownloadCancelled(msg) to abort remaining
                         downloads when a video is rejected.
                       match_filter_func in utils.py is one example for this.
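                       E.g. (illustrative): lambda info, *, incomplete: (
                       'Too short' if (info.get('duration') or 0) < 60 else None)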
    color:             A dictionary with output stream names as keys
                       and their respective color policy as values.
                       Can also just be a single color policy,
                       in which case it applies to all outputs.
                       Valid stream names are 'stdout' and 'stderr'.
                       Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
    geo_bypass:        Bypass geographic restriction by faking the
                       X-Forwarded-For HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-1 alpha-2 country code that will be
                       used for explicit geographic restriction bypassing by
                       faking the X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort,
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'
    retry_sleep_functions: Dictionary of functions that take the number of attempts
                       as argument and return the time to sleep in seconds.
                       Allowed keys are 'http', 'fragment', 'file_access'
    download_ranges:   A callback function that gets called for every video with
                       the signature (info_dict, ydl) -> Iterable[Section].
                       Only the returned sections will be downloaded.
                       Each Section is a dict with the following keys:
                       * start_time: Start time of the section in seconds
                       * end_time: End time of the section in seconds
                       * title: Section title (Optional)
                       * index: Section number (Optional)
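                       E.g. (illustrative): lambda info, ydl: [{'start_time': 0,
                       'end_time': 30, 'title': 'intro', 'index': 1}]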
    force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
    noprogress:        Do not print the progress bar
    live_from_start:   Whether to download livestreams from the start

    The following parameters are not used by YoutubeDL itself, they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors (default: 3)
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists into different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       E.g. {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
                       - `raise DownloadCancelled(msg)` in match_filter instead
    force_generic_extractor: Force downloader to use the generic extractor
                       - Use allowed_extractors = ['generic', 'default']
    playliststart:     - Use playlist_items
                       Playlist item to start at.
    playlistend:       - Use playlist_items
                       Playlist item to end at.
    playlistreverse:   - Use playlist_items
                       Download playlist items in reverse order.
    forceurl:          - Use forceprint
                       Force printing final URL.
    forcetitle:        - Use forceprint
                       Force printing title.
    forceid:           - Use forceprint
                       Force printing ID.
    forcethumbnail:    - Use forceprint
                       Force printing thumbnail URL.
    forcedescription:  - Use forceprint
                       Force printing description.
    forcefilename:     - Use forceprint
                       Force printing final filename.
    forceduration:     - Use forceprint
                       Force printing duration.
    allsubtitles:      - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    include_ads:       - Doesn't work
                       Download ads as well
    call_home:         - Not implemented
                       Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging.
    post_hooks:        - Register a custom postprocessor instead
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    prefer_ffmpeg:     - avconv support is deprecated
                       If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by the extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by the extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    no_color:          Same as `color='no_color'`
    """

    _NUMERIC_FIELDS = {
        'width', 'height', 'asr', 'audio_channels', 'fps',
        'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
    }
    _format_selection_exts = {
        'audio': set(MEDIA_EXTENSIONS.common_audio),
        'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
        'storyboards': set(MEDIA_EXTENSIONS.storyboards),
    }

    def __init__(self, params=None, auto_init=True):
        """Create a YoutubeDL object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )

        try:
            windows_enable_vt_mode()
        except Exception as e:
            self.write_debug(f'Failed to enable VT mode: {e}')

        if self.params.get('no_color'):
            if self.params.get('color') is not None:
                self.report_warning('Overwriting params from "color" with "no_color"')
            self.params['color'] = 'no_color'

        term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'

        def process_color_policy(stream):
            stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
            policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
            if policy in ('auto', None):
                return term_allow_color and supports_terminal_sequences(stream)
            assert policy in ('always', 'never', 'no_color')
            return {'always': True, 'never': False}.get(policy, policy)

        self._allow_colors = Namespace(**{
            name: process_color_policy(stream)
            for name, stream in self._out_files.items_ if name != 'console'
        })

        # The code is left like this to be reused for future deprecations
        MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
        current_version = sys.version_info[:2]
        if current_version < MIN_RECOMMENDED:
            msg = ('Support for Python version %d.%d has been deprecated. '
                   'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
                   '\n You will no longer receive updates on this version')
            if current_version < MIN_SUPPORTED:
                msg = 'Python version %d.%d is no longer supported'
            self.deprecated_feature(
                f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))
        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()

        self.__header_cookies = []
        self._load_cookies(traverse_obj(self.params.get('http_headers'), 'cookie', casesense=False))  # compat

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if self.params.get('simulate') is None and any((
            self.params.get('list_thumbnails'),
            self.params.get('listformats'),
            self.params.get('listsubtitles'),
        )):
            self.params['simulate'] = 'list_only'

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if auto_init:
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        self._setup_opener()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            archive = set()
            if fn is None:
                return archive
            elif not is_path_like(fn):
                return fn

            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
            return archive

        self.archive = preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key. It will try to get one from
        the _ies_instances list; if there is no instance, it will create a new
        one and add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractor_classes to the end of the list
        """
        all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
        all_ies['end'] = UnsupportedURLIE()
        try:
            ie_names = orderedSet_from_options(
                self.params.get('allowed_extractors', ['default']), {
                    'all': list(all_ies),
                    'default': [name for name, ie in all_ies.items() if ie._ENABLED],
                }, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
        for name in ie_names:
            self.add_info_extractor(all_ies[name])
        self.write_debug(f'Loaded {len(ie_names)} extractors')

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                     'Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                     'Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen, only_once=only_once)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def save_cookies(self):
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def __exit__(self, *args):
        self.restore_console_title()
        self.save_cookies()

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to
        ignore download errors or not, this method may throw an
        exception when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        BAD_FORMAT='light red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors is True else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message, *, stacklevel=0):
        deprecation_warning(
            message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)

    def deprecated_feature(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'Deprecated Feature: {message}')
        self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Does the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message or print message to stderr'''
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        sanitize = IDENTITY
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$';
        # that is not what we want, since we need to keep '%%' intact for
        # the template dict substitution step. Work around it with a
        # boundary-like separator hack.
        sep = ''.join(random.choices(string.ascii_letters, k=32))
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        info_dict.pop('__pending_error', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int, slice or "{field, ...}"
        FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
        FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
            'inner': FIELD_INNER_RE,
            'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
        }
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')
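
        # A sketch of the template mini-language the regex above accepts
        # (examples adapted from the README; field values are illustrative):
        #   %(title)s                              plain field
        #   %(tags.0)s                             object traversal: key1.key2...
        #   %(duration>%H-%M-%S)s                  strftime-style formatting after '>'
        #   %(epoch-3600)s                         maths using the operators in MATH_FUNCTIONS
        #   %(release_date>%Y,upload_date>%Y)s     first non-None alternate after ','
        #   %(chapters&has chapters|no chapters)s  '&' replacement if set, '|' default if not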
1197
1198 def _traverse_infodict(fields):
1199 fields = [f for x in re.split(r'\.({.+?})\.?', fields)
1200 for f in ([x] if x.startswith('{') else x.split('.'))]
1201 for i in (0, -1):
1202 if fields and not fields[i]:
1203 fields.pop(i)
1204
1205 for i, f in enumerate(fields):
1206 if not f.startswith('{'):
1207 continue
1208 assert f.endswith('}'), f'No closing brace for {f} in {fields}'
1209 fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}
1210
1211 return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)
1212
1213 def get_value(mdict):
1214 # Object traversal
1215 value = _traverse_infodict(mdict['fields'])
1216 # Negative
1217 if mdict['negate']:
1218 value = float_or_none(value)
1219 if value is not None:
1220 value *= -1
1221 # Do maths
1222 offset_key = mdict['maths']
1223 if offset_key:
1224 value = float_or_none(value)
1225 operator = None
1226 while offset_key:
1227 item = re.match(
1228 MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
1229 offset_key).group(0)
1230 offset_key = offset_key[len(item):]
1231 if operator is None:
1232 operator = MATH_FUNCTIONS[item]
1233 continue
1234 item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
1235 offset = float_or_none(item)
1236 if offset is None:
1237 offset = float_or_none(_traverse_infodict(item))
1238 try:
1239 value = operator(value, multiplier * offset)
1240 except (TypeError, ZeroDivisionError):
1241 return None
1242 operator = None
1243 # Datetime formatting
1244 if mdict['strf_format']:
1245 value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))
1246
1247 # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
1248 if sanitize and value == '':
1249 value = None
1250 return value
1251
1252 na = self.params.get('outtmpl_na_placeholder', 'NA')
1253
1254 def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
1255 return sanitize_filename(str(value), restricted=restricted, is_id=(
1256 bool(re.search(r'(^|[_.])id(\.|$)', key))
1257 if 'filename-sanitization' in self.params['compat_opts']
1258 else NO_DEFAULT))
1259
1260 sanitizer = sanitize if callable(sanitize) else filename_sanitizer
1261 sanitize = bool(sanitize)
1262
1263 def _dumpjson_default(obj):
1264 if isinstance(obj, (set, LazyList)):
1265 return list(obj)
1266 return repr(obj)
1267
1268 class _ReplacementFormatter(string.Formatter):
1269 def get_field(self, field_name, args, kwargs):
1270 if field_name.isdigit():
1271 return args[0], -1
1272 raise ValueError('Unsupported field')
1273
1274 replacement_formatter = _ReplacementFormatter()
1275
1276 def create_key(outer_mobj):
1277 if not outer_mobj.group('has_key'):
1278 return outer_mobj.group(0)
1279 key = outer_mobj.group('key')
1280 mobj = re.match(INTERNAL_FORMAT_RE, key)
1281 value, replacement, default, last_field = None, None, na, ''
1282 while mobj:
1283 mobj = mobj.groupdict()
1284 default = mobj['default'] if mobj['default'] is not None else default
1285 value = get_value(mobj)
1286 last_field, replacement = mobj['fields'], mobj['replacement']
1287 if value is None and mobj['alternate']:
1288 mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
1289 else:
1290 break
1291
1292 fmt = outer_mobj.group('format')
1293 if fmt == 's' and value is not None and last_field in field_size_compat_map.keys():
1294 fmt = f'0{field_size_compat_map[last_field]:d}d'
1295
1296 if None not in (value, replacement):
1297 try:
1298 value = replacement_formatter.format(replacement, value)
1299 except ValueError:
1300 value, default = None, na
1301
1302 flags = outer_mobj.group('conversion') or ''
1303 str_fmt = f'{fmt[:-1]}s'
1304 if value is None:
1305 value, fmt = default, 's'
1306 elif fmt[-1] == 'l': # list
1307 delim = '\n' if '#' in flags else ', '
1308 value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
1309 elif fmt[-1] == 'j': # json
1310 value, fmt = json.dumps(
1311 value, default=_dumpjson_default,
1312 indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
1313 elif fmt[-1] == 'h': # html
1314 value, fmt = escapeHTML(str(value)), str_fmt
1315 elif fmt[-1] == 'q': # quoted
1316 value = map(str, variadic(value) if '#' in flags else [value])
1317 value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
1318 elif fmt[-1] == 'B': # bytes
1319 value = f'%{str_fmt}'.encode() % str(value).encode()
1320 value, fmt = value.decode('utf-8', 'ignore'), 's'
1321 elif fmt[-1] == 'U': # unicode normalized
1322 value, fmt = unicodedata.normalize(
1323 # "+" = compatibility equivalence, "#" = NFD
1324 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
1325 value), str_fmt
1326 elif fmt[-1] == 'D': # decimal suffix
1327 num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
1328 value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
1329 factor=1024 if '#' in flags else 1000)
1330 elif fmt[-1] == 'S': # filename sanitization
1331 value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
1332 elif fmt[-1] == 'c':
1333 if value:
1334 value = str(value)[0]
1335 else:
1336 fmt = str_fmt
1337 elif fmt[-1] not in 'rsa': # numeric
1338 value = float_or_none(value)
1339 if value is None:
1340 value, fmt = default, 's'
1341
1342 if sanitize:
1343 # If value is an object, sanitize might convert it to a string
1344 # So we convert it to repr first
1345 if fmt[-1] == 'r':
1346 value, fmt = repr(value), str_fmt
1347 elif fmt[-1] == 'a':
1348 value, fmt = ascii(value), str_fmt
1349 if fmt[-1] in 'csra':
1350 value = sanitizer(last_field, value)
1351
1352 key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
1353 TMPL_DICT[key] = value
1354 return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))
1355
1356 return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1357
1358 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1359 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1360 return self.escape_outtmpl(outtmpl) % info_dict
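# Illustrative example (assuming a configured YoutubeDL instance `ydl`):
#   ydl.evaluate_outtmpl('%(title)s [%(id)s].%(ext)s',
#                        {'title': 'Demo', 'id': 'x1', 'ext': 'mp4'})
# would produce 'Demo [x1].mp4'.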
1361
1362 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1363 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1364 if outtmpl is None:
1365 outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
1366 try:
1367 outtmpl = self._outtmpl_expandpath(outtmpl)
1368 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1369 if not filename:
1370 return None
1371
1372 if tmpl_type in ('', 'temp'):
1373 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1374 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1375 filename = replace_extension(filename, ext, final_ext)
1376 elif tmpl_type:
1377 force_ext = OUTTMPL_TYPES[tmpl_type]
1378 if force_ext:
1379 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1380
1381 # https://github.com/blackjack4494/youtube-dlc/issues/85
1382 trim_file_name = self.params.get('trim_file_name', False)
1383 if trim_file_name:
1384 no_ext, *ext = filename.rsplit('.', 2)
1385 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1386
1387 return filename
1388 except ValueError as err:
1389 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1390 return None
1391
1392 def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1393 """Generate the output filename"""
1394 if outtmpl:
1395 assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1396 dir_type = None
1397 filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
1398 if not filename and dir_type not in ('', 'temp'):
1399 return ''
1400
1401 if warn:
1402 if not self.params.get('paths'):
1403 pass
1404 elif filename == '-':
1405 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
1406 elif os.path.isabs(filename):
1407 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1408 if filename == '-' or not filename:
1409 return filename
1410
1411 return self.get_output_path(dir_type, filename)
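# Illustrative usage: prepare_filename(info) yields the final media filepath
# for the default template, while e.g. dir_type='thumbnail' (one of
# OUTTMPL_TYPES) applies that template type instead. Passing outtmpl=...
# evaluates an explicit template and is mutually exclusive with dir_type.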
1412
1413 def _match_entry(self, info_dict, incomplete=False, silent=False):
1414 """Returns None if the file should be downloaded"""
1415 _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
1416 assert incomplete or _type == 'video', 'Only video result can be considered complete'
1417
1418 video_title = info_dict.get('title', info_dict.get('id', 'entry'))
1419
1420 def check_filter():
1421 if _type in ('playlist', 'multi_video'):
1422 return
1423 elif _type in ('url', 'url_transparent') and not try_call(
1424 lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
1425 return
1426
1427 if 'title' in info_dict:
1428 # This can happen when we're just evaluating the playlist
1429 title = info_dict['title']
1430 matchtitle = self.params.get('matchtitle', False)
1431 if matchtitle:
1432 if not re.search(matchtitle, title, re.IGNORECASE):
1433 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1434 rejecttitle = self.params.get('rejecttitle', False)
1435 if rejecttitle:
1436 if re.search(rejecttitle, title, re.IGNORECASE):
1437 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1438
1439 date = info_dict.get('upload_date')
1440 if date is not None:
1441 dateRange = self.params.get('daterange', DateRange())
1442 if date not in dateRange:
1443 return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
1444 view_count = info_dict.get('view_count')
1445 if view_count is not None:
1446 min_views = self.params.get('min_views')
1447 if min_views is not None and view_count < min_views:
1448 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1449 max_views = self.params.get('max_views')
1450 if max_views is not None and view_count > max_views:
1451 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1452 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1453 return 'Skipping "%s" because it is age restricted' % video_title
1454
1455 match_filter = self.params.get('match_filter')
1456 if match_filter is None:
1457 return None
1458
1459 cancelled = None
1460 try:
1461 try:
1462 ret = match_filter(info_dict, incomplete=incomplete)
1463 except TypeError:
1464 # For backward compatibility
1465 ret = None if incomplete else match_filter(info_dict)
1466 except DownloadCancelled as err:
1467 if err.msg is not NO_DEFAULT:
1468 raise
1469 ret, cancelled = err.msg, err
1470
1471 if ret is NO_DEFAULT:
1472 while True:
1473 filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1474 reply = input(self._format_screen(
1475 f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1476 if reply in {'y', ''}:
1477 return None
1478 elif reply == 'n':
1479 if cancelled:
1480 raise type(cancelled)(f'Skipping {video_title}')
1481 return f'Skipping {video_title}'
1482 return ret
1483
1484 if self.in_download_archive(info_dict):
1485 reason = '%s has already been recorded in the archive' % video_title
1486 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1487 else:
1488 try:
1489 reason = check_filter()
1490 except DownloadCancelled as e:
1491 reason, break_opt, break_err = e.msg, 'match_filter', type(e)
1492 else:
1493 break_opt, break_err = 'break_on_reject', RejectedVideoReached
1494 if reason is not None:
1495 if not silent:
1496 self.to_screen('[download] ' + reason)
1497 if self.params.get(break_opt, False):
1498 raise break_err()
1499 return reason
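# Illustrative sketch of a custom match_filter (hypothetical helper): any
# callable accepted by params['match_filter'] may return None to allow the
# download, a string reason to skip, or NO_DEFAULT to prompt interactively:
#   def match_filter(info_dict, *, incomplete=False):
#       if (info_dict.get('duration') or 0) > 3600:
#           return 'Skipping videos longer than an hour'
#       return None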
1500
1501 @staticmethod
1502 def add_extra_info(info_dict, extra_info):
1503 '''Set the keys from extra_info in info dict if they are missing'''
1504 for key, value in extra_info.items():
1505 info_dict.setdefault(key, value)
1506
1507 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1508 process=True, force_generic_extractor=False):
1509 """
1510 Extract and return the information dictionary of the URL
1511
1512 Arguments:
1513 @param url URL to extract
1514
1515 Keyword arguments:
1516 @param download Whether to download videos
1517 @param process Whether to resolve all unresolved references (URLs, playlist items).
1518 Must be True for download to work
1519 @param ie_key Use only the extractor with this key
1520
1521 @param extra_info Dictionary containing the extra values to add to the info (For internal use only)
1522 @param force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic')
1523 """
1524
1525 if extra_info is None:
1526 extra_info = {}
1527
1528 if not ie_key and force_generic_extractor:
1529 ie_key = 'Generic'
1530
1531 if ie_key:
1532 ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
1533 else:
1534 ies = self._ies
1535
1536 for key, ie in ies.items():
1537 if not ie.suitable(url):
1538 continue
1539
1540 if not ie.working():
1541 self.report_warning('The program functionality for this site has been marked as broken, '
1542 'and will probably not work.')
1543
1544 temp_id = ie.get_temp_id(url)
1545 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1546 self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
1547 if self.params.get('break_on_existing', False):
1548 raise ExistingVideoReached()
1549 break
1550 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1551 else:
1552 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1553 self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1554 tb=False if extractors_restricted else None)
1555
1556 def _handle_extraction_exceptions(func):
1557 @functools.wraps(func)
1558 def wrapper(self, *args, **kwargs):
1559 while True:
1560 try:
1561 return func(self, *args, **kwargs)
1562 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1563 raise
1564 except ReExtractInfo as e:
1565 if e.expected:
1566 self.to_screen(f'{e}; Re-extracting data')
1567 else:
1568 self.to_stderr('\r')
1569 self.report_warning(f'{e}; Re-extracting data')
1570 continue
1571 except GeoRestrictedError as e:
1572 msg = e.msg
1573 if e.countries:
1574 msg += '\nThis video is available in %s.' % ', '.join(
1575 map(ISO3166Utils.short2full, e.countries))
1576 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1577 self.report_error(msg)
1578 except ExtractorError as e: # An error we somewhat expected
1579 self.report_error(str(e), e.format_traceback())
1580 except Exception as e:
1581 if self.params.get('ignoreerrors'):
1582 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1583 else:
1584 raise
1585 break
1586 return wrapper
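# Illustrative note: this decorator wraps extraction entry points, e.g.
#   @_handle_extraction_exceptions
#   def __extract_info(self, url, ie, download, extra_info, process): ...
# It loops on ReExtractInfo, reports expected extractor errors, and re-raises
# anything else unless ignoreerrors is set.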
1587
1588 def _wait_for_video(self, ie_result={}):
1589 if (not self.params.get('wait_for_video')
1590 or ie_result.get('_type', 'video') != 'video'
1591 or ie_result.get('formats') or ie_result.get('url')):
1592 return
1593
1594 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1595 last_msg = ''
1596
1597 def progress(msg):
1598 nonlocal last_msg
1599 full_msg = f'{msg}\n'
1600 if not self.params.get('noprogress'):
1601 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1602 elif last_msg:
1603 return
1604 self.to_screen(full_msg, skip_eol=True)
1605 last_msg = msg
1606
1607 min_wait, max_wait = self.params.get('wait_for_video')
1608 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1609 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1610 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1611 self.report_warning('Release time of video is not known')
1612 elif ie_result and (diff or 0) <= 0:
1613 self.report_warning('Video should already be available according to extracted info')
1614 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1615 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1616
1617 wait_till = time.time() + diff
1618 try:
1619 while True:
1620 diff = wait_till - time.time()
1621 if diff <= 0:
1622 progress('')
1623 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1624 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1625 time.sleep(1)
1626 except KeyboardInterrupt:
1627 progress('')
1628 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1629 except BaseException as e:
1630 if not isinstance(e, ReExtractInfo):
1631 self.to_screen('')
1632 raise
1633
1634 def _load_cookies(self, data, *, from_headers=True):
1635 """Loads cookies from a `Cookie` header
1636
1637 This tries to work around the security vulnerability of passing cookies to every domain.
1638 See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
1639 The unscoped cookies are saved for later to be stored in the jar with a limited scope.
1640
1641 @param data The Cookie header as string to load the cookies from
1642 @param from_headers If `False`, allows Set-Cookie syntax in the cookie string (at least a domain will be required)
1643 """
1644 for cookie in LenientSimpleCookie(data).values():
1645 if from_headers and any(cookie.values()):
1646 raise ValueError('Invalid syntax in Cookie Header')
1647
1648 domain = cookie.get('domain') or ''
1649 expiry = cookie.get('expires')
1650 if expiry == '': # 0 is valid
1651 expiry = None
1652 prepared_cookie = http.cookiejar.Cookie(
1653 cookie.get('version') or 0, cookie.key, cookie.value, None, False,
1654 domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
1655 cookie.get('secure') or False, expiry, False, None, None, {})
1656
1657 if domain:
1658 self.cookiejar.set_cookie(prepared_cookie)
1659 elif from_headers:
1660 self.deprecated_feature(
1661 'Passing cookies as a header is a potential security risk; '
1662 'they will be scoped to the domain of the downloaded URLs. '
1663 'Please consider loading cookies from a file or browser instead.')
1664 self.__header_cookies.append(prepared_cookie)
1665 else:
1666 self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
1667 tb=False, is_error=False)
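# Illustrative example: loading cookies from a raw header value (sketch):
#   ydl._load_cookies('SID=abc; HSID=def', from_headers=True)
# The unscoped cookies are kept aside and later bound to each download's
# domain by _apply_header_cookies() below.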
1668
1669 def _apply_header_cookies(self, url):
1670 """Applies stray header cookies to the provided url
1671
1672 This loads header cookies and scopes them to the domain provided in `url`.
1673 While this is not ideal, it helps reduce the risk of them being sent
1674 to an unintended destination while mostly maintaining compatibility.
1675 """
1676 parsed = urllib.parse.urlparse(url)
1677 if not parsed.hostname:
1678 return
1679
1680 for cookie in map(copy.copy, self.__header_cookies):
1681 cookie.domain = f'.{parsed.hostname}'
1682 self.cookiejar.set_cookie(cookie)
1683
1684 @_handle_extraction_exceptions
1685 def __extract_info(self, url, ie, download, extra_info, process):
1686 self._apply_header_cookies(url)
1687
1688 try:
1689 ie_result = ie.extract(url)
1690 except UserNotLive as e:
1691 if process:
1692 if self.params.get('wait_for_video'):
1693 self.report_warning(e)
1694 self._wait_for_video()
1695 raise
1696 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1697 self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1698 return
1699 if isinstance(ie_result, list):
1700 # Backwards compatibility: old IE result format
1701 ie_result = {
1702 '_type': 'compat_list',
1703 'entries': ie_result,
1704 }
1705 if extra_info.get('original_url'):
1706 ie_result.setdefault('original_url', extra_info['original_url'])
1707 self.add_default_extra_info(ie_result, ie, url)
1708 if process:
1709 self._wait_for_video(ie_result)
1710 return self.process_ie_result(ie_result, download, extra_info)
1711 else:
1712 return ie_result
1713
1714 def add_default_extra_info(self, ie_result, ie, url):
1715 if url is not None:
1716 self.add_extra_info(ie_result, {
1717 'webpage_url': url,
1718 'original_url': url,
1719 })
1720 webpage_url = ie_result.get('webpage_url')
1721 if webpage_url:
1722 self.add_extra_info(ie_result, {
1723 'webpage_url_basename': url_basename(webpage_url),
1724 'webpage_url_domain': get_domain(webpage_url),
1725 })
1726 if ie is not None:
1727 self.add_extra_info(ie_result, {
1728 'extractor': ie.IE_NAME,
1729 'extractor_key': ie.ie_key(),
1730 })
1731
1732 def process_ie_result(self, ie_result, download=True, extra_info=None):
1733 """
1734 Take the result of the ie (may be modified) and resolve all unresolved
1735 references (URLs, playlist items).
1736
1737 It will also download the videos if 'download'.
1738 Returns the resolved ie_result.
1739 """
1740 if extra_info is None:
1741 extra_info = {}
1742 result_type = ie_result.get('_type', 'video')
1743
1744 if result_type in ('url', 'url_transparent'):
1745 ie_result['url'] = sanitize_url(
1746 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
1747 if ie_result.get('original_url') and not extra_info.get('original_url'):
1748 extra_info = {'original_url': ie_result['original_url'], **extra_info}
1749
1750 extract_flat = self.params.get('extract_flat', False)
1751 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1752 or extract_flat is True):
1753 info_copy = ie_result.copy()
1754 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1755 if ie and not ie_result.get('id'):
1756 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1757 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1758 self.add_extra_info(info_copy, extra_info)
1759 info_copy, _ = self.pre_process(info_copy)
1760 self._fill_common_fields(info_copy, False)
1761 self.__forced_printings(info_copy)
1762 self._raise_pending_errors(info_copy)
1763 if self.params.get('force_write_download_archive', False):
1764 self.record_download_archive(info_copy)
1765 return ie_result
1766
1767 if result_type == 'video':
1768 self.add_extra_info(ie_result, extra_info)
1769 ie_result = self.process_video_result(ie_result, download=download)
1770 self._raise_pending_errors(ie_result)
1771 additional_urls = (ie_result or {}).get('additional_urls')
1772 if additional_urls:
1773 # TODO: Improve MetadataParserPP to allow setting a list
1774 if isinstance(additional_urls, str):
1775 additional_urls = [additional_urls]
1776 self.to_screen(
1777 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1778 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1779 ie_result['additional_entries'] = [
1780 self.extract_info(
1781 url, download, extra_info=extra_info,
1782 force_generic_extractor=self.params.get('force_generic_extractor'))
1783 for url in additional_urls
1784 ]
1785 return ie_result
1786 elif result_type == 'url':
1787 # We have to add extra_info to the results because it may be
1788 # contained in a playlist
1789 return self.extract_info(
1790 ie_result['url'], download,
1791 ie_key=ie_result.get('ie_key'),
1792 extra_info=extra_info)
1793 elif result_type == 'url_transparent':
1794 # Use the information from the embedding page
1795 info = self.extract_info(
1796 ie_result['url'], ie_key=ie_result.get('ie_key'),
1797 extra_info=extra_info, download=False, process=False)
1798
1799 # extract_info may return None when ignoreerrors is enabled and
1800 # extraction failed with an error, don't crash and return early
1801 # in this case
1802 if not info:
1803 return info
1804
1805 exempted_fields = {'_type', 'url', 'ie_key'}
1806 if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1807 # For video clips, the id etc of the clip extractor should be used
1808 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1809
1810 new_result = info.copy()
1811 new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1812
1813 # Extracted info may not be a video result (i.e.
1814 # info.get('_type', 'video') != 'video') but rather a url or
1815 # url_transparent. In such cases outer metadata (from ie_result)
1816 # should be propagated to inner one (info). For this to happen
1817 # _type of info should be overridden with url_transparent. This
1818 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1819 if new_result.get('_type') == 'url':
1820 new_result['_type'] = 'url_transparent'
1821
1822 return self.process_ie_result(
1823 new_result, download=download, extra_info=extra_info)
1824 elif result_type in ('playlist', 'multi_video'):
1825 # Protect from infinite recursion due to recursively nested playlists
1826 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1827 webpage_url = ie_result.get('webpage_url') # Playlists may not have webpage_url
1828 if webpage_url and webpage_url in self._playlist_urls:
1829 self.to_screen(
1830 '[download] Skipping already downloaded playlist: %s'
1831 % (ie_result.get('title') or ie_result.get('id')))
1832 return
1833
1834 self._playlist_level += 1
1835 self._playlist_urls.add(webpage_url)
1836 self._fill_common_fields(ie_result, False)
1837 self._sanitize_thumbnails(ie_result)
1838 try:
1839 return self.__process_playlist(ie_result, download)
1840 finally:
1841 self._playlist_level -= 1
1842 if not self._playlist_level:
1843 self._playlist_urls.clear()
1844 elif result_type == 'compat_list':
1845 self.report_warning(
1846 'Extractor %s returned a compat_list result. '
1847 'It needs to be updated.' % ie_result.get('extractor'))
1848
1849 def _fixup(r):
1850 self.add_extra_info(r, {
1851 'extractor': ie_result['extractor'],
1852 'webpage_url': ie_result['webpage_url'],
1853 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1854 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1855 'extractor_key': ie_result['extractor_key'],
1856 })
1857 return r
1858 ie_result['entries'] = [
1859 self.process_ie_result(_fixup(r), download, extra_info)
1860 for r in ie_result['entries']
1861 ]
1862 return ie_result
1863 else:
1864 raise Exception('Invalid result type: %s' % result_type)
1865
1866 def _ensure_dir_exists(self, path):
1867 return make_dir(path, self.report_error)
1868
1869 @staticmethod
1870 def _playlist_infodict(ie_result, strict=False, **kwargs):
1871 info = {
1872 'playlist_count': ie_result.get('playlist_count'),
1873 'playlist': ie_result.get('title') or ie_result.get('id'),
1874 'playlist_id': ie_result.get('id'),
1875 'playlist_title': ie_result.get('title'),
1876 'playlist_uploader': ie_result.get('uploader'),
1877 'playlist_uploader_id': ie_result.get('uploader_id'),
1878 **kwargs,
1879 }
1880 if strict:
1881 return info
1882 if ie_result.get('webpage_url'):
1883 info.update({
1884 'webpage_url': ie_result['webpage_url'],
1885 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1886 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1887 })
1888 return {
1889 **info,
1890 'playlist_index': 0,
1891 '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
1892 'extractor': ie_result['extractor'],
1893 'extractor_key': ie_result['extractor_key'],
1894 }
1895
1896 def __process_playlist(self, ie_result, download):
1897 """Process each entry in the playlist"""
1898 assert ie_result['_type'] in ('playlist', 'multi_video')
1899
1900 common_info = self._playlist_infodict(ie_result, strict=True)
1901 title = common_info.get('playlist') or '<Untitled>'
1902 if self._match_entry(common_info, incomplete=True) is not None:
1903 return
1904 self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
1905
1906 all_entries = PlaylistEntries(self, ie_result)
1907 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1908
1909 lazy = self.params.get('lazy_playlist')
1910 if lazy:
1911 resolved_entries, n_entries = [], 'N/A'
1912 ie_result['requested_entries'], ie_result['entries'] = None, None
1913 else:
1914 entries = resolved_entries = list(entries)
1915 n_entries = len(resolved_entries)
1916 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1917 if not ie_result.get('playlist_count'):
1918 # Better to do this after potentially exhausting entries
1919 ie_result['playlist_count'] = all_entries.get_full_count()
1920
1921 extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1922 ie_copy = collections.ChainMap(ie_result, extra)
1923
1924 _infojson_written = False
1925 write_playlist_files = self.params.get('allow_playlist_files', True)
1926 if write_playlist_files and self.params.get('list_thumbnails'):
1927 self.list_thumbnails(ie_result)
1928 if write_playlist_files and not self.params.get('simulate'):
1929 _infojson_written = self._write_info_json(
1930 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1931 if _infojson_written is None:
1932 return
1933 if self._write_description('playlist', ie_result,
1934 self.prepare_filename(ie_copy, 'pl_description')) is None:
1935 return
1936 # TODO: This should be passed to ThumbnailsConvertor if necessary
1937 self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1938
1939 if lazy:
1940 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1941 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1942 elif self.params.get('playlistreverse'):
1943 entries.reverse()
1944 elif self.params.get('playlistrandom'):
1945 random.shuffle(entries)
1946
1947 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
1948 f'{format_field(ie_result, "playlist_count", " of %s")}')
1949
1950 keep_resolved_entries = self.params.get('extract_flat') != 'discard'
1951 if self.params.get('extract_flat') == 'discard_in_playlist':
1952 keep_resolved_entries = ie_result['_type'] != 'playlist'
1953 if keep_resolved_entries:
1954 self.write_debug('The information of all playlist entries will be held in memory')
1955
1956 failures = 0
1957 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1958 for i, (playlist_index, entry) in enumerate(entries):
1959 if lazy:
1960 resolved_entries.append((playlist_index, entry))
1961 if not entry:
1962 continue
1963
1964 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
1965 if not lazy and 'playlist-index' in self.params['compat_opts']:
1966 playlist_index = ie_result['requested_entries'][i]
1967
1968 entry_copy = collections.ChainMap(entry, {
1969 **common_info,
1970 'n_entries': int_or_none(n_entries),
1971 'playlist_index': playlist_index,
1972 'playlist_autonumber': i + 1,
1973 })
1974
1975 if self._match_entry(entry_copy, incomplete=True) is not None:
1976 # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
1977 resolved_entries[i] = (playlist_index, NO_DEFAULT)
1978 continue
1979
1980 self.to_screen('[download] Downloading item %s of %s' % (
1981 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
1982
1983 entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
1984 'playlist_index': playlist_index,
1985 'playlist_autonumber': i + 1,
1986 }, extra))
1987 if not entry_result:
1988 failures += 1
1989 if failures >= max_failures:
1990 self.report_error(
1991 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
1992 break
1993 if keep_resolved_entries:
1994 resolved_entries[i] = (playlist_index, entry_result)
1995
1996 # Update with processed data
1997 ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
1998 ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
1999 if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
2000 # Do not set for full playlist
2001 ie_result.pop('requested_entries')
2002
2003 # Write the updated info to json
2004 if _infojson_written is True and self._write_info_json(
2005 'updated playlist', ie_result,
2006 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
2007 return
2008
2009 ie_result = self.run_all_pps('playlist', ie_result)
2010 self.to_screen(f'[download] Finished downloading playlist: {title}')
2011 return ie_result
2012
2013 @_handle_extraction_exceptions
2014 def __process_iterable_entry(self, entry, download, extra_info):
2015 return self.process_ie_result(
2016 entry, download=download, extra_info=extra_info)
2017
2018 def _build_format_filter(self, filter_spec):
2019 " Returns a function to filter the formats according to the filter_spec "
2020
2021 OPERATORS = {
2022 '<': operator.lt,
2023 '<=': operator.le,
2024 '>': operator.gt,
2025 '>=': operator.ge,
2026 '=': operator.eq,
2027 '!=': operator.ne,
2028 }
2029 operator_rex = re.compile(r'''(?x)\s*
2030 (?P<key>[\w.-]+)\s*
2031 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
2032 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
2033 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
2034 m = operator_rex.fullmatch(filter_spec)
2035 if m:
2036 try:
2037 comparison_value = int(m.group('value'))
2038 except ValueError:
2039 comparison_value = parse_filesize(m.group('value'))
2040 if comparison_value is None:
2041 comparison_value = parse_filesize(m.group('value') + 'B')
2042 if comparison_value is None:
2043 raise ValueError(
2044 'Invalid value %r in format specification %r' % (
2045 m.group('value'), filter_spec))
2046 op = OPERATORS[m.group('op')]
2047
2048 if not m:
2049 STR_OPERATORS = {
2050 '=': operator.eq,
2051 '^=': lambda attr, value: attr.startswith(value),
2052 '$=': lambda attr, value: attr.endswith(value),
2053 '*=': lambda attr, value: value in attr,
2054 '~=': lambda attr, value: value.search(attr) is not None
2055 }
2056 str_operator_rex = re.compile(r'''(?x)\s*
2057 (?P<key>[a-zA-Z0-9._-]+)\s*
2058 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
2059 (?P<quote>["'])?
2060 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
2061 (?(quote)(?P=quote))\s*
2062 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
2063 m = str_operator_rex.fullmatch(filter_spec)
2064 if m:
2065 if m.group('op') == '~=':
2066 comparison_value = re.compile(m.group('value'))
2067 else:
2068 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2069 str_op = STR_OPERATORS[m.group('op')]
2070 if m.group('negation'):
2071 op = lambda attr, value: not str_op(attr, value)
2072 else:
2073 op = str_op
2074
2075 if not m:
2076 raise SyntaxError('Invalid filter specification %r' % filter_spec)
2077
2078 def _filter(f):
2079 actual_value = f.get(m.group('key'))
2080 if actual_value is None:
2081 return m.group('none_inclusive')
2082 return op(actual_value, comparison_value)
2083 return _filter
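# Illustrative usage (hypothetical formats): the returned predicate is applied
# to each format dict, e.g.
#   accept = self._build_format_filter('height<=720')
#   accept({'height': 480})   # -> True
#   accept({'height': 1080})  # -> False
#   accept({})                # -> None (falsy; use 'height<=720?' to include)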
2084
2085 def _check_formats(self, formats):
2086 for f in formats:
2087 self.to_screen('[info] Testing format %s' % f['format_id'])
2088 path = self.get_output_path('temp')
2089 if not self._ensure_dir_exists(f'{path}/'):
2090 continue
2091 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
2092 temp_file.close()
2093 try:
2094 success, _ = self.dl(temp_file.name, f, test=True)
2095 except (DownloadError, OSError, ValueError) + network_exceptions:
2096 success = False
2097 finally:
2098 if os.path.exists(temp_file.name):
2099 try:
2100 os.remove(temp_file.name)
2101 except OSError:
2102 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
2103 if success:
2104 yield f
2105 else:
2106 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
2107
2108 def _default_format_spec(self, info_dict, download=True):
2109
2110 def can_merge():
2111 merger = FFmpegMergerPP(self)
2112 return merger.available and merger.can_merge()
2113
2114 prefer_best = (
2115 not self.params.get('simulate')
2116 and download
2117 and (
2118 not can_merge()
2119 or info_dict.get('is_live') and not self.params.get('live_from_start')
2120 or self.params['outtmpl']['default'] == '-'))
2121 compat = (
2122 prefer_best
2123 or self.params.get('allow_multiple_audio_streams', False)
2124 or 'format-spec' in self.params['compat_opts'])
2125
2126 return (
2127 'best/bestvideo+bestaudio' if prefer_best
2128 else 'bestvideo*+bestaudio/best' if not compat
2129 else 'bestvideo+bestaudio/best')
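# For example (illustrative): with a working ffmpeg and a normal file target
# this resolves to 'bestvideo*+bestaudio/best'; when merging is unavailable
# (e.g. writing to stdout or a live stream without --live-from-start) it
# falls back to 'best/bestvideo+bestaudio'.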
2130
2131 def build_format_selector(self, format_spec):
2132 def syntax_error(note, start):
2133 message = (
2134 'Invalid format specification: '
2135 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
2136 return SyntaxError(message)
2137
2138 PICKFIRST = 'PICKFIRST'
2139 MERGE = 'MERGE'
2140 SINGLE = 'SINGLE'
2141 GROUP = 'GROUP'
2142 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2143
2144 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2145 'video': self.params.get('allow_multiple_video_streams', False)}
2146
2147 def _parse_filter(tokens):
2148 filter_parts = []
2149 for type, string_, start, _, _ in tokens:
2150 if type == tokenize.OP and string_ == ']':
2151 return ''.join(filter_parts)
2152 else:
2153 filter_parts.append(string_)
2154
2155 def _remove_unused_ops(tokens):
2156 # Remove operators that we don't use and join them with the surrounding strings.
2157 # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
2158 ALLOWED_OPS = ('/', '+', ',', '(', ')')
2159 last_string, last_start, last_end, last_line = None, None, None, None
2160 for type, string_, start, end, line in tokens:
2161 if type == tokenize.OP and string_ == '[':
2162 if last_string:
2163 yield tokenize.NAME, last_string, last_start, last_end, last_line
2164 last_string = None
2165 yield type, string_, start, end, line
2166 # everything inside brackets will be handled by _parse_filter
2167 for type, string_, start, end, line in tokens:
2168 yield type, string_, start, end, line
2169 if type == tokenize.OP and string_ == ']':
2170 break
2171 elif type == tokenize.OP and string_ in ALLOWED_OPS:
2172 if last_string:
2173 yield tokenize.NAME, last_string, last_start, last_end, last_line
2174 last_string = None
2175 yield type, string_, start, end, line
2176 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2177 if not last_string:
2178 last_string = string_
2179 last_start = start
2180 last_end = end
2181 else:
2182 last_string += string_
2183 if last_string:
2184 yield tokenize.NAME, last_string, last_start, last_end, last_line
2185
2186 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
2187 selectors = []
2188 current_selector = None
2189 for type, string_, start, _, _ in tokens:
2190 # ENCODING is only defined in python 3.x
2191 if type == getattr(tokenize, 'ENCODING', None):
2192 continue
2193 elif type in [tokenize.NAME, tokenize.NUMBER]:
2194 current_selector = FormatSelector(SINGLE, string_, [])
2195 elif type == tokenize.OP:
2196 if string_ == ')':
2197 if not inside_group:
2198 # ')' will be handled by the parentheses group
2199 tokens.restore_last_token()
2200 break
2201 elif inside_merge and string_ in ['/', ',']:
2202 tokens.restore_last_token()
2203 break
2204 elif inside_choice and string_ == ',':
2205 tokens.restore_last_token()
2206 break
2207 elif string_ == ',':
2208 if not current_selector:
2209 raise syntax_error('"," must follow a format selector', start)
2210 selectors.append(current_selector)
2211 current_selector = None
2212 elif string_ == '/':
2213 if not current_selector:
2214 raise syntax_error('"/" must follow a format selector', start)
2215 first_choice = current_selector
2216 second_choice = _parse_format_selection(tokens, inside_choice=True)
2217 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2218 elif string_ == '[':
2219 if not current_selector:
2220 current_selector = FormatSelector(SINGLE, 'best', [])
2221 format_filter = _parse_filter(tokens)
2222 current_selector.filters.append(format_filter)
2223 elif string_ == '(':
2224 if current_selector:
2225 raise syntax_error('Unexpected "("', start)
2226 group = _parse_format_selection(tokens, inside_group=True)
2227 current_selector = FormatSelector(GROUP, group, [])
2228 elif string_ == '+':
2229 if not current_selector:
2230 raise syntax_error('Unexpected "+"', start)
2231 selector_1 = current_selector
2232 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2233 if not selector_2:
2234 raise syntax_error('Expected a selector', start)
2235 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2236 else:
2237 raise syntax_error(f'Operator not recognized: "{string_}"', start)
2238 elif type == tokenize.ENDMARKER:
2239 break
2240 if current_selector:
2241 selectors.append(current_selector)
2242 return selectors
2243
2244 def _merge(formats_pair):
2245 format_1, format_2 = formats_pair
2246
2247 formats_info = []
2248 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2249 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2250
2251 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2252 get_no_more = {'video': False, 'audio': False}
2253 for (i, fmt_info) in enumerate(formats_info):
2254 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2255 formats_info.pop(i)
2256 continue
2257 for aud_vid in ['audio', 'video']:
2258 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2259 if get_no_more[aud_vid]:
2260 formats_info.pop(i)
2261 break
2262 get_no_more[aud_vid] = True
2263
2264 if len(formats_info) == 1:
2265 return formats_info[0]
2266
2267 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2268 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2269
2270 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2271 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2272
2273 output_ext = get_compatible_ext(
2274 vcodecs=[f.get('vcodec') for f in video_fmts],
2275 acodecs=[f.get('acodec') for f in audio_fmts],
2276 vexts=[f['ext'] for f in video_fmts],
2277 aexts=[f['ext'] for f in audio_fmts],
2278 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2279 or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
2280
2281 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2282
2283 new_dict = {
2284 'requested_formats': formats_info,
2285 'format': '+'.join(filtered('format')),
2286 'format_id': '+'.join(filtered('format_id')),
2287 'ext': output_ext,
2288 'protocol': '+'.join(map(determine_protocol, formats_info)),
2289 'language': '+'.join(orderedSet(filtered('language'))) or None,
2290 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2291 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2292 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2293 }
2294
2295 if the_only_video:
2296 new_dict.update({
2297 'width': the_only_video.get('width'),
2298 'height': the_only_video.get('height'),
2299 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2300 'fps': the_only_video.get('fps'),
2301 'dynamic_range': the_only_video.get('dynamic_range'),
2302 'vcodec': the_only_video.get('vcodec'),
2303 'vbr': the_only_video.get('vbr'),
2304 'stretched_ratio': the_only_video.get('stretched_ratio'),
2305 'aspect_ratio': the_only_video.get('aspect_ratio'),
2306 })
2307
2308 if the_only_audio:
2309 new_dict.update({
2310 'acodec': the_only_audio.get('acodec'),
2311 'abr': the_only_audio.get('abr'),
2312 'asr': the_only_audio.get('asr'),
2313 'audio_channels': the_only_audio.get('audio_channels')
2314 })
2315
2316 return new_dict
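# Sketch (assumed format IDs): merging a video-only format '137' with an
# audio-only format '140' yields roughly
#   {'format_id': '137+140', 'requested_formats': [f137, f140],
#    'ext': <from get_compatible_ext()>, 'protocol': 'https+https', ...}
# plus the width/height/vcodec of the only video and acodec/abr of the only audio.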
2317
2318 def _check_formats(formats):
2319 if (self.params.get('check_formats') is not None
2320 or self.params.get('allow_unplayable_formats')):
2321 yield from formats
2322 return
2323 elif self.params.get('check_formats') == 'selected':
2324 yield from self._check_formats(formats)
2325 return
2326
2327 for f in formats:
2328 if f.get('has_drm'):
2329 yield from self._check_formats([f])
2330 else:
2331 yield f
2332
2333 def _build_selector_function(selector):
2334 if isinstance(selector, list): # ,
2335 fs = [_build_selector_function(s) for s in selector]
2336
2337 def selector_function(ctx):
2338 for f in fs:
2339 yield from f(ctx)
2340 return selector_function
2341
2342 elif selector.type == GROUP: # ()
2343 selector_function = _build_selector_function(selector.selector)
2344
2345 elif selector.type == PICKFIRST: # /
2346 fs = [_build_selector_function(s) for s in selector.selector]
2347
2348 def selector_function(ctx):
2349 for f in fs:
2350 picked_formats = list(f(ctx))
2351 if picked_formats:
2352 return picked_formats
2353 return []
2354
2355 elif selector.type == MERGE: # +
2356 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2357
2358 def selector_function(ctx):
2359 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2360 yield _merge(pair)
2361
2362 elif selector.type == SINGLE: # atom
2363 format_spec = selector.selector or 'best'
2364
2365 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2366 if format_spec == 'all':
2367 def selector_function(ctx):
2368 yield from _check_formats(ctx['formats'][::-1])
2369 elif format_spec == 'mergeall':
2370 def selector_function(ctx):
2371 formats = list(_check_formats(
2372 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2373 if not formats:
2374 return
2375 merged_format = formats[-1]
2376 for f in formats[-2::-1]:
2377 merged_format = _merge((merged_format, f))
2378 yield merged_format
2379
2380 else:
2381 format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
2382 mobj = re.match(
2383 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2384 format_spec)
2385 if mobj is not None:
2386 format_idx = int_or_none(mobj.group('n'), default=1)
2387 format_reverse = mobj.group('bw')[0] == 'b'
2388 format_type = (mobj.group('type') or [None])[0]
2389 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2390 format_modified = mobj.group('mod') is not None
2391
2392 format_fallback = not format_type and not format_modified # for b, w
2393 _filter_f = (
2394 (lambda f: f.get('%scodec' % format_type) != 'none')
2395 if format_type and format_modified # bv*, ba*, wv*, wa*
2396 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2397 if format_type # bv, ba, wv, wa
2398 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2399 if not format_modified # b, w
2400 else lambda f: True) # b*, w*
2401 filter_f = lambda f: _filter_f(f) and (
2402 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2403 else:
2404 if format_spec in self._format_selection_exts['audio']:
2405 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2406 elif format_spec in self._format_selection_exts['video']:
2407 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2408 separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2409 elif format_spec in self._format_selection_exts['storyboards']:
2410 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2411 else:
2412 filter_f = lambda f: f.get('format_id') == format_spec # id
2413
2414 def selector_function(ctx):
2415 formats = list(ctx['formats'])
2416 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2417 if not matches:
2418 if format_fallback and ctx['incomplete_formats']:
2419 # for extractors with incomplete formats (audio only (soundcloud)
2420 # or video only (imgur)) best/worst will fall back to
2421 # best/worst {video,audio}-only format
2422 matches = formats
2423 elif separate_fallback and not ctx['has_merged_format']:
2424 # for compatibility with youtube-dl when there is no pre-merged format
2425 matches = list(filter(separate_fallback, formats))
2426 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2427 try:
2428 yield matches[format_idx - 1]
2429 except LazyList.IndexError:
2430 return
2431
2432 filters = [self._build_format_filter(f) for f in selector.filters]
2433
2434 def final_selector(ctx):
2435 ctx_copy = dict(ctx)
2436 for _filter in filters:
2437 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2438 return selector_function(ctx_copy)
2439 return final_selector
2440
2441 stream = io.BytesIO(format_spec.encode())
2442 try:
2443 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2444 except tokenize.TokenError:
2445 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2446
2447 class TokenIterator:
2448 def __init__(self, tokens):
2449 self.tokens = tokens
2450 self.counter = 0
2451
2452 def __iter__(self):
2453 return self
2454
2455 def __next__(self):
2456 if self.counter >= len(self.tokens):
2457 raise StopIteration()
2458 value = self.tokens[self.counter]
2459 self.counter += 1
2460 return value
2461
2462 next = __next__
2463
2464 def restore_last_token(self):
2465 self.counter -= 1
2466
2467 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2468 return _build_selector_function(parsed_selector)
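# Illustrative use (sketch): a selector built here is later called with a ctx
# dict, mirroring process_video_result() below:
#   selector = self.build_format_selector('bv*[height<=1080]+ba/b')
#   chosen = list(selector({'formats': formats,
#                           'has_merged_format': False,
#                           'incomplete_formats': False}))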
2469
2470 def _calc_headers(self, info_dict):
2471 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
2472 clean_headers(res)
2473 cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
2474 if cookies:
2475 encoder = LenientSimpleCookie()
2476 values = []
2477 for cookie in cookies:
2478 _, value = encoder.value_encode(cookie.value)
2479 values.append(f'{cookie.name}={value}')
2480 if cookie.domain:
2481 values.append(f'Domain={cookie.domain}')
2482 if cookie.path:
2483 values.append(f'Path={cookie.path}')
2484 if cookie.secure:
2485 values.append('Secure')
2486 if cookie.expires:
2487 values.append(f'Expires={cookie.expires}')
2488 if cookie.version:
2489 values.append(f'Version={cookie.version}')
2490 info_dict['cookies'] = '; '.join(values)
2491
2492 if 'X-Forwarded-For' not in res:
2493 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2494 if x_forwarded_for_ip:
2495 res['X-Forwarded-For'] = x_forwarded_for_ip
2496
2497 return res
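# Illustrative example (assumed values): with params['http_headers'] containing
# {'User-Agent': 'UA'} and a format carrying {'Referer': 'https://example.com'},
# the merged result holds both headers, while matching cookies are serialized
# into info_dict['cookies'] (e.g. 'SID=abc; Domain=.example.com; Path=/')
# instead of being placed in the header dict.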
2498
2499 def _calc_cookies(self, url):
2500 self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
2501 return self.cookiejar.get_cookie_header(url)
2502
2503 def _sort_thumbnails(self, thumbnails):
2504 thumbnails.sort(key=lambda t: (
2505 t.get('preference') if t.get('preference') is not None else -1,
2506 t.get('width') if t.get('width') is not None else -1,
2507 t.get('height') if t.get('height') is not None else -1,
2508 t.get('id') if t.get('id') is not None else '',
2509 t.get('url')))
2510
2511 def _sanitize_thumbnails(self, info_dict):
2512 thumbnails = info_dict.get('thumbnails')
2513 if thumbnails is None:
2514 thumbnail = info_dict.get('thumbnail')
2515 if thumbnail:
2516 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2517 if not thumbnails:
2518 return
2519
2520 def check_thumbnails(thumbnails):
2521 for t in thumbnails:
2522 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2523 try:
2524 self.urlopen(HEADRequest(t['url']))
2525 except network_exceptions as err:
2526 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2527 continue
2528 yield t
2529
2530 self._sort_thumbnails(thumbnails)
2531 for i, t in enumerate(thumbnails):
2532 if t.get('id') is None:
2533 t['id'] = '%d' % i
2534 if t.get('width') and t.get('height'):
2535 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2536 t['url'] = sanitize_url(t['url'])
2537
2538 if self.params.get('check_formats') is True:
2539 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2540 else:
2541 info_dict['thumbnails'] = thumbnails
2542
2543 def _fill_common_fields(self, info_dict, final=True):
2544 # TODO: move sanitization here
2545 if final:
2546 title = info_dict['fulltitle'] = info_dict.get('title')
2547 if not title:
2548 if title == '':
2549 self.write_debug('Extractor gave empty title. Creating a generic title')
2550 else:
2551 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2552 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2553
2554 if info_dict.get('duration') is not None:
2555 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2556
2557 for ts_key, date_key in (
2558 ('timestamp', 'upload_date'),
2559 ('release_timestamp', 'release_date'),
2560 ('modified_timestamp', 'modified_date'),
2561 ):
2562 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2563 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2564 # see http://bugs.python.org/issue1646728)
2565 with contextlib.suppress(ValueError, OverflowError, OSError):
2566 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2567 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2568
2569 live_keys = ('is_live', 'was_live')
2570 live_status = info_dict.get('live_status')
2571 if live_status is None:
2572 for key in live_keys:
2573 if info_dict.get(key) is False:
2574 continue
2575 if info_dict.get(key):
2576 live_status = key
2577 break
2578 if all(info_dict.get(key) is False for key in live_keys):
2579 live_status = 'not_live'
2580 if live_status:
2581 info_dict['live_status'] = live_status
2582 for key in live_keys:
2583 if info_dict.get(key) is None:
2584 info_dict[key] = (live_status == key)
2585 if live_status == 'post_live':
2586 info_dict['was_live'] = True
2587
2588 # Auto generate title fields corresponding to the *_number fields when missing
2589 # in order to always have clean titles. This is very common for TV series.
2590 for field in ('chapter', 'season', 'episode'):
2591 if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2592 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
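# Worked example (illustrative): timestamp=1577836800 with no upload_date
# yields upload_date='20200101'; is_live=True without an explicit live_status
# sets live_status='is_live'; and episode_number=3 with no episode title
# produces episode='Episode 3'.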
2593
2594 def _raise_pending_errors(self, info):
2595 err = info.pop('__pending_error', None)
2596 if err:
2597 self.report_error(err, tb=False)
2598
2599 def sort_formats(self, info_dict):
2600 formats = self._get_formats(info_dict)
2601 formats.sort(key=FormatSorter(
2602 self, info_dict.get('_format_sort_fields') or []).calculate_preference)
2603
2604 def process_video_result(self, info_dict, download=True):
2605 assert info_dict.get('_type', 'video') == 'video'
2606 self._num_videos += 1
2607
2608 if 'id' not in info_dict:
2609 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2610 elif not info_dict.get('id'):
2611 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2612
2613 def report_force_conversion(field, field_not, conversion):
2614 self.report_warning(
2615 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2616 % (field, field_not, conversion))
2617
2618 def sanitize_string_field(info, string_field):
2619 field = info.get(string_field)
2620 if field is None or isinstance(field, str):
2621 return
2622 report_force_conversion(string_field, 'a string', 'string')
2623 info[string_field] = str(field)
2624
2625 def sanitize_numeric_fields(info):
2626 for numeric_field in self._NUMERIC_FIELDS:
2627 field = info.get(numeric_field)
2628 if field is None or isinstance(field, (int, float)):
2629 continue
2630 report_force_conversion(numeric_field, 'numeric', 'int')
2631 info[numeric_field] = int_or_none(field)
2632
2633 sanitize_string_field(info_dict, 'id')
2634 sanitize_numeric_fields(info_dict)
2635 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2636 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
2637 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2638 self.report_warning('"duration" field is negative, there is an error in extractor')
2639
2640 chapters = info_dict.get('chapters') or []
2641 if chapters and chapters[0].get('start_time'):
2642 chapters.insert(0, {'start_time': 0})
2643
2644 dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
2645 for idx, (prev, current, next_) in enumerate(zip(
2646 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
2647 if current.get('start_time') is None:
2648 current['start_time'] = prev.get('end_time')
2649 if not current.get('end_time'):
2650 current['end_time'] = next_.get('start_time')
2651 if not current.get('title'):
2652 current['title'] = f'<Untitled Chapter {idx}>'
2653
2654 if 'playlist' not in info_dict:
2655 # It isn't part of a playlist
2656 info_dict['playlist'] = None
2657 info_dict['playlist_index'] = None
2658
2659 self._sanitize_thumbnails(info_dict)
2660
2661 thumbnail = info_dict.get('thumbnail')
2662 thumbnails = info_dict.get('thumbnails')
2663 if thumbnail:
2664 info_dict['thumbnail'] = sanitize_url(thumbnail)
2665 elif thumbnails:
2666 info_dict['thumbnail'] = thumbnails[-1]['url']
2667
2668 if info_dict.get('display_id') is None and 'id' in info_dict:
2669 info_dict['display_id'] = info_dict['id']
2670
2671 self._fill_common_fields(info_dict)
2672
2673 for cc_kind in ('subtitles', 'automatic_captions'):
2674 cc = info_dict.get(cc_kind)
2675 if cc:
2676 for _, subtitle in cc.items():
2677 for subtitle_format in subtitle:
2678 if subtitle_format.get('url'):
2679 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2680 if subtitle_format.get('ext') is None:
2681 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2682
2683 automatic_captions = info_dict.get('automatic_captions')
2684 subtitles = info_dict.get('subtitles')
2685
2686 info_dict['requested_subtitles'] = self.process_subtitles(
2687 info_dict['id'], subtitles, automatic_captions)
2688
2689 formats = self._get_formats(info_dict)
2690
2691 # Backward compatibility with InfoExtractor._sort_formats
2692 field_preference = (formats or [{}])[0].pop('__sort_fields', None)
2693 if field_preference:
2694 info_dict['_format_sort_fields'] = field_preference
2695
2696 info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it
2697 f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
2698 if not self.params.get('allow_unplayable_formats'):
2699 formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']
2700
2701 if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2702 self.report_warning(
2703 f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}'
2704 'only images are available for download. Use --list-formats to see them'.capitalize())
2705
2706 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2707 if not get_from_start:
2708 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2709 if info_dict.get('is_live') and formats:
2710 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2711 if get_from_start and not formats:
2712 self.raise_no_formats(info_dict, msg=(
2713 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2714 'If you want to download from the current time, use --no-live-from-start'))
2715
2716 def is_wellformed(f):
2717 url = f.get('url')
2718 if not url:
2719 self.report_warning(
2720 '"url" field is missing or empty - skipping format, '
2721 'there is an error in extractor')
2722 return False
2723 if isinstance(url, bytes):
2724 sanitize_string_field(f, 'url')
2725 return True
2726
2727 # Filter out malformed formats for better extraction robustness
2728 formats = list(filter(is_wellformed, formats or []))
2729
2730 if not formats:
2731 self.raise_no_formats(info_dict)
2732
2733 for format in formats:
2734 sanitize_string_field(format, 'format_id')
2735 sanitize_numeric_fields(format)
2736 format['url'] = sanitize_url(format['url'])
2737 if format.get('ext') is None:
2738 format['ext'] = determine_ext(format['url']).lower()
2739 if format.get('protocol') is None:
2740 format['protocol'] = determine_protocol(format)
2741 if format.get('resolution') is None:
2742 format['resolution'] = self.format_resolution(format, default=None)
2743 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2744 format['dynamic_range'] = 'SDR'
2745 if format.get('aspect_ratio') is None:
2746 format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
2747 if (not format.get('manifest_url') # For fragmented formats, "tbr" is often max bitrate and not average
2748 and info_dict.get('duration') and format.get('tbr')
2749 and not format.get('filesize') and not format.get('filesize_approx')):
2750 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
2751 format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict))
2752
2753 # This is copied to http_headers by the above _calc_headers and can now be removed
2754 if '__x_forwarded_for_ip' in info_dict:
2755 del info_dict['__x_forwarded_for_ip']
2756
2757 self.sort_formats({
2758 'formats': formats,
2759 '_format_sort_fields': info_dict.get('_format_sort_fields')
2760 })
2761
2762 # Sanitize and group by format_id
2763 formats_dict = {}
2764 for i, format in enumerate(formats):
2765 if not format.get('format_id'):
2766 format['format_id'] = str(i)
2767 else:
2768 # Sanitize format_id from characters used in format selector expression
2769 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2770 formats_dict.setdefault(format['format_id'], []).append(format)
2771
2772 # Make sure all formats have unique format_id
2773 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2774 for format_id, ambiguous_formats in formats_dict.items():
2775 ambiguous_id = len(ambiguous_formats) > 1
2776 for i, format in enumerate(ambiguous_formats):
2777 if ambiguous_id:
2778 format['format_id'] = '%s-%d' % (format_id, i)
2779 # Ensure there is no conflict between id and ext in format selection
2780 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2781 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2782 format['format_id'] = 'f%s' % format['format_id']
2783
2784 if format.get('format') is None:
2785 format['format'] = '{id} - {res}{note}'.format(
2786 id=format['format_id'],
2787 res=self.format_resolution(format),
2788 note=format_field(format, 'format_note', ' (%s)'),
2789 )
2790
2791 if self.params.get('check_formats') is True:
2792 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2793
2794 if not formats or formats[0] is not info_dict:
2795 # only set the 'formats' field if the original info_dict lists them;
2796 # otherwise we end up with a circular reference: the first (and only)
2797 # element of the 'formats' field in info_dict would be info_dict itself,
2798 # which can't be exported to json
2799 info_dict['formats'] = formats
2800
2801 info_dict, _ = self.pre_process(info_dict)
2802
2803 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2804 return info_dict
2805
2806 self.post_extract(info_dict)
2807 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2808
2809 # The pre-processors may have modified the formats
2810 formats = self._get_formats(info_dict)
2811
2812 list_only = self.params.get('simulate') == 'list_only'
2813 interactive_format_selection = not list_only and self.format_selector == '-'
2814 if self.params.get('list_thumbnails'):
2815 self.list_thumbnails(info_dict)
2816 if self.params.get('listsubtitles'):
2817 if 'automatic_captions' in info_dict:
2818 self.list_subtitles(
2819 info_dict['id'], automatic_captions, 'automatic captions')
2820 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2821 if self.params.get('listformats') or interactive_format_selection:
2822 self.list_formats(info_dict)
2823 if list_only:
2824 # Without this printing, -F --print-json will not work
2825 self.__forced_printings(info_dict)
2826 return info_dict
2827
2828 format_selector = self.format_selector
2829 while True:
2830 if interactive_format_selection:
2831 req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
2832 + '(Press ENTER for default, or Ctrl+C to quit)'
2833 + self._format_screen(': ', self.Styles.EMPHASIS))
2834 try:
2835 format_selector = self.build_format_selector(req_format) if req_format else None
2836 except SyntaxError as err:
2837 self.report_error(err, tb=False, is_error=False)
2838 continue
2839
2840 if format_selector is None:
2841 req_format = self._default_format_spec(info_dict, download=download)
2842 self.write_debug(f'Default format spec: {req_format}')
2843 format_selector = self.build_format_selector(req_format)
2844
2845 formats_to_download = list(format_selector({
2846 'formats': formats,
2847 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2848 'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
2849 or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
2850 }))
2851 if interactive_format_selection and not formats_to_download:
2852 self.report_error('Requested format is not available', tb=False, is_error=False)
2853 continue
2854 break
2855
2856 if not formats_to_download:
2857 if not self.params.get('ignore_no_formats_error'):
2858 raise ExtractorError(
2859 'Requested format is not available. Use --list-formats for a list of available formats',
2860 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2861 self.report_warning('Requested format is not available')
2862 # Process what we can, even without any available formats.
2863 formats_to_download = [{}]
2864
2865 requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
2866 best_format, downloaded_formats = formats_to_download[-1], []
2867 if download:
2868 if best_format and requested_ranges:
2869 def to_screen(*msg):
2870 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2871
2872 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2873 (f['format_id'] for f in formats_to_download))
2874 if requested_ranges != ({}, ):
2875 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2876 (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
2877 max_downloads_reached = False
2878
2879 for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
2880 new_info = self._copy_infodict(info_dict)
2881 new_info.update(fmt)
2882 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
2883 end_time = offset + min(chapter.get('end_time', duration), duration)
2884 # duration may not be accurate. So allow deviations <1sec
2885 if end_time == float('inf') or end_time > offset + duration + 1:
2886 end_time = None
2887 if chapter or offset:
2888 new_info.update({
2889 'section_start': offset + chapter.get('start_time', 0),
2890 'section_end': end_time,
2891 'section_title': chapter.get('title'),
2892 'section_number': chapter.get('index'),
2893 })
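# Editorial example: a requested range {'start_time': 10, 'end_time': 20} on a
# plain (non-sectioned) video yields section_start=10 and section_end=20; an
# end_time past duration+1 is treated as "until the end" (section_end=None)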
2894 downloaded_formats.append(new_info)
2895 try:
2896 self.process_info(new_info)
2897 except MaxDownloadsReached:
2898 max_downloads_reached = True
2899 self._raise_pending_errors(new_info)
2900 # Remove copied info
2901 for key, val in tuple(new_info.items()):
2902 if info_dict.get(key) == val:
2903 new_info.pop(key)
2904 if max_downloads_reached:
2905 break
2906
2907 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
2908 assert write_archive.issubset({True, False, 'ignore'})
2909 if True in write_archive and False not in write_archive:
2910 self.record_download_archive(info_dict)
2911
2912 info_dict['requested_downloads'] = downloaded_formats
2913 info_dict = self.run_all_pps('after_video', info_dict)
2914 if max_downloads_reached:
2915 raise MaxDownloadsReached()
2916
2917 # We update the info dict with the selected best quality format (backwards compatibility)
2918 info_dict.update(best_format)
2919 return info_dict
2920
2921 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2922 """Select the requested subtitles and their format"""
2923 available_subs, normal_sub_langs = {}, []
2924 if normal_subtitles and self.params.get('writesubtitles'):
2925 available_subs.update(normal_subtitles)
2926 normal_sub_langs = tuple(normal_subtitles.keys())
2927 if automatic_captions and self.params.get('writeautomaticsub'):
2928 for lang, cap_info in automatic_captions.items():
2929 if lang not in available_subs:
2930 available_subs[lang] = cap_info
2931
2932 if not available_subs or (
2933 not self.params.get('writesubtitles')
2934 and not self.params.get('writeautomaticsub')):
2935 return None
2936
2937 all_sub_langs = tuple(available_subs.keys())
2938 if self.params.get('allsubtitles', False):
2939 requested_langs = all_sub_langs
2940 elif self.params.get('subtitleslangs', False):
2941 try:
2942 requested_langs = orderedSet_from_options(
2943 self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
2944 except re.error as e:
2945 raise ValueError(f'Wrong regex for subtitleslangs: {e.pattern}')
2946 else:
2947 requested_langs = LazyList(itertools.chain(
2948 ['en'] if 'en' in normal_sub_langs else [],
2949 filter(lambda f: f.startswith('en'), normal_sub_langs),
2950 ['en'] if 'en' in all_sub_langs else [],
2951 filter(lambda f: f.startswith('en'), all_sub_langs),
2952 normal_sub_langs, all_sub_langs,
2953 ))[:1]
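# Editorial note: when no languages are requested explicitly, this picks a
# single best match in order: exact 'en' manual subs, any 'en*' manual subs,
# exact 'en' from any source (incl. auto captions), any 'en*' from any source,
# then any manual subs, then anything available. With --sub-langs, each entry
# is matched as a regex, e.g. 'en.*,ja' or 'all,-live_chat'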
2954 if requested_langs:
2955 self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
2956
2957 formats_query = self.params.get('subtitlesformat', 'best')
2958 formats_preference = formats_query.split('/') if formats_query else []
2959 subs = {}
2960 for lang in requested_langs:
2961 formats = available_subs.get(lang)
2962 if formats is None:
2963 self.report_warning(f'{lang} subtitles not available for {video_id}')
2964 continue
2965 for ext in formats_preference:
2966 if ext == 'best':
2967 f = formats[-1]
2968 break
2969 matches = list(filter(lambda f: f['ext'] == ext, formats))
2970 if matches:
2971 f = matches[-1]
2972 break
2973 else:
2974 f = formats[-1]
2975 self.report_warning(
2976 'No subtitle format found matching "%s" for language %s, '
2977 'using %s' % (formats_query, lang, f['ext']))
2978 subs[lang] = f
2979 return subs
2980
2981 def _forceprint(self, key, info_dict):
2982 if info_dict is None:
2983 return
2984 info_copy = info_dict.copy()
2985 info_copy.setdefault('filename', self.prepare_filename(info_dict))
2986 if info_dict.get('requested_formats') is not None:
2987 # For RTMP URLs, also include the playpath
2988 info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2989 elif info_dict.get('url'):
2990 info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2991 info_copy['formats_table'] = self.render_formats_table(info_dict)
2992 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2993 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2994 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2995
2996 def format_tmpl(tmpl):
2997 mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
2998 if not mobj:
2999 return tmpl
3000
3001 fmt = '%({})s'
3002 if tmpl.startswith('{'):
3003 tmpl, fmt = f'.{tmpl}', '%({})j'
3004 if tmpl.endswith('='):
3005 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
3006 return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
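# Editorial illustration (not part of the original source) of how the
# transformation above expands --print arguments into output templates:
#   'title'      -> '%(title)s'
#   'title,id'   -> '%(title)s\n%(id)s'
#   'title='     -> 'title = %(title)#j'
#   '{title,id}' -> '%(.{title,id})j'  (the named fields as a JSON dict)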
3007
3008 for tmpl in self.params['forceprint'].get(key, []):
3009 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
3010
3011 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
3012 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
3013 tmpl = format_tmpl(tmpl)
3014 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
3015 if self._ensure_dir_exists(filename):
3016 with open(filename, 'a', encoding='utf-8', newline='') as f:
3017 f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
3018
3019 return info_copy
3020
3021 def __forced_printings(self, info_dict, filename=None, incomplete=True):
3022 if (self.params.get('forcejson')
3023 or self.params['forceprint'].get('video')
3024 or self.params['print_to_file'].get('video')):
3025 self.post_extract(info_dict)
3026 if filename:
3027 info_dict['filename'] = filename
3028 info_copy = self._forceprint('video', info_dict)
3029
3030 def print_field(field, actual_field=None, optional=False):
3031 if actual_field is None:
3032 actual_field = field
3033 if self.params.get(f'force{field}') and (
3034 info_copy.get(field) is not None or (not optional and not incomplete)):
3035 self.to_stdout(info_copy[actual_field])
3036
3037 print_field('title')
3038 print_field('id')
3039 print_field('url', 'urls')
3040 print_field('thumbnail', optional=True)
3041 print_field('description', optional=True)
3042 print_field('filename')
3043 if self.params.get('forceduration') and info_copy.get('duration') is not None:
3044 self.to_stdout(formatSeconds(info_copy['duration']))
3045 print_field('format')
3046
3047 if self.params.get('forcejson'):
3048 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
3049
3050 def dl(self, name, info, subtitle=False, test=False):
3051 if not info.get('url'):
3052 self.raise_no_formats(info, True)
3053
3054 if test:
3055 verbose = self.params.get('verbose')
3056 params = {
3057 'test': True,
3058 'quiet': self.params.get('quiet') or not verbose,
3059 'verbose': verbose,
3060 'noprogress': not verbose,
3061 'nopart': True,
3062 'skip_unavailable_fragments': False,
3063 'keep_fragments': False,
3064 'overwrites': True,
3065 '_no_ytdl_file': True,
3066 }
3067 else:
3068 params = self.params
3069 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
3070 if not test:
3071 for ph in self._progress_hooks:
3072 fd.add_progress_hook(ph)
3073 urls = '", "'.join(
3074 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
3075 for f in info.get('requested_formats', []) or [info])
3076 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
3077
3078 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
3079 # But it may contain objects that are not deep-copyable
3080 new_info = self._copy_infodict(info)
3081 if new_info.get('http_headers') is None:
3082 new_info['http_headers'] = self._calc_headers(new_info)
3083 return fd.download(name, new_info, subtitle)
3084
3085 def existing_file(self, filepaths, *, default_overwrite=True):
3086 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
3087 if existing_files and not self.params.get('overwrites', default_overwrite):
3088 return existing_files[0]
3089
3090 for file in existing_files:
3091 self.report_file_delete(file)
3092 os.remove(file)
3093 return None
3094
3095 def process_info(self, info_dict):
3096 """Process a single resolved IE result. (Modifies it in-place)"""
3097
3098 assert info_dict.get('_type', 'video') == 'video'
3099 original_infodict = info_dict
3100
3101 if 'format' not in info_dict and 'ext' in info_dict:
3102 info_dict['format'] = info_dict['ext']
3103
3104 if self._match_entry(info_dict) is not None:
3105 info_dict['__write_download_archive'] = 'ignore'
3106 return
3107
3108 # Does nothing under normal operation - for backward compatibility of process_info
3109 self.post_extract(info_dict)
3110
3111 def replace_info_dict(new_info):
3112 nonlocal info_dict
3113 if new_info == info_dict:
3114 return
3115 info_dict.clear()
3116 info_dict.update(new_info)
3117
3118 new_info, _ = self.pre_process(info_dict, 'video')
3119 replace_info_dict(new_info)
3120 self._num_downloads += 1
3121
3122 # info_dict['_filename'] needs to be set for backward compatibility
3123 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
3124 temp_filename = self.prepare_filename(info_dict, 'temp')
3125 files_to_move = {}
3126
3127 # Forced printings
3128 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
3129
3130 def check_max_downloads():
3131 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
3132 raise MaxDownloadsReached()
3133
3134 if self.params.get('simulate'):
3135 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3136 check_max_downloads()
3137 return
3138
3139 if full_filename is None:
3140 return
3141 if not self._ensure_dir_exists(encodeFilename(full_filename)):
3142 return
3143 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
3144 return
3145
3146 if self._write_description('video', info_dict,
3147 self.prepare_filename(info_dict, 'description')) is None:
3148 return
3149
3150 sub_files = self._write_subtitles(info_dict, temp_filename)
3151 if sub_files is None:
3152 return
3153 files_to_move.update(dict(sub_files))
3154
3155 thumb_files = self._write_thumbnails(
3156 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3157 if thumb_files is None:
3158 return
3159 files_to_move.update(dict(thumb_files))
3160
3161 infofn = self.prepare_filename(info_dict, 'infojson')
3162 _infojson_written = self._write_info_json('video', info_dict, infofn)
3163 if _infojson_written:
3164 info_dict['infojson_filename'] = infofn
3165 # For backward compatibility, even though it was a private field
3166 info_dict['__infojson_filename'] = infofn
3167 elif _infojson_written is None:
3168 return
3169
3170 # Note: Annotations are deprecated
3171 annofn = None
3172 if self.params.get('writeannotations', False):
3173 annofn = self.prepare_filename(info_dict, 'annotation')
3174 if annofn:
3175 if not self._ensure_dir_exists(encodeFilename(annofn)):
3176 return
3177 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
3178 self.to_screen('[info] Video annotations are already present')
3179 elif not info_dict.get('annotations'):
3180 self.report_warning('There are no annotations to write.')
3181 else:
3182 try:
3183 self.to_screen('[info] Writing video annotations to: ' + annofn)
3184 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
3185 annofile.write(info_dict['annotations'])
3186 except (KeyError, TypeError):
3187 self.report_warning('There are no annotations to write.')
3188 except OSError:
3189 self.report_error('Cannot write annotations file: ' + annofn)
3190 return
3191
3192 # Write internet shortcut files
3193 def _write_link_file(link_type):
3194 url = try_get(info_dict['webpage_url'], iri_to_uri)
3195 if not url:
3196 self.report_warning(
3197 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3198 return True
3199 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
3200 if not self._ensure_dir_exists(encodeFilename(linkfn)):
3201 return False
3202 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
3203 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3204 return True
3205 try:
3206 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3207 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3208 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
3209 template_vars = {'url': url}
3210 if link_type == 'desktop':
3211 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3212 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
3213 except OSError:
3214 self.report_error(f'Cannot write internet shortcut {linkfn}')
3215 return False
3216 return True
3217
3218 write_links = {
3219 'url': self.params.get('writeurllink'),
3220 'webloc': self.params.get('writewebloclink'),
3221 'desktop': self.params.get('writedesktoplink'),
3222 }
3223 if self.params.get('writelink'):
3224 link_type = ('webloc' if sys.platform == 'darwin'
3225 else 'desktop' if sys.platform.startswith('linux')
3226 else 'url')
3227 write_links[link_type] = True
3228
3229 if any(should_write and not _write_link_file(link_type)
3230 for link_type, should_write in write_links.items()):
3231 return
3232
3233 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3234 replace_info_dict(new_info)
3235
3236 if self.params.get('skip_download'):
3237 info_dict['filepath'] = temp_filename
3238 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3239 info_dict['__files_to_move'] = files_to_move
3240 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
3241 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3242 else:
3243 # Download
3244 info_dict.setdefault('__postprocessors', [])
3245 try:
3246
3247 def existing_video_file(*filepaths):
3248 ext = info_dict.get('ext')
3249 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3250 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3251 default_overwrite=False)
3252 if file:
3253 info_dict['ext'] = os.path.splitext(file)[1][1:]
3254 return file
3255
3256 fd, success = None, True
3257 if info_dict.get('protocol') or info_dict.get('url'):
3258 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3259 if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
3260 info_dict.get('section_start') or info_dict.get('section_end')):
3261 msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
3262 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3263 self.report_error(f'{msg}. Aborting')
3264 return
3265
3266 if info_dict.get('requested_formats') is not None:
3267 old_ext = info_dict['ext']
3268 if self.params.get('merge_output_format') is None:
3269 if (info_dict['ext'] == 'webm'
3270 and info_dict.get('thumbnails')
3271 # check with type instead of pp_key, __name__, or isinstance
3272 # since we don't want any custom PPs to trigger this
3273 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
3274 info_dict['ext'] = 'mkv'
3275 self.report_warning(
3276 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3277 new_ext = info_dict['ext']
3278
3279 def correct_ext(filename, ext=new_ext):
3280 if filename == '-':
3281 return filename
3282 filename_real_ext = os.path.splitext(filename)[1][1:]
3283 filename_wo_ext = (
3284 os.path.splitext(filename)[0]
3285 if filename_real_ext in (old_ext, new_ext)
3286 else filename)
3287 return f'{filename_wo_ext}.{ext}'
3288
3289 # Ensure filename always has a correct extension for successful merge
3290 full_filename = correct_ext(full_filename)
3291 temp_filename = correct_ext(temp_filename)
3292 dl_filename = existing_video_file(full_filename, temp_filename)
3293
3294 info_dict['__real_download'] = False
3295 # NOTE: Copy so that original format dicts are not modified
3296 info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))
3297
3298 merger = FFmpegMergerPP(self)
3299 downloaded = []
3300 if dl_filename is not None:
3301 self.report_file_already_downloaded(dl_filename)
3302 elif fd:
3303 for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
3304 f['filepath'] = fname = prepend_extension(
3305 correct_ext(temp_filename, info_dict['ext']),
3306 'f%s' % f['format_id'], info_dict['ext'])
3307 downloaded.append(fname)
3308 info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
3309 success, real_download = self.dl(temp_filename, info_dict)
3310 info_dict['__real_download'] = real_download
3311 else:
3312 if self.params.get('allow_unplayable_formats'):
3313 self.report_warning(
3314 'You have requested merging of multiple formats '
3315 'while also allowing unplayable formats to be downloaded. '
3316 'The formats won\'t be merged to prevent data corruption.')
3317 elif not merger.available:
3318 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3319 if not self.params.get('ignoreerrors'):
3320 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3321 return
3322 self.report_warning(f'{msg}. The formats won\'t be merged')
3323
3324 if temp_filename == '-':
3325 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3326 else 'but the formats are incompatible for simultaneous download' if merger.available
3327 else 'but ffmpeg is not installed')
3328 self.report_warning(
3329 f'You have requested downloading multiple formats to stdout {reason}. '
3330 'The formats will be streamed one after the other')
3331 fname = temp_filename
3332 for f in info_dict['requested_formats']:
3333 new_info = dict(info_dict)
3334 del new_info['requested_formats']
3335 new_info.update(f)
3336 if temp_filename != '-':
3337 fname = prepend_extension(
3338 correct_ext(temp_filename, new_info['ext']),
3339 'f%s' % f['format_id'], new_info['ext'])
3340 if not self._ensure_dir_exists(fname):
3341 return
3342 f['filepath'] = fname
3343 downloaded.append(fname)
3344 partial_success, real_download = self.dl(fname, new_info)
3345 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3346 success = success and partial_success
3347
3348 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3349 info_dict['__postprocessors'].append(merger)
3350 info_dict['__files_to_merge'] = downloaded
3351 # Even if nothing was freshly downloaded, the merge itself happens only now, so count it as a real download
3352 info_dict['__real_download'] = True
3353 else:
3354 for file in downloaded:
3355 files_to_move[file] = None
3356 else:
3357 # Just a single file
3358 dl_filename = existing_video_file(full_filename, temp_filename)
3359 if dl_filename is None or dl_filename == temp_filename:
3360 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3361 # So we should try to resume the download
3362 success, real_download = self.dl(temp_filename, info_dict)
3363 info_dict['__real_download'] = real_download
3364 else:
3365 self.report_file_already_downloaded(dl_filename)
3366
3367 dl_filename = dl_filename or temp_filename
3368 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3369
3370 except network_exceptions as err:
3371 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3372 return
3373 except OSError as err:
3374 raise UnavailableVideoError(err)
3375 except (ContentTooShortError, ) as err:
3376 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3377 return
3378
3379 self._raise_pending_errors(info_dict)
3380 if success and full_filename != '-':
3381
3382 def fixup():
3383 do_fixup = True
3384 fixup_policy = self.params.get('fixup')
3385 vid = info_dict['id']
3386
3387 if fixup_policy in ('ignore', 'never'):
3388 return
3389 elif fixup_policy == 'warn':
3390 do_fixup = 'warn'
3391 elif fixup_policy != 'force':
3392 assert fixup_policy in ('detect_or_warn', None)
3393 if not info_dict.get('__real_download'):
3394 do_fixup = False
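# Editorial summary of the policy above: 'never'/'ignore' -> skip fixups
# entirely; 'warn' -> only emit warnings; 'detect_or_warn' (the default) ->
# apply fixups, but only to freshly downloaded files; 'force' -> always apply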
3395
3396 def ffmpeg_fixup(cndn, msg, cls):
3397 if not (do_fixup and cndn):
3398 return
3399 elif do_fixup == 'warn':
3400 self.report_warning(f'{vid}: {msg}')
3401 return
3402 pp = cls(self)
3403 if pp.available:
3404 info_dict['__postprocessors'].append(pp)
3405 else:
3406 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3407
3408 stretched_ratio = info_dict.get('stretched_ratio')
3409 ffmpeg_fixup(stretched_ratio not in (1, None),
3410 f'Non-uniform pixel ratio {stretched_ratio}',
3411 FFmpegFixupStretchedPP)
3412
3413 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3414 downloader = downloader.FD_NAME if downloader else None
3415
3416 ext = info_dict.get('ext')
3417 postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3418 isinstance(pp, FFmpegVideoConvertorPP)
3419 and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3420 ) for pp in self._pps['post_process'])
3421
3422 if not postprocessed_by_ffmpeg:
3423 ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
3424 'writing DASH m4a. Only some players support this container',
3425 FFmpegFixupM4aPP)
3426 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3427 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3428 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3429 FFmpegFixupM3u8PP)
3430 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
3431 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3432
3433 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3434 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3435
3436 fixup()
3437 try:
3438 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3439 except PostProcessingError as err:
3440 self.report_error('Postprocessing: %s' % str(err))
3441 return
3442 try:
3443 for ph in self._post_hooks:
3444 ph(info_dict['filepath'])
3445 except Exception as err:
3446 self.report_error('post hooks: %s' % str(err))
3447 return
3448 info_dict['__write_download_archive'] = True
3449
3450 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
3451 if self.params.get('force_write_download_archive'):
3452 info_dict['__write_download_archive'] = True
3453 check_max_downloads()
3454
3455 def __download_wrapper(self, func):
3456 @functools.wraps(func)
3457 def wrapper(*args, **kwargs):
3458 try:
3459 res = func(*args, **kwargs)
3460 except UnavailableVideoError as e:
3461 self.report_error(e)
3462 except DownloadCancelled as e:
3463 self.to_screen(f'[info] {e}')
3464 if not self.params.get('break_per_url'):
3465 raise
3466 self._num_downloads = 0
3467 else:
3468 if self.params.get('dump_single_json', False):
3469 self.post_extract(res)
3470 self.to_stdout(json.dumps(self.sanitize_info(res)))
3471 return wrapper
3472
3473 def download(self, url_list):
3474 """Download a given list of URLs."""
3475 url_list = variadic(url_list) # Passing a single URL is a common mistake
3476 outtmpl = self.params['outtmpl']['default']
3477 if (len(url_list) > 1
3478 and outtmpl != '-'
3479 and '%' not in outtmpl
3480 and self.params.get('max_downloads') != 1):
3481 raise SameFileError(outtmpl)
3482
3483 for url in url_list:
3484 self.__download_wrapper(self.extract_info)(
3485 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3486
3487 return self._download_retcode
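# A hedged usage sketch (editorial; mirrors the documented embedding pattern
# and is not part of the original source):
#
#   from yt_dlp import YoutubeDL
#
#   with YoutubeDL({'outtmpl': '%(id)s.%(ext)s'}) as ydl:
#       retcode = ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])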
3488
3489 def download_with_info_file(self, info_filename):
3490 with contextlib.closing(fileinput.FileInput(
3491 [info_filename], mode='r',
3492 openhook=fileinput.hook_encoded('utf-8'))) as f:
3493 # FileInput doesn't have a read method, so we can't call json.load directly
3494 infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
3495 for info in variadic(json.loads('\n'.join(f)))]
3496 for info in infos:
3497 self._load_cookies(info.get('cookies'), from_headers=False)
3498 self._load_cookies(traverse_obj(info.get('http_headers'), 'Cookie', casesense=False)) # compat
3499 try:
3500 self.__download_wrapper(self.process_ie_result)(info, download=True)
3501 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3502 if not isinstance(e, EntryNotInPlaylist):
3503 self.to_stderr('\r')
3504 webpage_url = info.get('webpage_url')
3505 if webpage_url is None:
3506 raise
3507 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3508 self.download([webpage_url])
3509 return self._download_retcode
3510
3511 @staticmethod
3512 def sanitize_info(info_dict, remove_private_keys=False):
3513 ''' Sanitize the infodict for converting to json '''
3514 if info_dict is None:
3515 return info_dict
3516 info_dict.setdefault('epoch', int(time.time()))
3517 info_dict.setdefault('_type', 'video')
3518 info_dict.setdefault('_version', {
3519 'version': __version__,
3520 'current_git_head': current_git_head(),
3521 'release_git_head': RELEASE_GIT_HEAD,
3522 'repository': REPOSITORY,
3523 })
3524
3525 if remove_private_keys:
3526 reject = lambda k, v: v is None or k.startswith('__') or k in {
3527 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3528 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
3529 'playlist_autonumber', '_format_sort_fields',
3530 }
3531 else:
3532 reject = lambda k, v: False
3533
3534 def filter_fn(obj):
3535 if isinstance(obj, dict):
3536 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3537 elif isinstance(obj, (list, tuple, set, LazyList)):
3538 return list(map(filter_fn, obj))
3539 elif obj is None or isinstance(obj, (str, int, float, bool)):
3540 return obj
3541 else:
3542 return repr(obj)
3543
3544 return filter_fn(info_dict)
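# Editorial example: with remove_private_keys=True, None-valued fields, any
# '__'-prefixed key and bookkeeping keys such as 'requested_formats' are
# dropped, and any value that is not JSON-serializable (not a
# dict/list/str/int/float/bool/None) is replaced by its repr()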
3545
3546 @staticmethod
3547 def filter_requested_info(info_dict, actually_filter=True):
3548 ''' Alias of sanitize_info for backward compatibility '''
3549 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3550
3551 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3552 for filename in set(filter(None, files_to_delete)):
3553 if msg:
3554 self.to_screen(msg % filename)
3555 try:
3556 os.remove(filename)
3557 except OSError:
3558 self.report_warning(f'Unable to delete file {filename}')
3559 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3560 del info['__files_to_move'][filename]
3561
3562 @staticmethod
3563 def post_extract(info_dict):
3564 def actual_post_extract(info_dict):
3565 if info_dict.get('_type') in ('playlist', 'multi_video'):
3566 for video_dict in info_dict.get('entries', {}):
3567 actual_post_extract(video_dict or {})
3568 return
3569
3570 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3571 info_dict.update(post_extractor())
3572
3573 actual_post_extract(info_dict or {})
3574
3575 def run_pp(self, pp, infodict):
3576 files_to_delete = []
3577 if '__files_to_move' not in infodict:
3578 infodict['__files_to_move'] = {}
3579 try:
3580 files_to_delete, infodict = pp.run(infodict)
3581 except PostProcessingError as e:
3582 # Must be True and not 'only_download'
3583 if self.params.get('ignoreerrors') is True:
3584 self.report_error(e)
3585 return infodict
3586 raise
3587
3588 if not files_to_delete:
3589 return infodict
3590 if self.params.get('keepvideo', False):
3591 for f in files_to_delete:
3592 infodict['__files_to_move'].setdefault(f, '')
3593 else:
3594 self._delete_downloaded_files(
3595 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3596 return infodict
3597
3598 def run_all_pps(self, key, info, *, additional_pps=None):
3599 if key != 'video':
3600 self._forceprint(key, info)
3601 for pp in (additional_pps or []) + self._pps[key]:
3602 info = self.run_pp(pp, info)
3603 return info
3604
3605 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3606 info = dict(ie_info)
3607 info['__files_to_move'] = files_to_move or {}
3608 try:
3609 info = self.run_all_pps(key, info)
3610 except PostProcessingError as err:
3611 msg = f'Preprocessing: {err}'
3612 info.setdefault('__pending_error', msg)
3613 self.report_error(msg, is_error=False)
3614 return info, info.pop('__files_to_move', None)
3615
3616 def post_process(self, filename, info, files_to_move=None):
3617 """Run all the postprocessors on the given file."""
3618 info['filepath'] = filename
3619 info['__files_to_move'] = files_to_move or {}
3620 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3621 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3622 del info['__files_to_move']
3623 return self.run_all_pps('after_move', info)
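# Editorial note: a single video thus flows through the postprocessor groups
# roughly as pre_process -> after_filter -> video -> before_dl -> post_process
# (including the per-video __postprocessors) -> MoveFilesAfterDownload ->
# after_move, with after_video running once all its downloads have finished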
3624
3625 def _make_archive_id(self, info_dict):
3626 video_id = info_dict.get('id')
3627 if not video_id:
3628 return
3629 # Future-proof against any change in case
3630 # and backwards compatibility with prior versions
3631 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3632 if extractor is None:
3633 url = str_or_none(info_dict.get('url'))
3634 if not url:
3635 return
3636 # Try to find matching extractor for the URL and take its ie_key
3637 for ie_key, ie in self._ies.items():
3638 if ie.suitable(url):
3639 extractor = ie_key
3640 break
3641 else:
3642 return
3643 return make_archive_id(extractor, video_id)
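# Editorial note: make_archive_id produces entries of the form
# '<lowercased extractor key> <video id>', e.g. 'youtube dQw4w9WgXcQ', which
# is the line format used in the --download-archive file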
3644
3645 def in_download_archive(self, info_dict):
3646 if not self.archive:
3647 return False
3648
3649 vid_ids = [self._make_archive_id(info_dict)]
3650 vid_ids.extend(info_dict.get('_old_archive_ids') or [])
3651 return any(id_ in self.archive for id_ in vid_ids)
3652
3653 def record_download_archive(self, info_dict):
3654 fn = self.params.get('download_archive')
3655 if fn is None:
3656 return
3657 vid_id = self._make_archive_id(info_dict)
3658 assert vid_id
3659
3660 self.write_debug(f'Adding to archive: {vid_id}')
3661 if is_path_like(fn):
3662 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3663 archive_file.write(vid_id + '\n')
3664 self.archive.add(vid_id)
3665
3666 @staticmethod
3667 def format_resolution(format, default='unknown'):
3668 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3669 return 'audio only'
3670 if format.get('resolution') is not None:
3671 return format['resolution']
3672 if format.get('width') and format.get('height'):
3673 return '%dx%d' % (format['width'], format['height'])
3674 elif format.get('height'):
3675 return '%sp' % format['height']
3676 elif format.get('width'):
3677 return '%dx?' % format['width']
3678 return default
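# Editorial examples: {'width': 1920, 'height': 1080} -> '1920x1080';
# {'height': 720} -> '720p'; {'width': 640} -> '640x?';
# {'vcodec': 'none', 'acodec': 'mp4a'} -> 'audio only'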
3679
3680 def _list_format_headers(self, *headers):
3681 if self.params.get('listformats_table', True) is not False:
3682 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3683 return headers
3684
3685 def _format_note(self, fdict):
3686 res = ''
3687 if fdict.get('ext') in ['f4f', 'f4m']:
3688 res += '(unsupported)'
3689 if fdict.get('language'):
3690 if res:
3691 res += ' '
3692 res += '[%s]' % fdict['language']
3693 if fdict.get('format_note') is not None:
3694 if res:
3695 res += ' '
3696 res += fdict['format_note']
3697 if fdict.get('tbr') is not None:
3698 if res:
3699 res += ', '
3700 res += '%4dk' % fdict['tbr']
3701 if fdict.get('container') is not None:
3702 if res:
3703 res += ', '
3704 res += '%s container' % fdict['container']
3705 if (fdict.get('vcodec') is not None
3706 and fdict.get('vcodec') != 'none'):
3707 if res:
3708 res += ', '
3709 res += fdict['vcodec']
3710 if fdict.get('vbr') is not None:
3711 res += '@'
3712 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3713 res += 'video@'
3714 if fdict.get('vbr') is not None:
3715 res += '%4dk' % fdict['vbr']
3716 if fdict.get('fps') is not None:
3717 if res:
3718 res += ', '
3719 res += '%sfps' % fdict['fps']
3720 if fdict.get('acodec') is not None:
3721 if res:
3722 res += ', '
3723 if fdict['acodec'] == 'none':
3724 res += 'video only'
3725 else:
3726 res += '%-5s' % fdict['acodec']
3727 elif fdict.get('abr') is not None:
3728 if res:
3729 res += ', '
3730 res += 'audio'
3731 if fdict.get('abr') is not None:
3732 res += '@%3dk' % fdict['abr']
3733 if fdict.get('asr') is not None:
3734 res += ' (%5dHz)' % fdict['asr']
3735 if fdict.get('filesize') is not None:
3736 if res:
3737 res += ', '
3738 res += format_bytes(fdict['filesize'])
3739 elif fdict.get('filesize_approx') is not None:
3740 if res:
3741 res += ', '
3742 res += '~' + format_bytes(fdict['filesize_approx'])
3743 return res
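# Editorial example: {'tbr': 1000, 'vcodec': 'avc1', 'acodec': 'mp4a'} renders
# roughly as '1000k, avc1, mp4a' in this legacy (non-table) format listing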
3744
3745 def _get_formats(self, info_dict):
3746 if info_dict.get('formats') is None:
3747 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3748 return [info_dict]
3749 return []
3750 return info_dict['formats']
3751
3752 def render_formats_table(self, info_dict):
3753 formats = self._get_formats(info_dict)
3754 if not formats:
3755 return
3756 if self.params.get('listformats_table', True) is False:
3757 table = [
3758 [
3759 format_field(f, 'format_id'),
3760 format_field(f, 'ext'),
3761 self.format_resolution(f),
3762 self._format_note(f)
3763 ] for f in formats if (f.get('preference') or 0) >= -1000]
3764 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3765
3766 def simplified_codec(f, field):
3767 assert field in ('acodec', 'vcodec')
3768 codec = f.get(field)
3769 if not codec:
3770 return 'unknown'
3771 elif codec != 'none':
3772 return '.'.join(codec.split('.')[:4])
3773
3774 if field == 'vcodec' and f.get('acodec') == 'none':
3775 return 'images'
3776 elif field == 'acodec' and f.get('vcodec') == 'none':
3777 return ''
3778 return self._format_out('audio only' if field == 'vcodec' else 'video only',
3779 self.Styles.SUPPRESS)
3780
3781 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3782 table = [
3783 [
3784 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3785 format_field(f, 'ext'),
3786 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3787 format_field(f, 'fps', '\t%d', func=round),
3788 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3789 format_field(f, 'audio_channels', '\t%s'),
3790 delim, (
3791 format_field(f, 'filesize', ' \t%s', func=format_bytes)
3792 or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
3793 or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
3794 None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
3795 format_field(f, 'tbr', '\t%dk', func=round),
3796 shorten_protocol_name(f.get('protocol', '')),
3797 delim,
3798 simplified_codec(f, 'vcodec'),
3799 format_field(f, 'vbr', '\t%dk', func=round),
3800 simplified_codec(f, 'acodec'),
3801 format_field(f, 'abr', '\t%dk', func=round),
3802 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
3803 join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
3804 self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
3805 (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
3806 else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
3807 format_field(f, 'format_note'),
3808 format_field(f, 'container', ignore=(None, f.get('ext'))),
3809 delim=', '), delim=' '),
3810 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3811 header_line = self._list_format_headers(
3812 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3813 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3814
3815 return render_table(
3816 header_line, table, hide_empty=True,
3817 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3818
3819 def render_thumbnails_table(self, info_dict):
3820 thumbnails = list(info_dict.get('thumbnails') or [])
3821 if not thumbnails:
3822 return None
3823 return render_table(
3824 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3825 [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
3826
3827 def render_subtitles_table(self, video_id, subtitles):
3828 def _row(lang, formats):
3829 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3830 if len(set(names)) == 1:
3831 names = [] if names[0] == 'unknown' else names[:1]
3832 return [lang, ', '.join(names), ', '.join(exts)]
3833
3834 if not subtitles:
3835 return None
3836 return render_table(
3837 self._list_format_headers('Language', 'Name', 'Formats'),
3838 [_row(lang, formats) for lang, formats in subtitles.items()],
3839 hide_empty=True)
3840
3841 def __list_table(self, video_id, name, func, *args):
3842 table = func(*args)
3843 if not table:
3844 self.to_screen(f'{video_id} has no {name}')
3845 return
3846 self.to_screen(f'[info] Available {name} for {video_id}:')
3847 self.to_stdout(table)
3848
3849 def list_formats(self, info_dict):
3850 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3851
3852 def list_thumbnails(self, info_dict):
3853 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3854
3855 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3856 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3857
3858 def print_debug_header(self):
3859 if not self.params.get('verbose'):
3860 return
3861
3862 from . import _IN_CLI # Must be delayed import
3863
3864 # These imports can be slow. So import them only as needed
3865 from .extractor.extractors import _LAZY_LOADER
3866 from .extractor.extractors import (
3867 _PLUGIN_CLASSES as plugin_ies,
3868 _PLUGIN_OVERRIDES as plugin_ie_overrides
3869 )
3870
3871 def get_encoding(stream):
3872 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3873 additional_info = []
3874 if os.environ.get('TERM', '').lower() == 'dumb':
3875 additional_info.append('dumb')
3876 if not supports_terminal_sequences(stream):
3877 from .utils import WINDOWS_VT_MODE # Must be imported locally
3878 additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
3879 if additional_info:
3880 ret = f'{ret} ({",".join(additional_info)})'
3881 return ret
3882
3883 encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
3884 locale.getpreferredencoding(),
3885 sys.getfilesystemencoding(),
3886 self.get_encoding(),
3887 ', '.join(
3888 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
3889 if stream is not None and key != 'console')
3890 )
3891
3892 logger = self.params.get('logger')
3893 if logger:
3894 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3895 write_debug(encoding_str)
3896 else:
3897 write_string(f'[debug] {encoding_str}\n', encoding=None)
3898 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3899
3900 source = detect_variant()
3901 if VARIANT not in (None, 'pip'):
3902 source += '*'
3903 klass = type(self)
3904 write_debug(join_nonempty(
3905 f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
3906 f'{CHANNEL}@{__version__}',
3907 f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
3908 '' if source == 'unknown' else f'({source})',
3909 '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
3910 delim=' '))
3911
3912 if not _IN_CLI:
3913 write_debug(f'params: {self.params}')
3914
3915 if not _LAZY_LOADER:
3916 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3917 write_debug('Lazy loading extractors is forcibly disabled')
3918 else:
3919 write_debug('Lazy loading extractors is disabled')
3920 if self.params['compat_opts']:
3921 write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
3922
3923 if current_git_head():
3924 write_debug(f'Git HEAD: {current_git_head()}')
3925 write_debug(system_identifier())
3926
3927 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3928 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3929 if ffmpeg_features:
3930 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
3931
3932 exe_versions['rtmpdump'] = rtmpdump_version()
3933 exe_versions['phantomjs'] = PhantomJSwrapper._version()
3934 exe_str = ', '.join(
3935 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3936 ) or 'none'
3937 write_debug('exe versions: %s' % exe_str)
3938
3939 from .compat.compat_utils import get_package_info
3940 from .dependencies import available_dependencies
3941
3942 write_debug('Optional libraries: %s' % (', '.join(sorted({
3943 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
3944 })) or 'none'))
3945
3946 self._setup_opener()
3947 proxy_map = {}
3948 for handler in self._opener.handlers:
3949 if hasattr(handler, 'proxies'):
3950 proxy_map.update(handler.proxies)
3951 write_debug(f'Proxy map: {proxy_map}')
3952
3953 for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
3954 display_list = ['%s%s' % (
3955 klass.__name__, '' if klass.__name__ == name else f' as {name}')
3956 for name, klass in plugins.items()]
3957 if plugin_type == 'Extractor':
3958 display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
3959 for parent, plugins in plugin_ie_overrides.items())
3960 if not display_list:
3961 continue
3962 write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')
3963
3964 plugin_dirs = plugin_directories()
3965 if plugin_dirs:
3966 write_debug(f'Plugin directories: {plugin_dirs}')
3967
3968 # Not implemented
3969 if False and self.params.get('call_home'):
3970 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
3971 write_debug('Public IP address: %s' % ipaddr)
3972 latest_version = self.urlopen(
3973 'https://yt-dl.org/latest/version').read().decode()
3974 if version_tuple(latest_version) > version_tuple(__version__):
3975 self.report_warning(
3976 'You are using an outdated version (newest version: %s)! '
3977 'See https://yt-dl.org/update if you need help updating.' %
3978 latest_version)
3979
3980 def _setup_opener(self):
3981 if hasattr(self, '_opener'):
3982 return
3983 timeout_val = self.params.get('socket_timeout')
3984 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3985 opts_proxy = self.params.get('proxy')
3986
3987 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3988 if opts_proxy is not None:
3989 if opts_proxy == '':
3990 proxies = {}
3991 else:
3992 proxies = {'http': opts_proxy, 'https': opts_proxy}
3993 else:
3994 proxies = urllib.request.getproxies()
3995 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3996 if 'http' in proxies and 'https' not in proxies:
3997 proxies['https'] = proxies['http']
3998 proxy_handler = PerRequestProxyHandler(proxies)
3999
4000 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
4001 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
4002 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
4003 redirect_handler = YoutubeDLRedirectHandler()
4004 data_handler = urllib.request.DataHandler()
4005
4006 # When passing our own FileHandler instance, build_opener won't add the
4007 # default FileHandler and allows us to disable the file protocol, which
4008 # can be used for malicious purposes (see
4009 # https://github.com/ytdl-org/youtube-dl/issues/8227)
4010 file_handler = urllib.request.FileHandler()
4011
4012 if not self.params.get('enable_file_urls'):
4013 def file_open(*args, **kwargs):
4014 raise urllib.error.URLError(
4015 'file:// URLs are explicitly disabled in yt-dlp for security reasons. '
4016 'Use --enable-file-urls to enable at your own risk.')
4017 file_handler.file_open = file_open
4018
4019 opener = urllib.request.build_opener(
4020 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
4021
4022 # Delete the default user-agent header, which would otherwise apply in
4023 # cases where our custom HTTP handler doesn't come into play
4024 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
4025 opener.addheaders = []
4026 self._opener = opener
4027
4028 @functools.cached_property
4029 def cookiejar(self):
4030 """Global cookiejar instance"""
4031 return load_cookies(
4032 self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
4033
4034 def urlopen(self, req):
4035 """ Start an HTTP download """
4036 if isinstance(req, str):
4037 req = sanitized_Request(req)
4038 return self._opener.open(req, timeout=self._socket_timeout)
4039
4040 def encode(self, s):
4041 if isinstance(s, bytes):
4042 return s # Already encoded
4043
4044 try:
4045 return s.encode(self.get_encoding())
4046 except UnicodeEncodeError as err:
4047 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
4048 raise
4049
4050 def get_encoding(self):
4051 encoding = self.params.get('encoding')
4052 if encoding is None:
4053 encoding = preferredencoding()
4054 return encoding
4055
4056 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
4057 ''' Write infojson and return True = written, 'exists' = already exists, False = skipped, None = error '''
4058 if overwrite is None:
4059 overwrite = self.params.get('overwrites', True)
4060 if not self.params.get('writeinfojson'):
4061 return False
4062 elif not infofn:
4063 self.write_debug(f'Skipping writing {label} infojson')
4064 return False
4065 elif not self._ensure_dir_exists(infofn):
4066 return None
4067 elif not overwrite and os.path.exists(infofn):
4068 self.to_screen(f'[info] {label.title()} metadata is already present')
4069 return 'exists'
4070
4071 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
4072 try:
4073 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
4074 return True
4075 except OSError:
4076 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
4077 return None
4078
4079 def _write_description(self, label, ie_result, descfn):
4080 ''' Write description and return True = written, False = skipped, None = error '''
4081 if not self.params.get('writedescription'):
4082 return False
4083 elif not descfn:
4084 self.write_debug(f'Skipping writing {label} description')
4085 return False
4086 elif not self._ensure_dir_exists(descfn):
4087 return None
4088 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
4089 self.to_screen(f'[info] {label.title()} description is already present')
4090 elif ie_result.get('description') is None:
4091 self.to_screen(f'[info] There\'s no {label} description to write')
4092 return False
4093 else:
4094 try:
4095 self.to_screen(f'[info] Writing {label} description to: {descfn}')
4096 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
4097 descfile.write(ie_result['description'])
4098 except OSError:
4099 self.report_error(f'Cannot write {label} description file {descfn}')
4100 return None
4101 return True
4102
4103 def _write_subtitles(self, info_dict, filename):
4104 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
4105 ret = []
4106 subtitles = info_dict.get('requested_subtitles')
4107 if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
4108 # Subtitle download errors are already handled as non-fatal in the relevant IE,
4109 # so this silently continues when used with an IE that doesn't support them
4110 return ret
4111 elif not subtitles:
4112 self.to_screen('[info] There are no subtitles for the requested languages')
4113 return ret
4114 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
4115 if not sub_filename_base:
4116 self.to_screen('[info] Skipping writing video subtitles')
4117 return ret
4118
4119 for sub_lang, sub_info in subtitles.items():
4120 sub_format = sub_info['ext']
4121 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
4122 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
4123 existing_sub = self.existing_file((sub_filename_final, sub_filename))
4124 if existing_sub:
4125 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
4126 sub_info['filepath'] = existing_sub
4127 ret.append((existing_sub, sub_filename_final))
4128 continue
4129
4130 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
4131 if sub_info.get('data') is not None:
4132 try:
4133 # Use newline='' to prevent conversion of newline characters
4134 # See https://github.com/ytdl-org/youtube-dl/issues/10268
4135 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
4136 subfile.write(sub_info['data'])
4137 sub_info['filepath'] = sub_filename
4138 ret.append((sub_filename, sub_filename_final))
4139 continue
4140 except OSError:
4141 self.report_error(f'Cannot write video subtitles file {sub_filename}')
4142 return None
4143
4144 try:
4145 sub_copy = sub_info.copy()
4146 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
4147 self.dl(sub_filename, sub_copy, subtitle=True)
4148 sub_info['filepath'] = sub_filename
4149 ret.append((sub_filename, sub_filename_final))
4150 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
4151 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
4152 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
4153 if not self.params.get('ignoreerrors'):
4154 self.report_error(msg)
4155 raise DownloadError(msg)
4156 self.report_warning(msg)
4157 return ret
4158
4159 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
4160 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
4161 write_all = self.params.get('write_all_thumbnails', False)
4162 thumbnails, ret = [], []
4163 if write_all or self.params.get('writethumbnail', False):
4164 thumbnails = info_dict.get('thumbnails') or []
4165 if not thumbnails:
4166 self.to_screen(f'[info] There are no {label} thumbnails to download')
4167 return ret
4168 multiple = write_all and len(thumbnails) > 1
4169
4170 if thumb_filename_base is None:
4171 thumb_filename_base = filename
4172 if thumbnails and not thumb_filename_base:
4173 self.write_debug(f'Skipping writing {label} thumbnail')
4174 return ret
4175
4176 for idx, t in list(enumerate(thumbnails))[::-1]:
4177 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
4178 thumb_display_id = f'{label} thumbnail {t["id"]}'
4179 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
4180 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
4181
4182 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
4183 if existing_thumb:
4184 self.to_screen('[info] %s is already present' % (
4185 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
4186 t['filepath'] = existing_thumb
4187 ret.append((existing_thumb, thumb_filename_final))
4188 else:
4189 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
4190 try:
4191 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
4192 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
4193 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
4194 shutil.copyfileobj(uf, thumbf)
4195 ret.append((thumb_filename, thumb_filename_final))
4196 t['filepath'] = thumb_filename
4197 except network_exceptions as err:
4198 if isinstance(err, urllib.error.HTTPError) and err.code == 404:
4199 self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
4200 else:
4201 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
4202 thumbnails.pop(idx)
4203 if ret and not write_all:
4204 break
4205 return ret