import collections
import contextlib
import copy
import datetime
import errno
import fileinput
import http.cookiejar
import io
import itertools
import json
import locale
import operator
import os
import random
import re
import shutil
import string
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata

from .cache import Cache
from .compat import functools, urllib  # isort: split
from .compat import compat_os_name, compat_shlex_quote, urllib_req_to_req
from .cookies import LenientSimpleCookie, load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .networking import HEADRequest, Request, RequestDirector
from .networking.common import _REQUEST_HANDLERS
from .networking.exceptions import (
    HTTPError,
    NoSupportingHandlers,
    RequestError,
    SSLError,
    _CompatHTTPError,
    network_exceptions,
)
from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_pps
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    FFmpegVideoConvertorPP,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import REPOSITORY, current_git_head, detect_variant
from .utils import (
    DEFAULT_OUTTMPL,
    IDENTITY,
    LINK_TEMPLATES,
    MEDIA_EXTENSIONS,
    NO_DEFAULT,
    NUMBER_RE,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    FormatSorter,
    GeoRestrictedError,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PlaylistEntries,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    UserNotLive,
    age_restricted,
    args_to_str,
    bug_reports_message,
    date_from_str,
    deprecation_warning,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    escapeHTML,
    expand_path,
    extract_basic_auth,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_compatible_ext,
    get_domain,
    int_or_none,
    iri_to_uri,
    is_path_like,
    join_nonempty,
    locked_file,
    make_archive_id,
    make_dir,
    number_of_digits,
    orderedSet,
    orderedSet_from_options,
    parse_filesize,
    preferredencoding,
    prepend_extension,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    system_identifier,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_call,
    try_get,
    url_basename,
    variadic,
    version_tuple,
    windows_enable_vt_mode,
    write_json_file,
    write_string,
)
from .utils._utils import _YDLLogger
from .utils.networking import (
    HTTPHeaderDict,
    clean_headers,
    clean_proxies,
    std_headers,
)
from .version import CHANNEL, RELEASE_GIT_HEAD, VARIANT, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are responsible for downloading the actual video file
    and writing it to disk if the user has requested it, among some other
    tasks. In most cases there should be one per program. Given a video URL,
    the downloader does not know how to extract all the needed information
    by itself; that is the task of the InfoExtractors, so it has to pass
    the URL to one of them.

    For this, YoutubeDL objects have a method that allows InfoExtractors
    to be registered in a given order. When it is passed a URL, the
    YoutubeDL object hands it to the first InfoExtractor it finds that
    reports being able to handle it. The InfoExtractor extracts all the
    information about the video or videos the URL refers to, and YoutubeDL
    processes the extracted information, possibly using a File Downloader
    to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also registers
    itself as the downloader in charge of the InfoExtractors that are
    added to it, so this is a "mutual registration".

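    A minimal usage sketch (the options used here are documented below;
    the URL is only illustrative):

        with YoutubeDL({'outtmpl': '%(title)s.%(ext)s'}) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
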
    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    netrc_location:    Location of the netrc file. Defaults to ~/.netrc.
    netrc_cmd:         Use a shell command to get credentials
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are video or any of the
                       items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. see "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
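                       E.g. (a sketch) a callable that keeps only formats with
                       a known filesize and yields the largest one ('ctx' is
                       assumed to carry a 'formats' list, as described in
                       build_format_selector):

                           def select_format(ctx):
                               formats = [f for f in ctx['formats'] if f.get('filesize')]
                               if formats:
                                   yield max(formats, key=lambda f: f['filesize'])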
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of the same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
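                       E.g. (a sketch): outtmpl={'default': '%(title)s.%(ext)s'}
                       together with paths={'home': '~/Videos', 'thumbnail': 'thumbs'}
                       writes thumbnails under ~/Videos/thumbs.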
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    allowed_extractors: List of regexes to match against extractor names that are allowed
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playlist_items:    Specific indices of playlist to download.
    playlistrandom:    Download playlist items in random order.
    lazy_playlist:     Process playlist entries as they are received.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Print everything to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    clean_infojson:    Remove internal metadata from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages, e.g. ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A utils.DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  A set, or the name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to for the entire queue
    cookiefile:        File name or text stream from where cookies should be read and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, the name of the keyring,
                       and the container name, e.g. ('chrome', ) or
                       ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    client_certificate: Path to client certificate file in PEM format. May include the private key
    client_certificate_key: Path to private key file for client certificate
    client_certificate_password: Password for client certificate private key, if encrypted.
                       If not provided and the key is encrypted, yt-dlp will ask interactively
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       (Only supported by some extractors)
    enable_file_urls:  Enable file:// URLs. This is disabled by default for security reasons.
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Whether to resolve and process url_results further
                       * False: Always process. Default for API
                       * True: Never process
                       * 'in_playlist': Do not process inside playlist/multi_video
                       * 'discard': Always process, but don't return the result
                         from inside playlist/multi_video
                       * 'discard_in_playlist': Same as "discard", but only for
                         playlists (not multi_video). Default for CLI
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                         the entries of utils.POSTPROCESS_WHEN
                         Assumed to be 'post_process' if not given
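                       E.g. (a sketch) to extract audio as mp3 after download:
                       [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3'}]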
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                         Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                         None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                         unknown
                       * fragment_index: The counter of the currently
                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
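                       E.g. (a sketch):

                           def hook(d):
                               if d['status'] == 'finished':
                                   print('Done downloading', d['filename'])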
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                         Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: "/" separated list of extensions to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval:Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       - Raise utils.DownloadCancelled(msg) to abort remaining
                         downloads when a video is rejected.
                       match_filter_func in utils.py is one example for this.
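                       E.g. (a sketch) to skip videos shorter than a minute:

                           def longer_than_a_minute(info, *, incomplete):
                               duration = info.get('duration')
                               if duration and duration < 60:
                                   return 'The video is too short'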
    color:             A dictionary with output stream names as keys
                       and their respective color policy as values.
                       Can also just be a single color policy,
                       in which case it applies to all outputs.
                       Valid stream names are 'stdout' and 'stderr'.
                       Valid color policies are one of 'always', 'auto', 'no_color' or 'never'.
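                       E.g. (a sketch): {'stdout': 'no_color', 'stderr': 'auto'}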
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort,
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'
    retry_sleep_functions: Dictionary of functions that take the number of attempts
                       as argument and return the time to sleep in seconds.
                       Allowed keys are 'http', 'fragment', 'file_access'
    download_ranges:   A callback function that gets called for every video with
                       the signature (info_dict, ydl) -> Iterable[Section].
                       Only the returned sections will be downloaded.
                       Each Section is a dict with the following keys:
                       * start_time: Start time of the section in seconds
                       * end_time: End time of the section in seconds
                       * title: Section title (Optional)
                       * index: Section number (Optional)
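                       E.g. (a sketch) to download only the first 30 seconds:

                           def first_30_seconds(info_dict, ydl):
                               yield {'start_time': 0, 'end_time': 30}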
    force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
    noprogress:        Do not print the progress bar
    live_from_start:   Whether to download livestream videos from the start

    The following parameters are not used by YoutubeDL itself, they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors (default: 3)
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       E.g. {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
                       - `raise DownloadCancelled(msg)` in match_filter instead
    force_generic_extractor: Force downloader to use the generic extractor
                       - Use allowed_extractors = ['generic', 'default']
    playliststart:     - Use playlist_items
                       Playlist item to start at.
    playlistend:       - Use playlist_items
                       Playlist item to end at.
    playlistreverse:   - Use playlist_items
                       Download playlist items in reverse order.
    forceurl:          - Use forceprint
                       Force printing final URL.
    forcetitle:        - Use forceprint
                       Force printing title.
    forceid:           - Use forceprint
                       Force printing ID.
    forcethumbnail:    - Use forceprint
                       Force printing thumbnail URL.
    forcedescription:  - Use forceprint
                       Force printing description.
    forcefilename:     - Use forceprint
                       Force printing final filename.
    forceduration:     - Use forceprint
                       Force printing duration.
    allsubtitles:      - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    include_ads:       - Doesn't work
                       Download ads as well
    call_home:         - Not implemented
                       Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging.
    post_hooks:        - Register a custom postprocessor
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    prefer_ffmpeg:     - avconv support is deprecated
                       If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    no_color:          Same as `color='no_color'`
    """

    _NUMERIC_FIELDS = {
        'width', 'height', 'asr', 'audio_channels', 'fps',
        'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'hls_aes', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
    }
    _format_selection_exts = {
        'audio': set(MEDIA_EXTENSIONS.common_audio),
        'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
        'storyboards': set(MEDIA_EXTENSIONS.storyboards),
    }

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )

        try:
            windows_enable_vt_mode()
        except Exception as e:
            self.write_debug(f'Failed to enable VT mode: {e}')

        if self.params.get('no_color'):
            if self.params.get('color') is not None:
                self.report_warning('Overwriting params from "color" with "no_color"')
            self.params['color'] = 'no_color'

        term_allow_color = os.environ.get('TERM', '').lower() != 'dumb'

        def process_color_policy(stream):
            stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream]
            policy = traverse_obj(self.params, ('color', (stream_name, None), {str}), get_all=False)
            if policy in ('auto', None):
                return term_allow_color and supports_terminal_sequences(stream)
            assert policy in ('always', 'never', 'no_color')
            return {'always': True, 'never': False}.get(policy, policy)

        self._allow_colors = Namespace(**{
            name: process_color_policy(stream)
            for name, stream in self._out_files.items_ if name != 'console'
        })

        # The code is left like this to be reused for future deprecations
        MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
        current_version = sys.version_info[:2]
        if current_version < MIN_RECOMMENDED:
            msg = ('Support for Python version %d.%d has been deprecated. '
                   'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
                   '\n You will no longer receive updates on this version')
            if current_version < MIN_SUPPORTED:
                msg = 'Python version %d.%d is no longer supported'
            self.deprecated_feature(
                f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers'))
        self._request_director = self.build_request_director(
            sorted(_REQUEST_HANDLERS.values(), key=lambda rh: rh.RH_NAME.lower()))
        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()

        self.__header_cookies = []
        self._load_cookies(traverse_obj(self.params.get('http_headers'), 'cookie', casesense=False))  # compat

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if self.params.get('simulate') is None and any((
            self.params.get('list_thumbnails'),
            self.params.get('listformats'),
            self.params.get('listsubtitles'),
        )):
            self.params['simulate'] = 'list_only'

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if auto_init:
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            archive = set()
            if fn is None:
                return archive
            elif not is_path_like(fn):
                return fn

            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
            return archive

        self.archive = preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key. It will try to get one from
        the _ies list; if there is no instance, it will create a new one and
        add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
        all_ies['end'] = UnsupportedURLIE()
        try:
            ie_names = orderedSet_from_options(
                self.params.get('allowed_extractors', ['default']), {
                    'all': list(all_ies),
                    'default': [name for name, ie in all_ies.items() if ie._ENABLED],
                }, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
        for name in ie_names:
            self.add_info_extractor(all_ies[name])
        self.write_debug(f'Loaded {len(ie_names)} extractors')

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                     'Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                     'Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen, only_once=only_once)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def save_cookies(self):
        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def __exit__(self, *args):
        self.restore_console_title()
        self.close()

    def close(self):
        self.save_cookies()
        self._request_director.close()

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        BAD_FORMAT='light red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors is True else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message, *, stacklevel=0):
        deprecation_warning(
            message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)

    def deprecated_feature(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'Deprecated Feature: {message}')
        self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefix the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log the debug message to the logger, or print it to stderr'''
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        sanitize = IDENTITY
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})

    def get_output_path(self, dir_type='', filename=None):
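        # Usage sketch (illustrative values, not from the source): with
        # params['paths'] = {'home': '~/Videos', 'thumbnail': 'thumbs'},
        # get_output_path('thumbnail', 'a.jpg') expands and joins these into
        # '<expanded home>/thumbs/a.jpg' before sanitization below.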
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join(random.choices(string.ascii_letters, k=32))
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
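        # Usage sketch: YoutubeDL.validate_outtmpl('%(title)s.%(ext)s')
        # returns None; for a malformed template, the ValueError is
        # returned instead of being raised.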
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        info_dict.pop('__pending_error', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int, slice or "{field, ...}"
        FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
        FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
            'inner': FIELD_INNER_RE,
            'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
        }
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(rf'''(?xs)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')
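        # E.g. (illustrative): the template key 'duration-3600>%H:%M' parses
        # into fields='duration', maths='-3600' and strf_format='%H:%M',
        # while 'title|Unknown' parses into fields='title', default='Unknown'.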

        def _traverse_infodict(fields):
            fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                      for f in ([x] if x.startswith('{') else x.split('.'))]
            for i in (0, -1):
                if fields and not fields[i]:
                    fields.pop(i)

            for i, f in enumerate(fields):
                if not f.startswith('{'):
                    continue
                assert f.endswith('}'), f'No closing brace for {f} in {fields}'
                fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}

            return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
            if sanitize and value == '':
                value = None
            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params['compat_opts']
                else NO_DEFAULT))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        class _ReplacementFormatter(string.Formatter):
            def get_field(self, field_name, args, kwargs):
                if field_name.isdigit():
                    return args[0], -1
                raise ValueError('Unsupported field')

        replacement_formatter = _ReplacementFormatter()

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            value, replacement, default, last_field = None, None, na, ''
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                last_field, replacement = mobj['fields'], mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and last_field in field_size_compat_map.keys():
                fmt = f'0{field_size_compat_map[last_field]:d}d'

            if None not in (value, replacement):
                try:
                    value = replacement_formatter.format(replacement, value)
                except ValueError:
                    value, default = None, na

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if value is None:
                value, fmt = default, 's'
            elif fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(
                    value, default=_dumpjson_default,
                    indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
            elif fmt[-1] == 'h':  # html
                value, fmt = escapeHTML(str(value)), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode() % str(value).encode()
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rsa':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                if fmt[-1] == 'r':
                    value, fmt = repr(value), str_fmt
                elif fmt[-1] == 'a':
                    value, fmt = ascii(value), str_fmt
                if fmt[-1] in 'csra':
                    value = sanitizer(last_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

1368 def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
1369 outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
1370 return self.escape_outtmpl(outtmpl) % info_dict
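# Illustrative example (added for exposition, not part of the original
# source). Given a YoutubeDL instance `ydl`:
#
#   ydl.evaluate_outtmpl('%(title)s [%(id)s].%(ext)s',
#                        {'title': 'Demo', 'id': 'abc123', 'ext': 'mp4'})
#   # -> 'Demo [abc123].mp4'
#
# The conversion suffixes handled by create_key above include e.g.
# %(tags)l (delimiter-joined list), %(formats)j (JSON), %(title)q
# (shell-quoted), %(filesize)D (decimal suffix such as "10M") and
# %(title)S (filename sanitization).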
1371
1372 def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
1373 assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
1374 if outtmpl is None:
1375 outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
1376 try:
1377 outtmpl = self._outtmpl_expandpath(outtmpl)
1378 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1379 if not filename:
1380 return None
1381
1382 if tmpl_type in ('', 'temp'):
1383 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1384 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1385 filename = replace_extension(filename, ext, final_ext)
1386 elif tmpl_type:
1387 force_ext = OUTTMPL_TYPES[tmpl_type]
1388 if force_ext:
1389 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1390
1391 # https://github.com/blackjack4494/youtube-dlc/issues/85
1392 trim_file_name = self.params.get('trim_file_name', False)
1393 if trim_file_name:
1394 no_ext, *ext = filename.rsplit('.', 2)
1395 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1396
1397 return filename
1398 except ValueError as err:
1399 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1400 return None
1401
1402 def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
1403 """Generate the output filename"""
1404 if outtmpl:
1405 assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
1406 dir_type = None
1407 filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
1408 if not filename and dir_type not in ('', 'temp'):
1409 return ''
1410
1411 if warn:
1412 if not self.params.get('paths'):
1413 pass
1414 elif filename == '-':
1415 self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
1416 elif os.path.isabs(filename):
1417 self.report_warning('--paths is ignored since an absolute path is given in the output template', only_once=True)
1418 if filename == '-' or not filename:
1419 return filename
1420
1421 return self.get_output_path(dir_type, filename)
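# Illustrative usage (added): prepare_filename() joins the evaluated
# template with any configured --paths, e.g.
#   ydl.prepare_filename(info)               # main download path
#   ydl.prepare_filename(info, 'thumbnail')  # path for the thumbnail file
# Passing outtmpl='...' evaluates a one-off template instead of the
# configured one (dir_type must then be left empty).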
1422
1423 def _match_entry(self, info_dict, incomplete=False, silent=False):
1424 """Returns None if the file should be downloaded"""
1425 _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video')
1426 assert incomplete or _type == 'video', 'Only video result can be considered complete'
1427
1428 video_title = info_dict.get('title', info_dict.get('id', 'entry'))
1429
1430 def check_filter():
1431 if _type in ('playlist', 'multi_video'):
1432 return
1433 elif _type in ('url', 'url_transparent') and not try_call(
1434 lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
1435 return
1436
1437 if 'title' in info_dict:
1438 # This can happen when we're just evaluating the playlist
1439 title = info_dict['title']
1440 matchtitle = self.params.get('matchtitle', False)
1441 if matchtitle:
1442 if not re.search(matchtitle, title, re.IGNORECASE):
1443 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1444 rejecttitle = self.params.get('rejecttitle', False)
1445 if rejecttitle:
1446 if re.search(rejecttitle, title, re.IGNORECASE):
1447 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1448
1449 date = info_dict.get('upload_date')
1450 if date is not None:
1451 dateRange = self.params.get('daterange', DateRange())
1452 if date not in dateRange:
1453 return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
1454 view_count = info_dict.get('view_count')
1455 if view_count is not None:
1456 min_views = self.params.get('min_views')
1457 if min_views is not None and view_count < min_views:
1458 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
1459 max_views = self.params.get('max_views')
1460 if max_views is not None and view_count > max_views:
1461 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1462 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1463 return 'Skipping "%s" because it is age restricted' % video_title
1464
1465 match_filter = self.params.get('match_filter')
1466 if match_filter is None:
1467 return None
1468
1469 cancelled = None
1470 try:
1471 try:
1472 ret = match_filter(info_dict, incomplete=incomplete)
1473 except TypeError:
1474 # For backward compatibility
1475 ret = None if incomplete else match_filter(info_dict)
1476 except DownloadCancelled as err:
1477 if err.msg is not NO_DEFAULT:
1478 raise
1479 ret, cancelled = err.msg, err
1480
1481 if ret is NO_DEFAULT:
1482 while True:
1483 filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1484 reply = input(self._format_screen(
1485 f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1486 if reply in {'y', ''}:
1487 return None
1488 elif reply == 'n':
1489 if cancelled:
1490 raise type(cancelled)(f'Skipping {video_title}')
1491 return f'Skipping {video_title}'
1492 return ret
1493
1494 if self.in_download_archive(info_dict):
1495 reason = ''.join((
1496 format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '),
1497 format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '),
1498 'has already been recorded in the archive'))
1499 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1500 else:
1501 try:
1502 reason = check_filter()
1503 except DownloadCancelled as e:
1504 reason, break_opt, break_err = e.msg, 'match_filter', type(e)
1505 else:
1506 break_opt, break_err = 'break_on_reject', RejectedVideoReached
1507 if reason is not None:
1508 if not silent:
1509 self.to_screen('[download] ' + reason)
1510 if self.params.get(break_opt, False):
1511 raise break_err()
1512 return reason
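# Illustrative note (added): params['match_filter'] may be any callable
# invoked as match_filter(info_dict, incomplete=...). For example, a
# filter string can be compiled into one with yt_dlp.utils.match_filter_func:
#
#   ydl.params['match_filter'] = match_filter_func('duration > 60')
#
# Returning None means "download"; a string becomes the skip reason
# reported above; NO_DEFAULT triggers the interactive Y/n prompt.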
1513
1514 @staticmethod
1515 def add_extra_info(info_dict, extra_info):
1516 '''Set the keys from extra_info in info dict if they are missing'''
1517 for key, value in extra_info.items():
1518 info_dict.setdefault(key, value)
1519
1520 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1521 process=True, force_generic_extractor=False):
1522 """
1523 Extract and return the information dictionary of the URL
1524
1525 Arguments:
1526 @param url URL to extract
1527
1528 Keyword arguments:
1529 @param download Whether to download videos
1530 @param process Whether to resolve all unresolved references (URLs, playlist items).
1531 Must be True for download to work
1532 @param ie_key Use only the extractor with this key
1533
1534 @param extra_info Dictionary containing the extra values to add to the info (For internal use only)
1535 @param force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic')
1536 """
1537
1538 if extra_info is None:
1539 extra_info = {}
1540
1541 if not ie_key and force_generic_extractor:
1542 ie_key = 'Generic'
1543
1544 if ie_key:
1545 ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
1546 else:
1547 ies = self._ies
1548
1549 for key, ie in ies.items():
1550 if not ie.suitable(url):
1551 continue
1552
1553 if not ie.working():
1554 self.report_warning('Support for this site has been marked as broken '
1555 'and will probably not work.')
1556
1557 temp_id = ie.get_temp_id(url)
1558 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1559 self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: '
1560 'has already been recorded in the archive')
1561 if self.params.get('break_on_existing', False):
1562 raise ExistingVideoReached()
1563 break
1564 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1565 else:
1566 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1567 self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1568 tb=False if extractors_restricted else None)
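# Illustrative usage (added): extract_info is the main entry point, e.g.
#
#   with YoutubeDL({'skip_download': True}) as ydl:
#       info = ydl.extract_info(some_url, download=False)
#
# `info` is then the fully processed info dict (for playlists, a dict
# whose 'entries' hold the processed children), or None on handled errors.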
1569
1570 def _handle_extraction_exceptions(func):
1571 @functools.wraps(func)
1572 def wrapper(self, *args, **kwargs):
1573 while True:
1574 try:
1575 return func(self, *args, **kwargs)
1576 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1577 raise
1578 except ReExtractInfo as e:
1579 if e.expected:
1580 self.to_screen(f'{e}; Re-extracting data')
1581 else:
1582 self.to_stderr('\r')
1583 self.report_warning(f'{e}; Re-extracting data')
1584 continue
1585 except GeoRestrictedError as e:
1586 msg = e.msg
1587 if e.countries:
1588 msg += '\nThis video is available in %s.' % ', '.join(
1589 map(ISO3166Utils.short2full, e.countries))
1590 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1591 self.report_error(msg)
1592 except ExtractorError as e: # An error we somewhat expected
1593 self.report_error(str(e), e.format_traceback())
1594 except Exception as e:
1595 if self.params.get('ignoreerrors'):
1596 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1597 else:
1598 raise
1599 break
1600 return wrapper
1601
1602 def _wait_for_video(self, ie_result={}):
1603 if (not self.params.get('wait_for_video')
1604 or ie_result.get('_type', 'video') != 'video'
1605 or ie_result.get('formats') or ie_result.get('url')):
1606 return
1607
1608 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1609 last_msg = ''
1610
1611 def progress(msg):
1612 nonlocal last_msg
1613 full_msg = f'{msg}\n'
1614 if not self.params.get('noprogress'):
1615 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1616 elif last_msg:
1617 return
1618 self.to_screen(full_msg, skip_eol=True)
1619 last_msg = msg
1620
1621 min_wait, max_wait = self.params.get('wait_for_video')
1622 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1623 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1624 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1625 self.report_warning('Release time of video is not known')
1626 elif ie_result and (diff or 0) <= 0:
1627 self.report_warning('Video should already be available according to extracted info')
1628 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1629 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1630
1631 wait_till = time.time() + diff
1632 try:
1633 while True:
1634 diff = wait_till - time.time()
1635 if diff <= 0:
1636 progress('')
1637 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1638 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1639 time.sleep(1)
1640 except KeyboardInterrupt:
1641 progress('')
1642 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1643 except BaseException as e:
1644 if not isinstance(e, ReExtractInfo):
1645 self.to_screen('')
1646 raise
1647
1648 def _load_cookies(self, data, *, from_headers=True):
1649 """Loads cookies from a `Cookie` header
1650
1651 This tries to work around the security vulnerability of passing cookies to every domain.
1652 See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
1653 The unscoped cookies are saved for later to be stored in the jar with a limited scope.
1654
1655 @param data The Cookie header as a string to load the cookies from
1656 @param from_headers If `False`, allows Set-Cookie syntax in the cookie string (at least a domain will be required)
1657 """
1658 for cookie in LenientSimpleCookie(data).values():
1659 if from_headers and any(cookie.values()):
1660 raise ValueError('Invalid syntax in Cookie Header')
1661
1662 domain = cookie.get('domain') or ''
1663 expiry = cookie.get('expires')
1664 if expiry == '': # 0 is valid
1665 expiry = None
1666 prepared_cookie = http.cookiejar.Cookie(
1667 cookie.get('version') or 0, cookie.key, cookie.value, None, False,
1668 domain, True, True, cookie.get('path') or '', bool(cookie.get('path')),
1669 cookie.get('secure') or False, expiry, False, None, None, {})
1670
1671 if domain:
1672 self.cookiejar.set_cookie(prepared_cookie)
1673 elif from_headers:
1674 self.deprecated_feature(
1675 'Passing cookies as a header is a potential security risk; '
1676 'they will be scoped to the domain of the downloaded URLs. '
1677 'Please consider loading cookies from a file or browser instead.')
1678 self.__header_cookies.append(prepared_cookie)
1679 else:
1680 self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping',
1681 tb=False, is_error=False)
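# Illustrative flow (added): a header cookie loaded via e.g.
#   ydl._load_cookies('id=abc; session=xyz', from_headers=True)
# is kept unscoped in __header_cookies; _apply_header_cookies() below then
# scopes it to each requested URL's host just before the request is made.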
1682
1683 def _apply_header_cookies(self, url):
1684 """Applies stray header cookies to the provided url
1685
1686 This loads header cookies and scopes them to the domain provided in `url`.
1687 While this is not ideal, it helps reduce the risk of them being sent
1688 to an unintended destination while mostly maintaining compatibility.
1689 """
1690 parsed = urllib.parse.urlparse(url)
1691 if not parsed.hostname:
1692 return
1693
1694 for cookie in map(copy.copy, self.__header_cookies):
1695 cookie.domain = f'.{parsed.hostname}'
1696 self.cookiejar.set_cookie(cookie)
1697
1698 @_handle_extraction_exceptions
1699 def __extract_info(self, url, ie, download, extra_info, process):
1700 self._apply_header_cookies(url)
1701
1702 try:
1703 ie_result = ie.extract(url)
1704 except UserNotLive as e:
1705 if process:
1706 if self.params.get('wait_for_video'):
1707 self.report_warning(e)
1708 self._wait_for_video()
1709 raise
1710 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1711 self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1712 return
1713 if isinstance(ie_result, list):
1714 # Backwards compatibility: old IE result format
1715 ie_result = {
1716 '_type': 'compat_list',
1717 'entries': ie_result,
1718 }
1719 if extra_info.get('original_url'):
1720 ie_result.setdefault('original_url', extra_info['original_url'])
1721 self.add_default_extra_info(ie_result, ie, url)
1722 if process:
1723 self._wait_for_video(ie_result)
1724 return self.process_ie_result(ie_result, download, extra_info)
1725 else:
1726 return ie_result
1727
1728 def add_default_extra_info(self, ie_result, ie, url):
1729 if url is not None:
1730 self.add_extra_info(ie_result, {
1731 'webpage_url': url,
1732 'original_url': url,
1733 })
1734 webpage_url = ie_result.get('webpage_url')
1735 if webpage_url:
1736 self.add_extra_info(ie_result, {
1737 'webpage_url_basename': url_basename(webpage_url),
1738 'webpage_url_domain': get_domain(webpage_url),
1739 })
1740 if ie is not None:
1741 self.add_extra_info(ie_result, {
1742 'extractor': ie.IE_NAME,
1743 'extractor_key': ie.ie_key(),
1744 })
1745
1746 def process_ie_result(self, ie_result, download=True, extra_info=None):
1747 """
1748 Take the result of the ie (may be modified) and resolve all unresolved
1749 references (URLs, playlist items).
1750
1751 It will also download the videos if 'download'.
1752 Returns the resolved ie_result.
1753 """
1754 if extra_info is None:
1755 extra_info = {}
1756 result_type = ie_result.get('_type', 'video')
1757
1758 if result_type in ('url', 'url_transparent'):
1759 ie_result['url'] = sanitize_url(
1760 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
1761 if ie_result.get('original_url') and not extra_info.get('original_url'):
1762 extra_info = {'original_url': ie_result['original_url'], **extra_info}
1763
1764 extract_flat = self.params.get('extract_flat', False)
1765 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1766 or extract_flat is True):
1767 info_copy = ie_result.copy()
1768 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1769 if ie and not ie_result.get('id'):
1770 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1771 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1772 self.add_extra_info(info_copy, extra_info)
1773 info_copy, _ = self.pre_process(info_copy)
1774 self._fill_common_fields(info_copy, False)
1775 self.__forced_printings(info_copy)
1776 self._raise_pending_errors(info_copy)
1777 if self.params.get('force_write_download_archive', False):
1778 self.record_download_archive(info_copy)
1779 return ie_result
1780
1781 if result_type == 'video':
1782 self.add_extra_info(ie_result, extra_info)
1783 ie_result = self.process_video_result(ie_result, download=download)
1784 self._raise_pending_errors(ie_result)
1785 additional_urls = (ie_result or {}).get('additional_urls')
1786 if additional_urls:
1787 # TODO: Improve MetadataParserPP to allow setting a list
1788 if isinstance(additional_urls, str):
1789 additional_urls = [additional_urls]
1790 self.to_screen(
1791 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1792 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1793 ie_result['additional_entries'] = [
1794 self.extract_info(
1795 url, download, extra_info=extra_info,
1796 force_generic_extractor=self.params.get('force_generic_extractor'))
1797 for url in additional_urls
1798 ]
1799 return ie_result
1800 elif result_type == 'url':
1801 # We have to add extra_info to the results because it may be
1802 # contained in a playlist
1803 return self.extract_info(
1804 ie_result['url'], download,
1805 ie_key=ie_result.get('ie_key'),
1806 extra_info=extra_info)
1807 elif result_type == 'url_transparent':
1808 # Use the information from the embedding page
1809 info = self.extract_info(
1810 ie_result['url'], ie_key=ie_result.get('ie_key'),
1811 extra_info=extra_info, download=False, process=False)
1812
1813 # extract_info may return None when ignoreerrors is enabled and
1814 # extraction failed with an error, don't crash and return early
1815 # in this case
1816 if not info:
1817 return info
1818
1819 exempted_fields = {'_type', 'url', 'ie_key'}
1820 if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1821 # For video clips, the id etc of the clip extractor should be used
1822 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1823
1824 new_result = info.copy()
1825 new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1826
1827 # Extracted info may not be a video result (i.e.
1828 # info.get('_type', 'video') != 'video') but rather a url or
1829 # url_transparent. In such cases, the outer metadata (from ie_result)
1830 # should be propagated to the inner one (info). For this to happen,
1831 # the _type of info should be overridden with url_transparent. This
1832 # fixes the issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1833 if new_result.get('_type') == 'url':
1834 new_result['_type'] = 'url_transparent'
1835
1836 return self.process_ie_result(
1837 new_result, download=download, extra_info=extra_info)
1838 elif result_type in ('playlist', 'multi_video'):
1839 # Protect from infinite recursion due to recursively nested playlists
1840 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1841 webpage_url = ie_result.get('webpage_url') # Playlists may not have webpage_url
1842 if webpage_url and webpage_url in self._playlist_urls:
1843 self.to_screen(
1844 '[download] Skipping already downloaded playlist: %s'
1845 % (ie_result.get('title') or ie_result.get('id')))
1846 return
1847
1848 self._playlist_level += 1
1849 self._playlist_urls.add(webpage_url)
1850 self._fill_common_fields(ie_result, False)
1851 self._sanitize_thumbnails(ie_result)
1852 try:
1853 return self.__process_playlist(ie_result, download)
1854 finally:
1855 self._playlist_level -= 1
1856 if not self._playlist_level:
1857 self._playlist_urls.clear()
1858 elif result_type == 'compat_list':
1859 self.report_warning(
1860 'Extractor %s returned a compat_list result. '
1861 'It needs to be updated.' % ie_result.get('extractor'))
1862
1863 def _fixup(r):
1864 self.add_extra_info(r, {
1865 'extractor': ie_result['extractor'],
1866 'webpage_url': ie_result['webpage_url'],
1867 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1868 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1869 'extractor_key': ie_result['extractor_key'],
1870 })
1871 return r
1872 ie_result['entries'] = [
1873 self.process_ie_result(_fixup(r), download, extra_info)
1874 for r in ie_result['entries']
1875 ]
1876 return ie_result
1877 else:
1878 raise Exception('Invalid result type: %s' % result_type)
1879
1880 def _ensure_dir_exists(self, path):
1881 return make_dir(path, self.report_error)
1882
1883 @staticmethod
1884 def _playlist_infodict(ie_result, strict=False, **kwargs):
1885 info = {
1886 'playlist_count': ie_result.get('playlist_count'),
1887 'playlist': ie_result.get('title') or ie_result.get('id'),
1888 'playlist_id': ie_result.get('id'),
1889 'playlist_title': ie_result.get('title'),
1890 'playlist_uploader': ie_result.get('uploader'),
1891 'playlist_uploader_id': ie_result.get('uploader_id'),
1892 **kwargs,
1893 }
1894 if strict:
1895 return info
1896 if ie_result.get('webpage_url'):
1897 info.update({
1898 'webpage_url': ie_result['webpage_url'],
1899 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1900 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1901 })
1902 return {
1903 **info,
1904 'playlist_index': 0,
1905 '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)),
1906 'extractor': ie_result['extractor'],
1907 'extractor_key': ie_result['extractor_key'],
1908 }
1909
1910 def __process_playlist(self, ie_result, download):
1911 """Process each entry in the playlist"""
1912 assert ie_result['_type'] in ('playlist', 'multi_video')
1913
1914 common_info = self._playlist_infodict(ie_result, strict=True)
1915 title = common_info.get('playlist') or '<Untitled>'
1916 if self._match_entry(common_info, incomplete=True) is not None:
1917 return
1918 self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
1919
1920 all_entries = PlaylistEntries(self, ie_result)
1921 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1922
1923 lazy = self.params.get('lazy_playlist')
1924 if lazy:
1925 resolved_entries, n_entries = [], 'N/A'
1926 ie_result['requested_entries'], ie_result['entries'] = None, None
1927 else:
1928 entries = resolved_entries = list(entries)
1929 n_entries = len(resolved_entries)
1930 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1931 if not ie_result.get('playlist_count'):
1932 # Better to do this after potentially exhausting entries
1933 ie_result['playlist_count'] = all_entries.get_full_count()
1934
1935 extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1936 ie_copy = collections.ChainMap(ie_result, extra)
1937
1938 _infojson_written = False
1939 write_playlist_files = self.params.get('allow_playlist_files', True)
1940 if write_playlist_files and self.params.get('list_thumbnails'):
1941 self.list_thumbnails(ie_result)
1942 if write_playlist_files and not self.params.get('simulate'):
1943 _infojson_written = self._write_info_json(
1944 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1945 if _infojson_written is None:
1946 return
1947 if self._write_description('playlist', ie_result,
1948 self.prepare_filename(ie_copy, 'pl_description')) is None:
1949 return
1950 # TODO: This should be passed to ThumbnailsConvertor if necessary
1951 self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1952
1953 if lazy:
1954 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1955 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1956 elif self.params.get('playlistreverse'):
1957 entries.reverse()
1958 elif self.params.get('playlistrandom'):
1959 random.shuffle(entries)
1960
1961 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
1962 f'{format_field(ie_result, "playlist_count", " of %s")}')
1963
1964 keep_resolved_entries = self.params.get('extract_flat') != 'discard'
1965 if self.params.get('extract_flat') == 'discard_in_playlist':
1966 keep_resolved_entries = ie_result['_type'] != 'playlist'
1967 if keep_resolved_entries:
1968 self.write_debug('The information of all playlist entries will be held in memory')
1969
1970 failures = 0
1971 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1972 for i, (playlist_index, entry) in enumerate(entries):
1973 if lazy:
1974 resolved_entries.append((playlist_index, entry))
1975 if not entry:
1976 continue
1977
1978 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
1979 if not lazy and 'playlist-index' in self.params['compat_opts']:
1980 playlist_index = ie_result['requested_entries'][i]
1981
1982 entry_copy = collections.ChainMap(entry, {
1983 **common_info,
1984 'n_entries': int_or_none(n_entries),
1985 'playlist_index': playlist_index,
1986 'playlist_autonumber': i + 1,
1987 })
1988
1989 if self._match_entry(entry_copy, incomplete=True) is not None:
1990 # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
1991 resolved_entries[i] = (playlist_index, NO_DEFAULT)
1992 continue
1993
1994 self.to_screen('[download] Downloading item %s of %s' % (
1995 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
1996
1997 entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
1998 'playlist_index': playlist_index,
1999 'playlist_autonumber': i + 1,
2000 }, extra))
2001 if not entry_result:
2002 failures += 1
2003 if failures >= max_failures:
2004 self.report_error(
2005 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
2006 break
2007 if keep_resolved_entries:
2008 resolved_entries[i] = (playlist_index, entry_result)
2009
2010 # Update with processed data
2011 ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
2012 ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
2013 if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
2014 # Do not set for full playlist
2015 ie_result.pop('requested_entries')
2016
2017 # Write the updated info to json
2018 if _infojson_written is True and self._write_info_json(
2019 'updated playlist', ie_result,
2020 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
2021 return
2022
2023 ie_result = self.run_all_pps('playlist', ie_result)
2024 self.to_screen(f'[download] Finished downloading playlist: {title}')
2025 return ie_result
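# Note (added): with lazy_playlist, n_entries stays 'N/A' and entries are
# resolved only while iterating; this is why playlistreverse and
# playlistrandom cannot be honoured in that mode (warned above).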
2026
2027 @_handle_extraction_exceptions
2028 def __process_iterable_entry(self, entry, download, extra_info):
2029 return self.process_ie_result(
2030 entry, download=download, extra_info=extra_info)
2031
2032 def _build_format_filter(self, filter_spec):
2033 " Returns a function to filter the formats according to the filter_spec "
2034
2035 OPERATORS = {
2036 '<': operator.lt,
2037 '<=': operator.le,
2038 '>': operator.gt,
2039 '>=': operator.ge,
2040 '=': operator.eq,
2041 '!=': operator.ne,
2042 }
2043 operator_rex = re.compile(r'''(?x)\s*
2044 (?P<key>[\w.-]+)\s*
2045 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
2046 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
2047 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
2048 m = operator_rex.fullmatch(filter_spec)
2049 if m:
2050 try:
2051 comparison_value = int(m.group('value'))
2052 except ValueError:
2053 comparison_value = parse_filesize(m.group('value'))
2054 if comparison_value is None:
2055 comparison_value = parse_filesize(m.group('value') + 'B')
2056 if comparison_value is None:
2057 raise ValueError(
2058 'Invalid value %r in format specification %r' % (
2059 m.group('value'), filter_spec))
2060 op = OPERATORS[m.group('op')]
2061
2062 if not m:
2063 STR_OPERATORS = {
2064 '=': operator.eq,
2065 '^=': lambda attr, value: attr.startswith(value),
2066 '$=': lambda attr, value: attr.endswith(value),
2067 '*=': lambda attr, value: value in attr,
2068 '~=': lambda attr, value: value.search(attr) is not None
2069 }
2070 str_operator_rex = re.compile(r'''(?x)\s*
2071 (?P<key>[a-zA-Z0-9._-]+)\s*
2072 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
2073 (?P<quote>["'])?
2074 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
2075 (?(quote)(?P=quote))\s*
2076 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
2077 m = str_operator_rex.fullmatch(filter_spec)
2078 if m:
2079 if m.group('op') == '~=':
2080 comparison_value = re.compile(m.group('value'))
2081 else:
2082 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
2083 str_op = STR_OPERATORS[m.group('op')]
2084 if m.group('negation'):
2085 op = lambda attr, value: not str_op(attr, value)
2086 else:
2087 op = str_op
2088
2089 if not m:
2090 raise SyntaxError('Invalid filter specification %r' % filter_spec)
2091
2092 def _filter(f):
2093 actual_value = f.get(m.group('key'))
2094 if actual_value is None:
2095 return m.group('none_inclusive')
2096 return op(actual_value, comparison_value)
2097 return _filter
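# Illustrative examples (added) of filter specs accepted above:
#   _build_format_filter('height<=?720')      # numeric; '?' also keeps
#                                             # formats lacking the field
#   _build_format_filter('ext=mp4')           # string equality
#   _build_format_filter('format_id!*=dash')  # negated substring match
# Each call returns a predicate suitable for filter(..., formats).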
2098
2099 def _check_formats(self, formats):
2100 for f in formats:
2101 self.to_screen('[info] Testing format %s' % f['format_id'])
2102 path = self.get_output_path('temp')
2103 if not self._ensure_dir_exists(f'{path}/'):
2104 continue
2105 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
2106 temp_file.close()
2107 try:
2108 success, _ = self.dl(temp_file.name, f, test=True)
2109 except (DownloadError, OSError, ValueError) + network_exceptions:
2110 success = False
2111 finally:
2112 if os.path.exists(temp_file.name):
2113 try:
2114 os.remove(temp_file.name)
2115 except OSError:
2116 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
2117 if success:
2118 yield f
2119 else:
2120 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
2121
2122 def _default_format_spec(self, info_dict, download=True):
2123
2124 def can_merge():
2125 merger = FFmpegMergerPP(self)
2126 return merger.available and merger.can_merge()
2127
2128 prefer_best = (
2129 not self.params.get('simulate')
2130 and download
2131 and (
2132 not can_merge()
2133 or info_dict.get('is_live') and not self.params.get('live_from_start')
2134 or self.params['outtmpl']['default'] == '-'))
2135 compat = (
2136 prefer_best
2137 or self.params.get('allow_multiple_audio_streams', False)
2138 or 'format-spec' in self.params['compat_opts'])
2139
2140 return (
2141 'best/bestvideo+bestaudio' if prefer_best
2142 else 'bestvideo*+bestaudio/best' if not compat
2143 else 'bestvideo+bestaudio/best')
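# E.g. (illustrative): with a working ffmpeg merger and a regular file
# target, this resolves to 'bestvideo*+bestaudio/best'; when downloading
# to stdout ('-') or without a merger, it degrades to
# 'best/bestvideo+bestaudio'.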
2144
2145 def build_format_selector(self, format_spec):
2146 def syntax_error(note, start):
2147 message = (
2148 'Invalid format specification: '
2149 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
2150 return SyntaxError(message)
2151
2152 PICKFIRST = 'PICKFIRST'
2153 MERGE = 'MERGE'
2154 SINGLE = 'SINGLE'
2155 GROUP = 'GROUP'
2156 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2157
2158 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2159 'video': self.params.get('allow_multiple_video_streams', False)}
2160
2161 def _parse_filter(tokens):
2162 filter_parts = []
2163 for type, string_, start, _, _ in tokens:
2164 if type == tokenize.OP and string_ == ']':
2165 return ''.join(filter_parts)
2166 else:
2167 filter_parts.append(string_)
2168
2169 def _remove_unused_ops(tokens):
2170 # Remove operators that we don't use and join them with the surrounding strings.
2171 # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
2172 ALLOWED_OPS = ('/', '+', ',', '(', ')')
2173 last_string, last_start, last_end, last_line = None, None, None, None
2174 for type, string_, start, end, line in tokens:
2175 if type == tokenize.OP and string_ == '[':
2176 if last_string:
2177 yield tokenize.NAME, last_string, last_start, last_end, last_line
2178 last_string = None
2179 yield type, string_, start, end, line
2180 # everything inside brackets will be handled by _parse_filter
2181 for type, string_, start, end, line in tokens:
2182 yield type, string_, start, end, line
2183 if type == tokenize.OP and string_ == ']':
2184 break
2185 elif type == tokenize.OP and string_ in ALLOWED_OPS:
2186 if last_string:
2187 yield tokenize.NAME, last_string, last_start, last_end, last_line
2188 last_string = None
2189 yield type, string_, start, end, line
2190 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2191 if not last_string:
2192 last_string = string_
2193 last_start = start
2194 last_end = end
2195 else:
2196 last_string += string_
2197 if last_string:
2198 yield tokenize.NAME, last_string, last_start, last_end, last_line
2199
2200 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
2201 selectors = []
2202 current_selector = None
2203 for type, string_, start, _, _ in tokens:
2204 # ENCODING is only defined in python 3.x
2205 if type == getattr(tokenize, 'ENCODING', None):
2206 continue
2207 elif type in [tokenize.NAME, tokenize.NUMBER]:
2208 current_selector = FormatSelector(SINGLE, string_, [])
2209 elif type == tokenize.OP:
2210 if string_ == ')':
2211 if not inside_group:
2212 # ')' will be handled by the parentheses group
2213 tokens.restore_last_token()
2214 break
2215 elif inside_merge and string_ in ['/', ',']:
2216 tokens.restore_last_token()
2217 break
2218 elif inside_choice and string_ == ',':
2219 tokens.restore_last_token()
2220 break
2221 elif string_ == ',':
2222 if not current_selector:
2223 raise syntax_error('"," must follow a format selector', start)
2224 selectors.append(current_selector)
2225 current_selector = None
2226 elif string_ == '/':
2227 if not current_selector:
2228 raise syntax_error('"/" must follow a format selector', start)
2229 first_choice = current_selector
2230 second_choice = _parse_format_selection(tokens, inside_choice=True)
2231 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2232 elif string_ == '[':
2233 if not current_selector:
2234 current_selector = FormatSelector(SINGLE, 'best', [])
2235 format_filter = _parse_filter(tokens)
2236 current_selector.filters.append(format_filter)
2237 elif string_ == '(':
2238 if current_selector:
2239 raise syntax_error('Unexpected "("', start)
2240 group = _parse_format_selection(tokens, inside_group=True)
2241 current_selector = FormatSelector(GROUP, group, [])
2242 elif string_ == '+':
2243 if not current_selector:
2244 raise syntax_error('Unexpected "+"', start)
2245 selector_1 = current_selector
2246 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2247 if not selector_2:
2248 raise syntax_error('Expected a selector', start)
2249 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2250 else:
2251 raise syntax_error(f'Operator not recognized: "{string_}"', start)
2252 elif type == tokenize.ENDMARKER:
2253 break
2254 if current_selector:
2255 selectors.append(current_selector)
2256 return selectors
2257
2258 def _merge(formats_pair):
2259 format_1, format_2 = formats_pair
2260
2261 formats_info = []
2262 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2263 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2264
2265 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2266 get_no_more = {'video': False, 'audio': False}
2267 for (i, fmt_info) in enumerate(formats_info):
2268 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2269 formats_info.pop(i)
2270 continue
2271 for aud_vid in ['audio', 'video']:
2272 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2273 if get_no_more[aud_vid]:
2274 formats_info.pop(i)
2275 break
2276 get_no_more[aud_vid] = True
2277
2278 if len(formats_info) == 1:
2279 return formats_info[0]
2280
2281 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2282 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2283
2284 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2285 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2286
2287 output_ext = get_compatible_ext(
2288 vcodecs=[f.get('vcodec') for f in video_fmts],
2289 acodecs=[f.get('acodec') for f in audio_fmts],
2290 vexts=[f['ext'] for f in video_fmts],
2291 aexts=[f['ext'] for f in audio_fmts],
2292 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2293 or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
2294
2295 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2296
2297 new_dict = {
2298 'requested_formats': formats_info,
2299 'format': '+'.join(filtered('format')),
2300 'format_id': '+'.join(filtered('format_id')),
2301 'ext': output_ext,
2302 'protocol': '+'.join(map(determine_protocol, formats_info)),
2303 'language': '+'.join(orderedSet(filtered('language'))) or None,
2304 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2305 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2306 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2307 }
2308
2309 if the_only_video:
2310 new_dict.update({
2311 'width': the_only_video.get('width'),
2312 'height': the_only_video.get('height'),
2313 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2314 'fps': the_only_video.get('fps'),
2315 'dynamic_range': the_only_video.get('dynamic_range'),
2316 'vcodec': the_only_video.get('vcodec'),
2317 'vbr': the_only_video.get('vbr'),
2318 'stretched_ratio': the_only_video.get('stretched_ratio'),
2319 'aspect_ratio': the_only_video.get('aspect_ratio'),
2320 })
2321
2322 if the_only_audio:
2323 new_dict.update({
2324 'acodec': the_only_audio.get('acodec'),
2325 'abr': the_only_audio.get('abr'),
2326 'asr': the_only_audio.get('asr'),
2327 'audio_channels': the_only_audio.get('audio_channels')
2328 })
2329
2330 return new_dict
2331
2332 def _check_formats(formats):
2333 if (self.params.get('check_formats') is not None
2334 or self.params.get('allow_unplayable_formats')):
2335 yield from formats
2336 return
2337 elif self.params.get('check_formats') == 'selected':
2338 yield from self._check_formats(formats)
2339 return
2340
2341 for f in formats:
2342 if f.get('has_drm'):
2343 yield from self._check_formats([f])
2344 else:
2345 yield f
2346
2347 def _build_selector_function(selector):
2348 if isinstance(selector, list): # ,
2349 fs = [_build_selector_function(s) for s in selector]
2350
2351 def selector_function(ctx):
2352 for f in fs:
2353 yield from f(ctx)
2354 return selector_function
2355
2356 elif selector.type == GROUP: # ()
2357 selector_function = _build_selector_function(selector.selector)
2358
2359 elif selector.type == PICKFIRST: # /
2360 fs = [_build_selector_function(s) for s in selector.selector]
2361
2362 def selector_function(ctx):
2363 for f in fs:
2364 picked_formats = list(f(ctx))
2365 if picked_formats:
2366 return picked_formats
2367 return []
2368
2369 elif selector.type == MERGE: # +
2370 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2371
2372 def selector_function(ctx):
2373 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2374 yield _merge(pair)
2375
2376 elif selector.type == SINGLE: # atom
2377 format_spec = selector.selector or 'best'
2378
2379 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2380 if format_spec == 'all':
2381 def selector_function(ctx):
2382 yield from _check_formats(ctx['formats'][::-1])
2383 elif format_spec == 'mergeall':
2384 def selector_function(ctx):
2385 formats = list(_check_formats(
2386 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2387 if not formats:
2388 return
2389 merged_format = formats[-1]
2390 for f in formats[-2::-1]:
2391 merged_format = _merge((merged_format, f))
2392 yield merged_format
2393
2394 else:
2395 format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
2396 mobj = re.match(
2397 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2398 format_spec)
2399 if mobj is not None:
2400 format_idx = int_or_none(mobj.group('n'), default=1)
2401 format_reverse = mobj.group('bw')[0] == 'b'
2402 format_type = (mobj.group('type') or [None])[0]
2403 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2404 format_modified = mobj.group('mod') is not None
2405
2406 format_fallback = not format_type and not format_modified # for b, w
2407 _filter_f = (
2408 (lambda f: f.get('%scodec' % format_type) != 'none')
2409 if format_type and format_modified # bv*, ba*, wv*, wa*
2410 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2411 if format_type # bv, ba, wv, wa
2412 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2413 if not format_modified # b, w
2414 else lambda f: True) # b*, w*
2415 filter_f = lambda f: _filter_f(f) and (
2416 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2417 else:
2418 if format_spec in self._format_selection_exts['audio']:
2419 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2420 elif format_spec in self._format_selection_exts['video']:
2421 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2422 separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2423 elif format_spec in self._format_selection_exts['storyboards']:
2424 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2425 else:
2426 filter_f = lambda f: f.get('format_id') == format_spec # id
2427
2428 def selector_function(ctx):
2429 formats = list(ctx['formats'])
2430 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2431 if not matches:
2432 if format_fallback and ctx['incomplete_formats']:
2433 # for extractors with incomplete formats (audio only (soundcloud)
2434 # or video only (imgur)), best/worst will fall back to the
2435 # best/worst {video,audio}-only format
2436 matches = formats
2437 elif separate_fallback and not ctx['has_merged_format']:
2438 # for compatibility with youtube-dl when there is no pre-merged format
2439 matches = list(filter(separate_fallback, formats))
2440 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2441 try:
2442 yield matches[format_idx - 1]
2443 except LazyList.IndexError:
2444 return
2445
2446 filters = [self._build_format_filter(f) for f in selector.filters]
2447
2448 def final_selector(ctx):
2449 ctx_copy = dict(ctx)
2450 for _filter in filters:
2451 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2452 return selector_function(ctx_copy)
2453 return final_selector
2454
2455 stream = io.BytesIO(format_spec.encode())
2456 try:
2457 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2458 except tokenize.TokenError:
2459 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2460
2461 class TokenIterator:
2462 def __init__(self, tokens):
2463 self.tokens = tokens
2464 self.counter = 0
2465
2466 def __iter__(self):
2467 return self
2468
2469 def __next__(self):
2470 if self.counter >= len(self.tokens):
2471 raise StopIteration()
2472 value = self.tokens[self.counter]
2473 self.counter += 1
2474 return value
2475
2476 next = __next__
2477
2478 def restore_last_token(self):
2479 self.counter -= 1
2480
2481 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2482 return _build_selector_function(parsed_selector)
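# Illustrative usage (added): the returned selector is fed a context dict
# (built later in process_video_result), roughly:
#
#   selector = ydl.build_format_selector('bv*+ba/b')
#   chosen = list(selector({'formats': formats,
#                           'incomplete_formats': False,
#                           'has_merged_format': False}))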
2483
2484 def _calc_headers(self, info_dict):
2485 res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers'))
2486 clean_headers(res)
2487 cookies = self.cookiejar.get_cookies_for_url(info_dict['url'])
2488 if cookies:
2489 encoder = LenientSimpleCookie()
2490 values = []
2491 for cookie in cookies:
2492 _, value = encoder.value_encode(cookie.value)
2493 values.append(f'{cookie.name}={value}')
2494 if cookie.domain:
2495 values.append(f'Domain={cookie.domain}')
2496 if cookie.path:
2497 values.append(f'Path={cookie.path}')
2498 if cookie.secure:
2499 values.append('Secure')
2500 if cookie.expires:
2501 values.append(f'Expires={cookie.expires}')
2502 if cookie.version:
2503 values.append(f'Version={cookie.version}')
2504 info_dict['cookies'] = '; '.join(values)
2505
2506 if 'X-Forwarded-For' not in res:
2507 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2508 if x_forwarded_for_ip:
2509 res['X-Forwarded-For'] = x_forwarded_for_ip
2510
2511 return res
2512
2513 def _calc_cookies(self, url):
2514 self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
2515 return self.cookiejar.get_cookie_header(url)
2516
2517 def _sort_thumbnails(self, thumbnails):
2518 thumbnails.sort(key=lambda t: (
2519 t.get('preference') if t.get('preference') is not None else -1,
2520 t.get('width') if t.get('width') is not None else -1,
2521 t.get('height') if t.get('height') is not None else -1,
2522 t.get('id') if t.get('id') is not None else '',
2523 t.get('url')))
2524
2525 def _sanitize_thumbnails(self, info_dict):
2526 thumbnails = info_dict.get('thumbnails')
2527 if thumbnails is None:
2528 thumbnail = info_dict.get('thumbnail')
2529 if thumbnail:
2530 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2531 if not thumbnails:
2532 return
2533
2534 def check_thumbnails(thumbnails):
2535 for t in thumbnails:
2536 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2537 try:
2538 self.urlopen(HEADRequest(t['url']))
2539 except network_exceptions as err:
2540 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2541 continue
2542 yield t
2543
2544 self._sort_thumbnails(thumbnails)
2545 for i, t in enumerate(thumbnails):
2546 if t.get('id') is None:
2547 t['id'] = '%d' % i
2548 if t.get('width') and t.get('height'):
2549 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2550 t['url'] = sanitize_url(t['url'])
2551
2552 if self.params.get('check_formats') is True:
2553 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2554 else:
2555 info_dict['thumbnails'] = thumbnails
2556
2557 def _fill_common_fields(self, info_dict, final=True):
2558 # TODO: move sanitization here
2559 if final:
2560 title = info_dict['fulltitle'] = info_dict.get('title')
2561 if not title:
2562 if title == '':
2563 self.write_debug('Extractor gave empty title. Creating a generic title')
2564 else:
2565 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2566 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2567
2568 if info_dict.get('duration') is not None:
2569 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2570
2571 for ts_key, date_key in (
2572 ('timestamp', 'upload_date'),
2573 ('release_timestamp', 'release_date'),
2574 ('modified_timestamp', 'modified_date'),
2575 ):
2576 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2577 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2578 # see http://bugs.python.org/issue1646728)
2579 with contextlib.suppress(ValueError, OverflowError, OSError):
2580 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2581 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2582
2583 live_keys = ('is_live', 'was_live')
2584 live_status = info_dict.get('live_status')
2585 if live_status is None:
2586 for key in live_keys:
2587 if info_dict.get(key) is False:
2588 continue
2589 if info_dict.get(key):
2590 live_status = key
2591 break
2592 if all(info_dict.get(key) is False for key in live_keys):
2593 live_status = 'not_live'
2594 if live_status:
2595 info_dict['live_status'] = live_status
2596 for key in live_keys:
2597 if info_dict.get(key) is None:
2598 info_dict[key] = (live_status == key)
2599 if live_status == 'post_live':
2600 info_dict['was_live'] = True
2601
2602 # Auto generate title fields corresponding to the *_number fields when missing
2603 # in order to always have clean titles. This is very common for TV series.
2604 for field in ('chapter', 'season', 'episode'):
2605 if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2606 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
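# E.g. (illustrative): an entry with {'timestamp': 1577836800} and no
# 'upload_date' gains upload_date == '20200101' via the loop above; the
# same mapping applies to the release_* and modified_* pairs.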
2607
2608 def _raise_pending_errors(self, info):
2609 err = info.pop('__pending_error', None)
2610 if err:
2611 self.report_error(err, tb=False)
2612
2613 def sort_formats(self, info_dict):
2614 formats = self._get_formats(info_dict)
2615 formats.sort(key=FormatSorter(
2616 self, info_dict.get('_format_sort_fields') or []).calculate_preference)
2617
2618 def process_video_result(self, info_dict, download=True):
2619 assert info_dict.get('_type', 'video') == 'video'
2620 self._num_videos += 1
2621
2622 if 'id' not in info_dict:
2623 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2624 elif not info_dict.get('id'):
2625 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2626
2627 def report_force_conversion(field, field_not, conversion):
2628 self.report_warning(
2629 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2630 % (field, field_not, conversion))
2631
2632 def sanitize_string_field(info, string_field):
2633 field = info.get(string_field)
2634 if field is None or isinstance(field, str):
2635 return
2636 report_force_conversion(string_field, 'a string', 'string')
2637 info[string_field] = str(field)
2638
2639 def sanitize_numeric_fields(info):
2640 for numeric_field in self._NUMERIC_FIELDS:
2641 field = info.get(numeric_field)
2642 if field is None or isinstance(field, (int, float)):
2643 continue
2644 report_force_conversion(numeric_field, 'numeric', 'int')
2645 info[numeric_field] = int_or_none(field)
2646
2647 sanitize_string_field(info_dict, 'id')
2648 sanitize_numeric_fields(info_dict)
2649 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2650 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
2651 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2652 self.report_warning('"duration" field is negative, there is an error in extractor')
2653
2654 chapters = info_dict.get('chapters') or []
2655 if chapters and chapters[0].get('start_time'):
2656 chapters.insert(0, {'start_time': 0})
2657
2658 dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
2659 for idx, (prev, current, next_) in enumerate(zip(
2660 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
2661 if current.get('start_time') is None:
2662 current['start_time'] = prev.get('end_time')
2663 if not current.get('end_time'):
2664 current['end_time'] = next_.get('start_time')
2665 if not current.get('title'):
2666 current['title'] = f'<Untitled Chapter {idx}>'
2667
2668 if 'playlist' not in info_dict:
2669 # It isn't part of a playlist
2670 info_dict['playlist'] = None
2671 info_dict['playlist_index'] = None
2672
2673 self._sanitize_thumbnails(info_dict)
2674
2675 thumbnail = info_dict.get('thumbnail')
2676 thumbnails = info_dict.get('thumbnails')
2677 if thumbnail:
2678 info_dict['thumbnail'] = sanitize_url(thumbnail)
2679 elif thumbnails:
2680 info_dict['thumbnail'] = thumbnails[-1]['url']
2681
2682 if info_dict.get('display_id') is None and 'id' in info_dict:
2683 info_dict['display_id'] = info_dict['id']
2684
2685 self._fill_common_fields(info_dict)
2686
2687 for cc_kind in ('subtitles', 'automatic_captions'):
2688 cc = info_dict.get(cc_kind)
2689 if cc:
2690 for _, subtitle in cc.items():
2691 for subtitle_format in subtitle:
2692 if subtitle_format.get('url'):
2693 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2694 if subtitle_format.get('ext') is None:
2695 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2696
2697 automatic_captions = info_dict.get('automatic_captions')
2698 subtitles = info_dict.get('subtitles')
2699
2700 info_dict['requested_subtitles'] = self.process_subtitles(
2701 info_dict['id'], subtitles, automatic_captions)
2702
2703 formats = self._get_formats(info_dict)
2704
2705 # Backward compatibility with InfoExtractor._sort_formats
2706 field_preference = (formats or [{}])[0].pop('__sort_fields', None)
2707 if field_preference:
2708 info_dict['_format_sort_fields'] = field_preference
2709
2710 info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it
2711 f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None
2712 if not self.params.get('allow_unplayable_formats'):
2713 formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe']
2714
2715 if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2716 self.report_warning(
2717 'This video is DRM protected and only images are available for download. Use --list-formats to see them'
2718 if info_dict['_has_drm'] else 'Only images are available for download. Use --list-formats to see them')
2719
2720 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2721 if not get_from_start:
2722 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2723 if info_dict.get('is_live') and formats:
2724 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2725 if get_from_start and not formats:
2726 self.raise_no_formats(info_dict, msg=(
2727 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2728 'If you want to download from the current time, use --no-live-from-start'))
2729
2730 def is_wellformed(f):
2731 url = f.get('url')
2732 if not url:
2733 self.report_warning(
2734 '"url" field is missing or empty - skipping format, '
2735 'there is an error in extractor')
2736 return False
2737 if isinstance(url, bytes):
2738 sanitize_string_field(f, 'url')
2739 return True
2740
2741 # Filter out malformed formats for better extraction robustness
2742 formats = list(filter(is_wellformed, formats or []))
2743
2744 if not formats:
2745 self.raise_no_formats(info_dict)
2746
2747 for format in formats:
2748 sanitize_string_field(format, 'format_id')
2749 sanitize_numeric_fields(format)
2750 format['url'] = sanitize_url(format['url'])
2751 if format.get('ext') is None:
2752 format['ext'] = determine_ext(format['url']).lower()
2753 if format.get('protocol') is None:
2754 format['protocol'] = determine_protocol(format)
2755 if format.get('resolution') is None:
2756 format['resolution'] = self.format_resolution(format, default=None)
2757 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2758 format['dynamic_range'] = 'SDR'
2759 if format.get('aspect_ratio') is None:
2760 format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
2761 if (not format.get('manifest_url') # For fragmented formats, "tbr" is often max bitrate and not average
2762 and info_dict.get('duration') and format.get('tbr')
2763 and not format.get('filesize') and not format.get('filesize_approx')):
2764 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
2765 format['http_headers'] = self._calc_headers(collections.ChainMap(format, info_dict))
2766
2767 # This is copied to http_headers by the above _calc_headers and can now be removed
2768 if '__x_forwarded_for_ip' in info_dict:
2769 del info_dict['__x_forwarded_for_ip']
2770
2771 self.sort_formats({
2772 'formats': formats,
2773 '_format_sort_fields': info_dict.get('_format_sort_fields')
2774 })
2775
2776 # Sanitize and group by format_id
2777 formats_dict = {}
2778 for i, format in enumerate(formats):
2779 if not format.get('format_id'):
2780 format['format_id'] = str(i)
2781 else:
2782 # Sanitize format_id by replacing characters used in format selector expressions
2783 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
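# e.g. (illustrative) 'hls-1080p (en)' -> 'hls-1080p__en_', so the id stays usable in "-f" expressions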
2784 formats_dict.setdefault(format['format_id'], []).append(format)
2785
2786 # Make sure all formats have unique format_id
2787 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2788 for format_id, ambiguous_formats in formats_dict.items():
2789 ambiguous_id = len(ambiguous_formats) > 1
2790 for i, format in enumerate(ambiguous_formats):
2791 if ambiguous_id:
2792 format['format_id'] = '%s-%d' % (format_id, i)
2793 # Ensure there is no conflict between id and ext in format selection
2794 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2795 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2796 format['format_id'] = 'f%s' % format['format_id']
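# e.g. (illustrative) a format with format_id 'mp4' but a different ext becomes 'fmp4',
# so that "-f mp4" keeps selecting by extension rather than by this id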
2797
2798 if format.get('format') is None:
2799 format['format'] = '{id} - {res}{note}'.format(
2800 id=format['format_id'],
2801 res=self.format_resolution(format),
2802 note=format_field(format, 'format_note', ' (%s)'),
2803 )
2804
2805 if self.params.get('check_formats') is True:
2806 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2807
2808 if not formats or formats[0] is not info_dict:
2809 # only set the 'formats' field if the original info_dict lists them;
2810 # otherwise we end up with a circular reference: the first (and only)
2811 # element of the 'formats' field in info_dict would be info_dict itself,
2812 # which can't be exported to JSON
2813 info_dict['formats'] = formats
2814
2815 info_dict, _ = self.pre_process(info_dict)
2816
2817 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2818 return info_dict
2819
2820 self.post_extract(info_dict)
2821 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2822
2823 # The pre-processors may have modified the formats
2824 formats = self._get_formats(info_dict)
2825
2826 list_only = self.params.get('simulate') == 'list_only'
2827 interactive_format_selection = not list_only and self.format_selector == '-'
2828 if self.params.get('list_thumbnails'):
2829 self.list_thumbnails(info_dict)
2830 if self.params.get('listsubtitles'):
2831 if 'automatic_captions' in info_dict:
2832 self.list_subtitles(
2833 info_dict['id'], automatic_captions, 'automatic captions')
2834 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2835 if self.params.get('listformats') or interactive_format_selection:
2836 self.list_formats(info_dict)
2837 if list_only:
2838 # Without this printing, -F --print-json will not work
2839 self.__forced_printings(info_dict)
2840 return info_dict
2841
2842 format_selector = self.format_selector
2843 while True:
2844 if interactive_format_selection:
2845 req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS)
2846 + '(Press ENTER for default, or Ctrl+C to quit)'
2847 + self._format_screen(': ', self.Styles.EMPHASIS))
2848 try:
2849 format_selector = self.build_format_selector(req_format) if req_format else None
2850 except SyntaxError as err:
2851 self.report_error(err, tb=False, is_error=False)
2852 continue
2853
2854 if format_selector is None:
2855 req_format = self._default_format_spec(info_dict, download=download)
2856 self.write_debug(f'Default format spec: {req_format}')
2857 format_selector = self.build_format_selector(req_format)
2858
2859 formats_to_download = list(format_selector({
2860 'formats': formats,
2861 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2862 'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video
2863 or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio
2864 }))
2865 if interactive_format_selection and not formats_to_download:
2866 self.report_error('Requested format is not available', tb=False, is_error=False)
2867 continue
2868 break
2869
2870 if not formats_to_download:
2871 if not self.params.get('ignore_no_formats_error'):
2872 raise ExtractorError(
2873 'Requested format is not available. Use --list-formats for a list of available formats',
2874 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2875 self.report_warning('Requested format is not available')
2876 # Process what we can, even without any available formats.
2877 formats_to_download = [{}]
2878
2879 requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
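# 'download_ranges' is a callable (info_dict, ydl) -> iterable of section dicts with
# 'start_time'/'end_time' (and optionally 'title'/'index') keys, as produced by --download-sections;
# the default returns a single empty dict, i.e. the whole video as one range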
2880 best_format, downloaded_formats = formats_to_download[-1], []
2881 if download:
2882 if best_format and requested_ranges:
2883 def to_screen(*msg):
2884 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2885
2886 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2887 (f['format_id'] for f in formats_to_download))
2888 if requested_ranges != ({}, ):
2889 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2890 (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
2891 max_downloads_reached = False
2892
2893 for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
2894 new_info = self._copy_infodict(info_dict)
2895 new_info.update(fmt)
2896 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
2897 end_time = offset + min(chapter.get('end_time', duration), duration)
2898 # duration may not be accurate, so allow deviations of <1 sec
2899 if end_time == float('inf') or end_time > offset + duration + 1:
2900 end_time = None
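# e.g. (illustrative) a 300 s video with a requested range {'start_time': 60, 'end_time': 120}
# yields section_start=60, section_end=120; an open-ended range on an unknown duration
# yields section_end=None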
2901 if chapter or offset:
2902 new_info.update({
2903 'section_start': offset + chapter.get('start_time', 0),
2904 'section_end': end_time,
2905 'section_title': chapter.get('title'),
2906 'section_number': chapter.get('index'),
2907 })
2908 downloaded_formats.append(new_info)
2909 try:
2910 self.process_info(new_info)
2911 except MaxDownloadsReached:
2912 max_downloads_reached = True
2913 self._raise_pending_errors(new_info)
2914 # Remove copied info
2915 for key, val in tuple(new_info.items()):
2916 if info_dict.get(key) == val:
2917 new_info.pop(key)
2918 if max_downloads_reached:
2919 break
2920
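# Each downloaded format sets '__write_download_archive' to True (success), False (failure)
# or 'ignore' (skipped, e.g. already in the archive); record the video only if at least one
# format succeeded and none failed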
2921 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
2922 assert write_archive.issubset({True, False, 'ignore'})
2923 if True in write_archive and False not in write_archive:
2924 self.record_download_archive(info_dict)
2925
2926 info_dict['requested_downloads'] = downloaded_formats
2927 info_dict = self.run_all_pps('after_video', info_dict)
2928 if max_downloads_reached:
2929 raise MaxDownloadsReached()
2930
2931 # We update the info dict with the selected best quality format (backwards compatibility)
2932 info_dict.update(best_format)
2933 return info_dict
2934
2935 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2936 """Select the requested subtitles and their format"""
2937 available_subs, normal_sub_langs = {}, []
2938 if normal_subtitles and self.params.get('writesubtitles'):
2939 available_subs.update(normal_subtitles)
2940 normal_sub_langs = tuple(normal_subtitles.keys())
2941 if automatic_captions and self.params.get('writeautomaticsub'):
2942 for lang, cap_info in automatic_captions.items():
2943 if lang not in available_subs:
2944 available_subs[lang] = cap_info
2945
2946 if not available_subs or (
2947 not self.params.get('writesubtitles')
2948 and not self.params.get('writeautomaticsub')):
2949 return None
2950
2951 all_sub_langs = tuple(available_subs.keys())
2952 if self.params.get('allsubtitles', False):
2953 requested_langs = all_sub_langs
2954 elif self.params.get('subtitleslangs', False):
2955 try:
2956 requested_langs = orderedSet_from_options(
2957 self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
2958 except re.error as e:
2959 raise ValueError(f'Wrong regex for subtitleslangs: {e.pattern}')
2960 else:
2961 requested_langs = LazyList(itertools.chain(
2962 ['en'] if 'en' in normal_sub_langs else [],
2963 filter(lambda f: f.startswith('en'), normal_sub_langs),
2964 ['en'] if 'en' in all_sub_langs else [],
2965 filter(lambda f: f.startswith('en'), all_sub_langs),
2966 normal_sub_langs, all_sub_langs,
2967 ))[:1]
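# Default preference, first match wins: 'en' among normal subs, then any 'en*' normal sub,
# then 'en' among all subs (incl. auto captions), then any 'en*', then the first normal sub,
# then the first available one. e.g. (illustrative) with normal subs {'fr'} and auto
# captions {'en'} both enabled, 'en' is picked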
2968 if requested_langs:
2969 self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
2970
2971 formats_query = self.params.get('subtitlesformat', 'best')
2972 formats_preference = formats_query.split('/') if formats_query else []
2973 subs = {}
2974 for lang in requested_langs:
2975 formats = available_subs.get(lang)
2976 if formats is None:
2977 self.report_warning(f'{lang} subtitles not available for {video_id}')
2978 continue
2979 for ext in formats_preference:
2980 if ext == 'best':
2981 f = formats[-1]
2982 break
2983 matches = list(filter(lambda f: f['ext'] == ext, formats))
2984 if matches:
2985 f = matches[-1]
2986 break
2987 else:
2988 f = formats[-1]
2989 self.report_warning(
2990 'No subtitle format found matching "%s" for language %s, '
2991 'using %s' % (formats_query, lang, f['ext']))
2992 subs[lang] = f
2993 return subs
2994
2995 def _forceprint(self, key, info_dict):
2996 if info_dict is None:
2997 return
2998 info_copy = info_dict.copy()
2999 info_copy.setdefault('filename', self.prepare_filename(info_dict))
3000 if info_dict.get('requested_formats') is not None:
3001 # For RTMP URLs, also include the playpath
3002 info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
3003 elif info_dict.get('url'):
3004 info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '')
3005 info_copy['formats_table'] = self.render_formats_table(info_dict)
3006 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
3007 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
3008 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
3009
3010 def format_tmpl(tmpl):
3011 mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
3012 if not mobj:
3013 return tmpl
3014
3015 fmt = '%({})s'
3016 if tmpl.startswith('{'):
3017 tmpl, fmt = f'.{tmpl}', '%({})j'
3018 if tmpl.endswith('='):
3019 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
3020 return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
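# Illustrative expansions of the shorthand accepted above:
#   'title,id' -> '%(title)s\n%(id)s'    (one line per field)
#   'title='   -> 'title = %(title)#j'   (key = pretty-printed JSON value)
#   '{title}'  -> '%(.{title})j'         (dict of the given fields as JSON)
# Anything not matching the pattern is passed through as a regular output template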
3021
3022 for tmpl in self.params['forceprint'].get(key, []):
3023 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
3024
3025 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
3026 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
3027 tmpl = format_tmpl(tmpl)
3028 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
3029 if self._ensure_dir_exists(filename):
3030 with open(filename, 'a', encoding='utf-8', newline='') as f:
3031 f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep)
3032
3033 return info_copy
3034
3035 def __forced_printings(self, info_dict, filename=None, incomplete=True):
3036 if (self.params.get('forcejson')
3037 or self.params['forceprint'].get('video')
3038 or self.params['print_to_file'].get('video')):
3039 self.post_extract(info_dict)
3040 if filename:
3041 info_dict['filename'] = filename
3042 info_copy = self._forceprint('video', info_dict)
3043
3044 def print_field(field, actual_field=None, optional=False):
3045 if actual_field is None:
3046 actual_field = field
3047 if self.params.get(f'force{field}') and (
3048 info_copy.get(field) is not None or (not optional and not incomplete)):
3049 self.to_stdout(info_copy[actual_field])
3050
3051 print_field('title')
3052 print_field('id')
3053 print_field('url', 'urls')
3054 print_field('thumbnail', optional=True)
3055 print_field('description', optional=True)
3056 print_field('filename')
3057 if self.params.get('forceduration') and info_copy.get('duration') is not None:
3058 self.to_stdout(formatSeconds(info_copy['duration']))
3059 print_field('format')
3060
3061 if self.params.get('forcejson'):
3062 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
3063
3064 def dl(self, name, info, subtitle=False, test=False):
3065 if not info.get('url'):
3066 self.raise_no_formats(info, True)
3067
3068 if test:
3069 verbose = self.params.get('verbose')
3070 params = {
3071 'test': True,
3072 'quiet': self.params.get('quiet') or not verbose,
3073 'verbose': verbose,
3074 'noprogress': not verbose,
3075 'nopart': True,
3076 'skip_unavailable_fragments': False,
3077 'keep_fragments': False,
3078 'overwrites': True,
3079 '_no_ytdl_file': True,
3080 }
3081 else:
3082 params = self.params
3083 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
3084 if not test:
3085 for ph in self._progress_hooks:
3086 fd.add_progress_hook(ph)
3087 urls = '", "'.join(
3088 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
3089 for f in info.get('requested_formats', []) or [info])
3090 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
3091
3092 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
3093 # But it may contain objects that are not deep-copyable
3094 new_info = self._copy_infodict(info)
3095 if new_info.get('http_headers') is None:
3096 new_info['http_headers'] = self._calc_headers(new_info)
3097 return fd.download(name, new_info, subtitle)
3098
3099 def existing_file(self, filepaths, *, default_overwrite=True):
3100 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
3101 if existing_files and not self.params.get('overwrites', default_overwrite):
3102 return existing_files[0]
3103
3104 for file in existing_files:
3105 self.report_file_delete(file)
3106 os.remove(file)
3107 return None
3108
3109 def process_info(self, info_dict):
3110 """Process a single resolved IE result. (Modifies it in-place)"""
3111
3112 assert info_dict.get('_type', 'video') == 'video'
3113 original_infodict = info_dict
3114
3115 if 'format' not in info_dict and 'ext' in info_dict:
3116 info_dict['format'] = info_dict['ext']
3117
3118 if self._match_entry(info_dict) is not None:
3119 info_dict['__write_download_archive'] = 'ignore'
3120 return
3121
3122 # Does nothing under normal operation - for backward compatibility of process_info
3123 self.post_extract(info_dict)
3124
3125 def replace_info_dict(new_info):
3126 nonlocal info_dict
3127 if new_info == info_dict:
3128 return
3129 info_dict.clear()
3130 info_dict.update(new_info)
3131
3132 new_info, _ = self.pre_process(info_dict, 'video')
3133 replace_info_dict(new_info)
3134 self._num_downloads += 1
3135
3136 # info_dict['_filename'] needs to be set for backward compatibility
3137 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
3138 temp_filename = self.prepare_filename(info_dict, 'temp')
3139 files_to_move = {}
3140
3141 # Forced printings
3142 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
3143
3144 def check_max_downloads():
3145 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
3146 raise MaxDownloadsReached()
3147
3148 if self.params.get('simulate'):
3149 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3150 check_max_downloads()
3151 return
3152
3153 if full_filename is None:
3154 return
3155 if not self._ensure_dir_exists(encodeFilename(full_filename)):
3156 return
3157 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
3158 return
3159
3160 if self._write_description('video', info_dict,
3161 self.prepare_filename(info_dict, 'description')) is None:
3162 return
3163
3164 sub_files = self._write_subtitles(info_dict, temp_filename)
3165 if sub_files is None:
3166 return
3167 files_to_move.update(dict(sub_files))
3168
3169 thumb_files = self._write_thumbnails(
3170 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3171 if thumb_files is None:
3172 return
3173 files_to_move.update(dict(thumb_files))
3174
3175 infofn = self.prepare_filename(info_dict, 'infojson')
3176 _infojson_written = self._write_info_json('video', info_dict, infofn)
3177 if _infojson_written:
3178 info_dict['infojson_filename'] = infofn
3179 # For backward compatibility, even though it was a private field
3180 info_dict['__infojson_filename'] = infofn
3181 elif _infojson_written is None:
3182 return
3183
3184 # Note: Annotations are deprecated
3185 annofn = None
3186 if self.params.get('writeannotations', False):
3187 annofn = self.prepare_filename(info_dict, 'annotation')
3188 if annofn:
3189 if not self._ensure_dir_exists(encodeFilename(annofn)):
3190 return
3191 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
3192 self.to_screen('[info] Video annotations are already present')
3193 elif not info_dict.get('annotations'):
3194 self.report_warning('There are no annotations to write.')
3195 else:
3196 try:
3197 self.to_screen('[info] Writing video annotations to: ' + annofn)
3198 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
3199 annofile.write(info_dict['annotations'])
3200 except (KeyError, TypeError):
3201 self.report_warning('There are no annotations to write.')
3202 except OSError:
3203 self.report_error('Cannot write annotations file: ' + annofn)
3204 return
3205
3206 # Write internet shortcut files
3207 def _write_link_file(link_type):
3208 url = try_get(info_dict['webpage_url'], iri_to_uri)
3209 if not url:
3210 self.report_warning(
3211 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3212 return True
3213 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
3214 if not self._ensure_dir_exists(encodeFilename(linkfn)):
3215 return False
3216 if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
3217 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3218 return True
3219 try:
3220 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3221 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3222 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
3223 template_vars = {'url': url}
3224 if link_type == 'desktop':
3225 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3226 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
3227 except OSError:
3228 self.report_error(f'Cannot write internet shortcut {linkfn}')
3229 return False
3230 return True
3231
3232 write_links = {
3233 'url': self.params.get('writeurllink'),
3234 'webloc': self.params.get('writewebloclink'),
3235 'desktop': self.params.get('writedesktoplink'),
3236 }
3237 if self.params.get('writelink'):
3238 link_type = ('webloc' if sys.platform == 'darwin'
3239 else 'desktop' if sys.platform.startswith('linux')
3240 else 'url')
3241 write_links[link_type] = True
3242
3243 if any(should_write and not _write_link_file(link_type)
3244 for link_type, should_write in write_links.items()):
3245 return
3246
3247 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3248 replace_info_dict(new_info)
3249
3250 if self.params.get('skip_download'):
3251 info_dict['filepath'] = temp_filename
3252 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3253 info_dict['__files_to_move'] = files_to_move
3254 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
3255 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3256 else:
3257 # Download
3258 info_dict.setdefault('__postprocessors', [])
3259 try:
3260
3261 def existing_video_file(*filepaths):
3262 ext = info_dict.get('ext')
3263 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3264 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3265 default_overwrite=False)
3266 if file:
3267 info_dict['ext'] = os.path.splitext(file)[1][1:]
3268 return file
3269
3270 fd, success = None, True
3271 if info_dict.get('protocol') or info_dict.get('url'):
3272 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3273 if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
3274 info_dict.get('section_start') or info_dict.get('section_end')):
3275 msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
3276 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3277 self.report_error(f'{msg}. Aborting')
3278 return
3279
3280 if info_dict.get('requested_formats') is not None:
3281 old_ext = info_dict['ext']
3282 if self.params.get('merge_output_format') is None:
3283 if (info_dict['ext'] == 'webm'
3284 and info_dict.get('thumbnails')
3285 # check with type instead of pp_key, __name__, or isinstance
3286 # since we don't want any custom PPs to trigger this
3287 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
3288 info_dict['ext'] = 'mkv'
3289 self.report_warning(
3290 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3291 new_ext = info_dict['ext']
3292
3293 def correct_ext(filename, ext=new_ext):
3294 if filename == '-':
3295 return filename
3296 filename_real_ext = os.path.splitext(filename)[1][1:]
3297 filename_wo_ext = (
3298 os.path.splitext(filename)[0]
3299 if filename_real_ext in (old_ext, new_ext)
3300 else filename)
3301 return f'{filename_wo_ext}.{ext}'
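# e.g. (illustrative) with old_ext='webm', new_ext='mkv': 'video.webm' -> 'video.mkv',
# 'video.part' -> 'video.part.mkv', and '-' (stdout) is passed through unchanged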
3302
3303 # Ensure filename always has a correct extension for a successful merge
3304 full_filename = correct_ext(full_filename)
3305 temp_filename = correct_ext(temp_filename)
3306 dl_filename = existing_video_file(full_filename, temp_filename)
3307
3308 info_dict['__real_download'] = False
3309 # NOTE: Copy so that original format dicts are not modified
3310 info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats']))
3311
3312 merger = FFmpegMergerPP(self)
3313 downloaded = []
3314 if dl_filename is not None:
3315 self.report_file_already_downloaded(dl_filename)
3316 elif fd:
3317 for f in info_dict['requested_formats'] if fd != FFmpegFD else []:
3318 f['filepath'] = fname = prepend_extension(
3319 correct_ext(temp_filename, info_dict['ext']),
3320 'f%s' % f['format_id'], info_dict['ext'])
3321 downloaded.append(fname)
3322 info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats'])
3323 success, real_download = self.dl(temp_filename, info_dict)
3324 info_dict['__real_download'] = real_download
3325 else:
3326 if self.params.get('allow_unplayable_formats'):
3327 self.report_warning(
3328 'You have requested merging of multiple formats '
3329 'while also allowing unplayable formats to be downloaded. '
3330 'The formats won\'t be merged to prevent data corruption.')
3331 elif not merger.available:
3332 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3333 if not self.params.get('ignoreerrors'):
3334 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3335 return
3336 self.report_warning(f'{msg}. The formats won\'t be merged')
3337
3338 if temp_filename == '-':
3339 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3340 else 'but the formats are incompatible for simultaneous download' if merger.available
3341 else 'but ffmpeg is not installed')
3342 self.report_warning(
3343 f'You have requested downloading multiple formats to stdout {reason}. '
3344 'The formats will be streamed one after the other')
3345 fname = temp_filename
3346 for f in info_dict['requested_formats']:
3347 new_info = dict(info_dict)
3348 del new_info['requested_formats']
3349 new_info.update(f)
3350 if temp_filename != '-':
3351 fname = prepend_extension(
3352 correct_ext(temp_filename, new_info['ext']),
3353 'f%s' % f['format_id'], new_info['ext'])
3354 if not self._ensure_dir_exists(fname):
3355 return
3356 f['filepath'] = fname
3357 downloaded.append(fname)
3358 partial_success, real_download = self.dl(fname, new_info)
3359 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3360 success = success and partial_success
3361
3362 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3363 info_dict['__postprocessors'].append(merger)
3364 info_dict['__files_to_merge'] = downloaded
3365 # Even if nothing was downloaded, the merge is only happening now, so treat it as a real download
3366 info_dict['__real_download'] = True
3367 else:
3368 for file in downloaded:
3369 files_to_move[file] = None
3370 else:
3371 # Just a single file
3372 dl_filename = existing_video_file(full_filename, temp_filename)
3373 if dl_filename is None or dl_filename == temp_filename:
3374 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3375 # So we should try to resume the download
3376 success, real_download = self.dl(temp_filename, info_dict)
3377 info_dict['__real_download'] = real_download
3378 else:
3379 self.report_file_already_downloaded(dl_filename)
3380
3381 dl_filename = dl_filename or temp_filename
3382 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3383
3384 except network_exceptions as err:
3385 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3386 return
3387 except OSError as err:
3388 raise UnavailableVideoError(err)
3389 except (ContentTooShortError, ) as err:
3390 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3391 return
3392
3393 self._raise_pending_errors(info_dict)
3394 if success and full_filename != '-':
3395
3396 def fixup():
3397 do_fixup = True
3398 fixup_policy = self.params.get('fixup')
3399 vid = info_dict['id']
3400
3401 if fixup_policy in ('ignore', 'never'):
3402 return
3403 elif fixup_policy == 'warn':
3404 do_fixup = 'warn'
3405 elif fixup_policy != 'force':
3406 assert fixup_policy in ('detect_or_warn', None)
3407 if not info_dict.get('__real_download'):
3408 do_fixup = False
3409
3410 def ffmpeg_fixup(cndn, msg, cls):
3411 if not (do_fixup and cndn):
3412 return
3413 elif do_fixup == 'warn':
3414 self.report_warning(f'{vid}: {msg}')
3415 return
3416 pp = cls(self)
3417 if pp.available:
3418 info_dict['__postprocessors'].append(pp)
3419 else:
3420 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3421
3422 stretched_ratio = info_dict.get('stretched_ratio')
3423 ffmpeg_fixup(stretched_ratio not in (1, None),
3424 f'Non-uniform pixel ratio {stretched_ratio}',
3425 FFmpegFixupStretchedPP)
3426
3427 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3428 downloader = downloader.FD_NAME if downloader else None
3429
3430 ext = info_dict.get('ext')
3431 postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3432 isinstance(pp, FFmpegVideoConvertorPP)
3433 and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3434 ) for pp in self._pps['post_process'])
3435
3436 if not postprocessed_by_ffmpeg:
3437 ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
3438 'writing DASH m4a. Only some players support this container',
3439 FFmpegFixupM4aPP)
3440 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3441 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3442 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3443 FFmpegFixupM3u8PP)
3444 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
3445 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3446
3447 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3448 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3449
3450 fixup()
3451 try:
3452 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3453 except PostProcessingError as err:
3454 self.report_error('Postprocessing: %s' % str(err))
3455 return
3456 try:
3457 for ph in self._post_hooks:
3458 ph(info_dict['filepath'])
3459 except Exception as err:
3460 self.report_error('post hooks: %s' % str(err))
3461 return
3462 info_dict['__write_download_archive'] = True
3463
3464 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
3465 if self.params.get('force_write_download_archive'):
3466 info_dict['__write_download_archive'] = True
3467 check_max_downloads()
3468
3469 def __download_wrapper(self, func):
3470 @functools.wraps(func)
3471 def wrapper(*args, **kwargs):
3472 try:
3473 res = func(*args, **kwargs)
3474 except UnavailableVideoError as e:
3475 self.report_error(e)
3476 except DownloadCancelled as e:
3477 self.to_screen(f'[info] {e}')
3478 if not self.params.get('break_per_url'):
3479 raise
3480 self._num_downloads = 0
3481 else:
3482 if self.params.get('dump_single_json', False):
3483 self.post_extract(res)
3484 self.to_stdout(json.dumps(self.sanitize_info(res)))
3485 return wrapper
3486
3487 def download(self, url_list):
3488 """Download a given list of URLs."""
3489 url_list = variadic(url_list) # Passing a single URL is a common mistake
3490 outtmpl = self.params['outtmpl']['default']
3491 if (len(url_list) > 1
3492 and outtmpl != '-'
3493 and '%' not in outtmpl
3494 and self.params.get('max_downloads') != 1):
3495 raise SameFileError(outtmpl)
3496
3497 for url in url_list:
3498 self.__download_wrapper(self.extract_info)(
3499 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3500
3501 return self._download_retcode
3502
3503 def download_with_info_file(self, info_filename):
3504 with contextlib.closing(fileinput.FileInput(
3505 [info_filename], mode='r',
3506 openhook=fileinput.hook_encoded('utf-8'))) as f:
3507 # FileInput doesn't have a read method, so we can't call json.load
3508 infos = [self.sanitize_info(info, self.params.get('clean_infojson', True))
3509 for info in variadic(json.loads('\n'.join(f)))]
3510 for info in infos:
3511 self._load_cookies(info.get('cookies'), from_headers=False)
3512 self._load_cookies(traverse_obj(info.get('http_headers'), 'Cookie', casesense=False)) # compat
3513 try:
3514 self.__download_wrapper(self.process_ie_result)(info, download=True)
3515 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3516 if not isinstance(e, EntryNotInPlaylist):
3517 self.to_stderr('\r')
3518 webpage_url = info.get('webpage_url')
3519 if webpage_url is None:
3520 raise
3521 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3522 self.download([webpage_url])
3523 return self._download_retcode
3524
3525 @staticmethod
3526 def sanitize_info(info_dict, remove_private_keys=False):
3527 ''' Sanitize the infodict for conversion to JSON '''
3528 if info_dict is None:
3529 return info_dict
3530 info_dict.setdefault('epoch', int(time.time()))
3531 info_dict.setdefault('_type', 'video')
3532 info_dict.setdefault('_version', {
3533 'version': __version__,
3534 'current_git_head': current_git_head(),
3535 'release_git_head': RELEASE_GIT_HEAD,
3536 'repository': REPOSITORY,
3537 })
3538
3539 if remove_private_keys:
3540 reject = lambda k, v: v is None or k.startswith('__') or k in {
3541 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3542 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url',
3543 'playlist_autonumber', '_format_sort_fields',
3544 }
3545 else:
3546 reject = lambda k, v: False
3547
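# Recursively keep JSON-representable values; anything else (e.g. a datetime object)
# is replaced by its repr() below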
3548 def filter_fn(obj):
3549 if isinstance(obj, dict):
3550 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3551 elif isinstance(obj, (list, tuple, set, LazyList)):
3552 return list(map(filter_fn, obj))
3553 elif obj is None or isinstance(obj, (str, int, float, bool)):
3554 return obj
3555 else:
3556 return repr(obj)
3557
3558 return filter_fn(info_dict)
3559
3560 @staticmethod
3561 def filter_requested_info(info_dict, actually_filter=True):
3562 ''' Alias of sanitize_info for backward compatibility '''
3563 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3564
3565 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3566 for filename in set(filter(None, files_to_delete)):
3567 if msg:
3568 self.to_screen(msg % filename)
3569 try:
3570 os.remove(filename)
3571 except OSError:
3572 self.report_warning(f'Unable to delete file {filename}')
3573 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3574 del info['__files_to_move'][filename]
3575
3576 @staticmethod
3577 def post_extract(info_dict):
3578 def actual_post_extract(info_dict):
3579 if info_dict.get('_type') in ('playlist', 'multi_video'):
3580 for video_dict in info_dict.get('entries', {}):
3581 actual_post_extract(video_dict or {})
3582 return
3583
3584 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3585 info_dict.update(post_extractor())
3586
3587 actual_post_extract(info_dict or {})
3588
3589 def run_pp(self, pp, infodict):
3590 files_to_delete = []
3591 if '__files_to_move' not in infodict:
3592 infodict['__files_to_move'] = {}
3593 try:
3594 files_to_delete, infodict = pp.run(infodict)
3595 except PostProcessingError as e:
3596 # Must be True and not 'only_download'
3597 if self.params.get('ignoreerrors') is True:
3598 self.report_error(e)
3599 return infodict
3600 raise
3601
3602 if not files_to_delete:
3603 return infodict
3604 if self.params.get('keepvideo', False):
3605 for f in files_to_delete:
3606 infodict['__files_to_move'].setdefault(f, '')
3607 else:
3608 self._delete_downloaded_files(
3609 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3610 return infodict
3611
3612 def run_all_pps(self, key, info, *, additional_pps=None):
3613 if key != 'video':
3614 self._forceprint(key, info)
3615 for pp in (additional_pps or []) + self._pps[key]:
3616 info = self.run_pp(pp, info)
3617 return info
3618
3619 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3620 info = dict(ie_info)
3621 info['__files_to_move'] = files_to_move or {}
3622 try:
3623 info = self.run_all_pps(key, info)
3624 except PostProcessingError as err:
3625 msg = f'Preprocessing: {err}'
3626 info.setdefault('__pending_error', msg)
3627 self.report_error(msg, is_error=False)
3628 return info, info.pop('__files_to_move', None)
3629
3630 def post_process(self, filename, info, files_to_move=None):
3631 """Run all the postprocessors on the given file."""
3632 info['filepath'] = filename
3633 info['__files_to_move'] = files_to_move or {}
3634 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3635 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3636 del info['__files_to_move']
3637 return self.run_all_pps('after_move', info)
3638
3639 def _make_archive_id(self, info_dict):
3640 video_id = info_dict.get('id')
3641 if not video_id:
3642 return
3643 # Future-proof against any change in case
3644 # and keep backwards compatibility with prior versions
3645 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3646 if extractor is None:
3647 url = str_or_none(info_dict.get('url'))
3648 if not url:
3649 return
3650 # Try to find matching extractor for the URL and take its ie_key
3651 for ie_key, ie in self._ies.items():
3652 if ie.suitable(url):
3653 extractor = ie_key
3654 break
3655 else:
3656 return
3657 return make_archive_id(extractor, video_id)
3658
3659 def in_download_archive(self, info_dict):
3660 if not self.archive:
3661 return False
3662
3663 vid_ids = [self._make_archive_id(info_dict)]
3664 vid_ids.extend(info_dict.get('_old_archive_ids') or [])
3665 return any(id_ in self.archive for id_ in vid_ids)
3666
3667 def record_download_archive(self, info_dict):
3668 fn = self.params.get('download_archive')
3669 if fn is None:
3670 return
3671 vid_id = self._make_archive_id(info_dict)
3672 assert vid_id
3673
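# Archive ids have the form '<extractor key, lowercased> <video id>',
# e.g. (illustrative) 'youtube dQw4w9WgXcQ'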
3674 self.write_debug(f'Adding to archive: {vid_id}')
3675 if is_path_like(fn):
3676 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3677 archive_file.write(vid_id + '\n')
3678 self.archive.add(vid_id)
3679
3680 @staticmethod
3681 def format_resolution(format, default='unknown'):
3682 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3683 return 'audio only'
3684 if format.get('resolution') is not None:
3685 return format['resolution']
3686 if format.get('width') and format.get('height'):
3687 return '%dx%d' % (format['width'], format['height'])
3688 elif format.get('height'):
3689 return '%sp' % format['height']
3690 elif format.get('width'):
3691 return '%dx?' % format['width']
3692 return default
3693
3694 def _list_format_headers(self, *headers):
3695 if self.params.get('listformats_table', True) is not False:
3696 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3697 return headers
3698
3699 def _format_note(self, fdict):
3700 res = ''
3701 if fdict.get('ext') in ['f4f', 'f4m']:
3702 res += '(unsupported)'
3703 if fdict.get('language'):
3704 if res:
3705 res += ' '
3706 res += '[%s]' % fdict['language']
3707 if fdict.get('format_note') is not None:
3708 if res:
3709 res += ' '
3710 res += fdict['format_note']
3711 if fdict.get('tbr') is not None:
3712 if res:
3713 res += ', '
3714 res += '%4dk' % fdict['tbr']
3715 if fdict.get('container') is not None:
3716 if res:
3717 res += ', '
3718 res += '%s container' % fdict['container']
3719 if (fdict.get('vcodec') is not None
3720 and fdict.get('vcodec') != 'none'):
3721 if res:
3722 res += ', '
3723 res += fdict['vcodec']
3724 if fdict.get('vbr') is not None:
3725 res += '@'
3726 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3727 res += 'video@'
3728 if fdict.get('vbr') is not None:
3729 res += '%4dk' % fdict['vbr']
3730 if fdict.get('fps') is not None:
3731 if res:
3732 res += ', '
3733 res += '%sfps' % fdict['fps']
3734 if fdict.get('acodec') is not None:
3735 if res:
3736 res += ', '
3737 if fdict['acodec'] == 'none':
3738 res += 'video only'
3739 else:
3740 res += '%-5s' % fdict['acodec']
3741 elif fdict.get('abr') is not None:
3742 if res:
3743 res += ', '
3744 res += 'audio'
3745 if fdict.get('abr') is not None:
3746 res += '@%3dk' % fdict['abr']
3747 if fdict.get('asr') is not None:
3748 res += ' (%5dHz)' % fdict['asr']
3749 if fdict.get('filesize') is not None:
3750 if res:
3751 res += ', '
3752 res += format_bytes(fdict['filesize'])
3753 elif fdict.get('filesize_approx') is not None:
3754 if res:
3755 res += ', '
3756 res += '~' + format_bytes(fdict['filesize_approx'])
3757 return res
3758
3759 def _get_formats(self, info_dict):
3760 if info_dict.get('formats') is None:
3761 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3762 return [info_dict]
3763 return []
3764 return info_dict['formats']
3765
3766 def render_formats_table(self, info_dict):
3767 formats = self._get_formats(info_dict)
3768 if not formats:
3769 return
3770 if self.params.get('listformats_table', True) is False:
3771 table = [
3772 [
3773 format_field(f, 'format_id'),
3774 format_field(f, 'ext'),
3775 self.format_resolution(f),
3776 self._format_note(f)
3777 ] for f in formats if (f.get('preference') or 0) >= -1000]
3778 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3779
3780 def simplified_codec(f, field):
3781 assert field in ('acodec', 'vcodec')
3782 codec = f.get(field)
3783 if not codec:
3784 return 'unknown'
3785 elif codec != 'none':
3786 return '.'.join(codec.split('.')[:4])
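# e.g. 'av01.0.08M.08.0.110.01.01.01.0' -> 'av01.0.08M.08'; 'none' falls through
# to the images/"audio only"/"video only" labels below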
3787
3788 if field == 'vcodec' and f.get('acodec') == 'none':
3789 return 'images'
3790 elif field == 'acodec' and f.get('vcodec') == 'none':
3791 return ''
3792 return self._format_out('audio only' if field == 'vcodec' else 'video only',
3793 self.Styles.SUPPRESS)
3794
3795 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3796 table = [
3797 [
3798 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3799 format_field(f, 'ext'),
3800 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3801 format_field(f, 'fps', '\t%d', func=round),
3802 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3803 format_field(f, 'audio_channels', '\t%s'),
3804 delim, (
3805 format_field(f, 'filesize', ' \t%s', func=format_bytes)
3806 or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes)
3807 or format_field(try_call(lambda: format_bytes(int(info_dict['duration'] * f['tbr'] * (1024 / 8)))),
3808 None, self._format_out('~\t%s', self.Styles.SUPPRESS))),
3809 format_field(f, 'tbr', '\t%dk', func=round),
3810 shorten_protocol_name(f.get('protocol', '')),
3811 delim,
3812 simplified_codec(f, 'vcodec'),
3813 format_field(f, 'vbr', '\t%dk', func=round),
3814 simplified_codec(f, 'acodec'),
3815 format_field(f, 'abr', '\t%dk', func=round),
3816 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
3817 join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty(
3818 self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None,
3819 (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe'
3820 else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None),
3821 format_field(f, 'format_note'),
3822 format_field(f, 'container', ignore=(None, f.get('ext'))),
3823 delim=', '), delim=' '),
3824 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3825 header_line = self._list_format_headers(
3826 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3827 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3828
3829 return render_table(
3830 header_line, table, hide_empty=True,
3831 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3832
3833 def render_thumbnails_table(self, info_dict):
3834 thumbnails = list(info_dict.get('thumbnails') or [])
3835 if not thumbnails:
3836 return None
3837 return render_table(
3838 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3839 [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
3840
3841 def render_subtitles_table(self, video_id, subtitles):
3842 def _row(lang, formats):
3843 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3844 if len(set(names)) == 1:
3845 names = [] if names[0] == 'unknown' else names[:1]
3846 return [lang, ', '.join(names), ', '.join(exts)]
3847
3848 if not subtitles:
3849 return None
3850 return render_table(
3851 self._list_format_headers('Language', 'Name', 'Formats'),
3852 [_row(lang, formats) for lang, formats in subtitles.items()],
3853 hide_empty=True)
3854
3855 def __list_table(self, video_id, name, func, *args):
3856 table = func(*args)
3857 if not table:
3858 self.to_screen(f'{video_id} has no {name}')
3859 return
3860 self.to_screen(f'[info] Available {name} for {video_id}:')
3861 self.to_stdout(table)
3862
3863 def list_formats(self, info_dict):
3864 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3865
3866 def list_thumbnails(self, info_dict):
3867 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3868
3869 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3870 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3871
3872 def print_debug_header(self):
3873 if not self.params.get('verbose'):
3874 return
3875
3876 from . import _IN_CLI # Must be delayed import
3877
3878 # These imports can be slow. So import them only as needed
3879 from .extractor.extractors import _LAZY_LOADER
3880 from .extractor.extractors import (
3881 _PLUGIN_CLASSES as plugin_ies,
3882 _PLUGIN_OVERRIDES as plugin_ie_overrides
3883 )
3884
3885 def get_encoding(stream):
3886 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3887 additional_info = []
3888 if os.environ.get('TERM', '').lower() == 'dumb':
3889 additional_info.append('dumb')
3890 if not supports_terminal_sequences(stream):
3891 from .utils import WINDOWS_VT_MODE # Must be imported locally
3892 additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI')
3893 if additional_info:
3894 ret = f'{ret} ({",".join(additional_info)})'
3895 return ret
3896
3897 encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
3898 locale.getpreferredencoding(),
3899 sys.getfilesystemencoding(),
3900 self.get_encoding(),
3901 ', '.join(
3902 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
3903 if stream is not None and key != 'console')
3904 )
3905
3906 logger = self.params.get('logger')
3907 if logger:
3908 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3909 write_debug(encoding_str)
3910 else:
3911 write_string(f'[debug] {encoding_str}\n', encoding=None)
3912 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3913
3914 source = detect_variant()
3915 if VARIANT not in (None, 'pip'):
3916 source += '*'
3917 klass = type(self)
3918 write_debug(join_nonempty(
3919 f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
3920 f'{CHANNEL}@{__version__}',
3921 f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '',
3922 '' if source == 'unknown' else f'({source})',
3923 '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}',
3924 delim=' '))
3925
3926 if not _IN_CLI:
3927 write_debug(f'params: {self.params}')
3928
3929 if not _LAZY_LOADER:
3930 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3931 write_debug('Lazy loading extractors is forcibly disabled')
3932 else:
3933 write_debug('Lazy loading extractors is disabled')
3934 if self.params['compat_opts']:
3935 write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
3936
3937 if current_git_head():
3938 write_debug(f'Git HEAD: {current_git_head()}')
3939 write_debug(system_identifier())
3940
3941 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3942 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3943 if ffmpeg_features:
3944 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
3945
3946 exe_versions['rtmpdump'] = rtmpdump_version()
3947 exe_versions['phantomjs'] = PhantomJSwrapper._version()
3948 exe_str = ', '.join(
3949 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3950 ) or 'none'
3951 write_debug('exe versions: %s' % exe_str)
3952
3953 from .compat.compat_utils import get_package_info
3954 from .dependencies import available_dependencies
3955
3956 write_debug('Optional libraries: %s' % (', '.join(sorted({
3957 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
3958 })) or 'none'))
3959
3960 write_debug(f'Proxy map: {self.proxies}')
3961 # write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers)}')
3962 for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items():
3963 display_list = ['%s%s' % (
3964 klass.__name__, '' if klass.__name__ == name else f' as {name}')
3965 for name, klass in plugins.items()]
3966 if plugin_type == 'Extractor':
3967 display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})'
3968 for parent, plugins in plugin_ie_overrides.items())
3969 if not display_list:
3970 continue
3971 write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}')
3972
3973 plugin_dirs = plugin_directories()
3974 if plugin_dirs:
3975 write_debug(f'Plugin directories: {plugin_dirs}')
3976
3977 # Not implemented
3978 if False and self.params.get('call_home'):
3979 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
3980 write_debug('Public IP address: %s' % ipaddr)
3981 latest_version = self.urlopen(
3982 'https://yt-dl.org/latest/version').read().decode()
3983 if version_tuple(latest_version) > version_tuple(__version__):
3984 self.report_warning(
3985 'You are using an outdated version (newest version: %s)! '
3986 'See https://yt-dl.org/update if you need help updating.' %
3987 latest_version)
3988
3989 @functools.cached_property
3990 def proxies(self):
3991 """Global proxy configuration"""
3992 opts_proxy = self.params.get('proxy')
3993 if opts_proxy is not None:
3994 if opts_proxy == '':
3995 opts_proxy = '__noproxy__'
3996 proxies = {'all': opts_proxy}
3997 else:
3998 proxies = urllib.request.getproxies()
3999 # compat: Set HTTPS_PROXY to __noproxy__ to revert
4000 if 'http' in proxies and 'https' not in proxies:
4001 proxies['https'] = proxies['http']
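# e.g. with only HTTP_PROXY set in the environment, the same proxy is also used for
# https:// URLs; export HTTPS_PROXY=__noproxy__ to opt out of this fallback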
4002
4003 return proxies
4004
4005 @functools.cached_property
4006 def cookiejar(self):
4007 """Global cookiejar instance"""
4008 return load_cookies(
4009 self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self)
4010
4011 @property
4012 def _opener(self):
4013 """
4014 Get a urllib OpenerDirector from the Urllib handler (deprecated).
4015 """
4016 self.deprecation_warning('YoutubeDL._opener() is deprecated, use YoutubeDL.urlopen()')
4017 handler = self._request_director.handlers['Urllib']
4018 return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies)
4019
4020 def urlopen(self, req):
4021 """ Start an HTTP download """
4022 if isinstance(req, str):
4023 req = Request(req)
4024 elif isinstance(req, urllib.request.Request):
4025 self.deprecation_warning(
4026 'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. '
4027 'Use yt_dlp.networking.common.Request instead.')
4028 req = urllib_req_to_req(req)
4029 assert isinstance(req, Request)
4030
4031 # compat: Assume user:pass in the URL is basic auth
4032 url, basic_auth_header = extract_basic_auth(req.url)
4033 if basic_auth_header:
4034 req.headers['Authorization'] = basic_auth_header
4035 req.url = sanitize_url(url)
4036
4037 clean_proxies(proxies=req.proxies, headers=req.headers)
4038 clean_headers(req.headers)
4039
4040 try:
4041 return self._request_director.send(req)
4042 except NoSupportingHandlers as e:
4043 for ue in e.unsupported_errors:
4044 if not (ue.handler and ue.msg):
4045 continue
4046 if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower():
4047 raise RequestError(
4048 'file:// URLs are disabled by default in yt-dlp for security reasons. '
4049 'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue
4050 raise
4051 except SSLError as e:
4052 if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e):
4053 raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e
4054 elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e):
4055 raise RequestError(
4056 'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. '
4057 'Try using --legacy-server-connect', cause=e) from e
4058 raise
4059 except HTTPError as e: # TODO: Remove in a future release
4060 raise _CompatHTTPError(e) from e
4061
4062 def build_request_director(self, handlers):
4063 logger = _YDLLogger(self)
4064 headers = self.params.get('http_headers').copy()
4065 proxies = self.proxies.copy()
4066 clean_headers(headers)
4067 clean_proxies(proxies, headers)
4068
4069 director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic'))
4070 for handler in handlers:
4071 director.add_handler(handler(
4072 logger=logger,
4073 headers=headers,
4074 cookiejar=self.cookiejar,
4075 proxies=proxies,
4076 prefer_system_certs='no-certifi' in self.params['compat_opts'],
4077 verify=not self.params.get('nocheckcertificate'),
4078 **traverse_obj(self.params, {
4079 'verbose': 'debug_printtraffic',
4080 'source_address': 'source_address',
4081 'timeout': 'socket_timeout',
4082 'legacy_ssl_support': 'legacy_server_connect',
4083 'enable_file_urls': 'enable_file_urls',
4084 'client_cert': {
4085 'client_certificate': 'client_certificate',
4086 'client_certificate_key': 'client_certificate_key',
4087 'client_certificate_password': 'client_certificate_password',
4088 },
4089 }),
4090 ))
4091 return director
4092
4093 def encode(self, s):
4094 if isinstance(s, bytes):
4095 return s # Already encoded
4096
4097 try:
4098 return s.encode(self.get_encoding())
4099 except UnicodeEncodeError as err:
4100 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
4101 raise
4102
4103 def get_encoding(self):
4104 encoding = self.params.get('encoding')
4105 if encoding is None:
4106 encoding = preferredencoding()
4107 return encoding
4108
4109 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
4110 ''' Write infojson and return True = written, 'exists' = already exists, False = skip, None = error '''
4111 if overwrite is None:
4112 overwrite = self.params.get('overwrites', True)
4113 if not self.params.get('writeinfojson'):
4114 return False
4115 elif not infofn:
4116 self.write_debug(f'Skipping writing {label} infojson')
4117 return False
4118 elif not self._ensure_dir_exists(infofn):
4119 return None
4120 elif not overwrite and os.path.exists(infofn):
4121 self.to_screen(f'[info] {label.title()} metadata is already present')
4122 return 'exists'
4123
4124 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
4125 try:
4126 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
4127 return True
4128 except OSError:
4129 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
4130 return None
4131
4132 def _write_description(self, label, ie_result, descfn):
4133 ''' Write description and return True = written, False = skip, None = error '''
4134 if not self.params.get('writedescription'):
4135 return False
4136 elif not descfn:
4137 self.write_debug(f'Skipping writing {label} description')
4138 return False
4139 elif not self._ensure_dir_exists(descfn):
4140 return None
4141 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
4142 self.to_screen(f'[info] {label.title()} description is already present')
4143 elif ie_result.get('description') is None:
4144 self.to_screen(f'[info] There\'s no {label} description to write')
4145 return False
4146 else:
4147 try:
4148 self.to_screen(f'[info] Writing {label} description to: {descfn}')
4149 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
4150 descfile.write(ie_result['description'])
4151 except OSError:
4152 self.report_error(f'Cannot write {label} description file {descfn}')
4153 return None
4154 return True
4155
4156 def _write_subtitles(self, info_dict, filename):
4157 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
4158 ret = []
4159 subtitles = info_dict.get('requested_subtitles')
4160 if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
4161 # Subtitle download errors are already handled in the relevant IE,
4162 # so this silently carries on when used with an IE that does not support subtitles
4163 return ret
4164 elif not subtitles:
4165 self.to_screen('[info] There are no subtitles for the requested languages')
4166 return ret
4167 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
4168 if not sub_filename_base:
4169 self.to_screen('[info] Skipping writing video subtitles')
4170 return ret
4171
4172 for sub_lang, sub_info in subtitles.items():
4173 sub_format = sub_info['ext']
4174 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
4175 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
4176 existing_sub = self.existing_file((sub_filename_final, sub_filename))
4177 if existing_sub:
4178 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
4179 sub_info['filepath'] = existing_sub
4180 ret.append((existing_sub, sub_filename_final))
4181 continue
4182
4183 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
4184 if sub_info.get('data') is not None:
4185 try:
4186 # Use newline='' to prevent conversion of newline characters
4187 # See https://github.com/ytdl-org/youtube-dl/issues/10268
4188 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
4189 subfile.write(sub_info['data'])
4190 sub_info['filepath'] = sub_filename
4191 ret.append((sub_filename, sub_filename_final))
4192 continue
4193 except OSError:
4194 self.report_error(f'Cannot write video subtitles file {sub_filename}')
4195 return None
4196
4197 try:
4198 sub_copy = sub_info.copy()
4199 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
4200 self.dl(sub_filename, sub_copy, subtitle=True)
4201 sub_info['filepath'] = sub_filename
4202 ret.append((sub_filename, sub_filename_final))
4203 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
4204 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
4205 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
4206 if not self.params.get('ignoreerrors'):
4207 self.report_error(msg)
4208 raise DownloadError(msg)
4209 self.report_warning(msg)
4210 return ret
4211
4212 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
4213 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
4214 write_all = self.params.get('write_all_thumbnails', False)
4215 thumbnails, ret = [], []
4216 if write_all or self.params.get('writethumbnail', False):
4217 thumbnails = info_dict.get('thumbnails') or []
4218 if not thumbnails:
4219 self.to_screen(f'[info] There are no {label} thumbnails to download')
4220 return ret
4221 multiple = write_all and len(thumbnails) > 1
4222
4223 if thumb_filename_base is None:
4224 thumb_filename_base = filename
4225 if thumbnails and not thumb_filename_base:
4226 self.write_debug(f'Skipping writing {label} thumbnail')
4227 return ret
4228
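# Thumbnails are expected to be sorted worst-to-best (like formats), so iterate in reverse
# to try the best one first; going backwards also keeps the remaining indices valid when a
# failed entry is popped below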
4229 for idx, t in list(enumerate(thumbnails))[::-1]:
4230 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
4231 thumb_display_id = f'{label} thumbnail {t["id"]}'
4232 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
4233 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
4234
4235 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
4236 if existing_thumb:
4237 self.to_screen('[info] %s is already present' % (
4238 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
4239 t['filepath'] = existing_thumb
4240 ret.append((existing_thumb, thumb_filename_final))
4241 else:
4242 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
4243 try:
4244 uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {})))
4245 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
4246 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
4247 shutil.copyfileobj(uf, thumbf)
4248 ret.append((thumb_filename, thumb_filename_final))
4249 t['filepath'] = thumb_filename
4250 except network_exceptions as err:
4251 if isinstance(err, HTTPError) and err.status == 404:
4252 self.to_screen(f'[info] {thumb_display_id.title()} does not exist')
4253 else:
4254 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
4255 thumbnails.pop(idx)
4256 if ret and not write_all:
4257 break
4258 return ret