import collections
import contextlib
import datetime
import errno
import fileinput
import functools
import io
import itertools
import json
import locale
import operator
import os
import random
import re
import shutil
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata
import urllib.request
from string import ascii_letters

from .cache import Cache
from .compat import compat_os_name, compat_shlex_quote
from .cookies import load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    FFmpegVideoConvertorPP,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import REPOSITORY, current_git_head, detect_variant
from .utils import (
    DEFAULT_OUTTMPL,
    IDENTITY,
    LINK_TEMPLATES,
    MEDIA_EXTENSIONS,
    NO_DEFAULT,
    NUMBER_RE,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    GeoRestrictedError,
    HEADRequest,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PerRequestProxyHandler,
    PlaylistEntries,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    UserNotLive,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
    age_restricted,
    args_to_str,
    bug_reports_message,
    date_from_str,
    deprecation_warning,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    escapeHTML,
    expand_path,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_compatible_ext,
    get_domain,
    int_or_none,
    iri_to_uri,
    join_nonempty,
    locked_file,
    make_archive_id,
    make_dir,
    make_HTTPS_handler,
    merge_headers,
    network_exceptions,
    number_of_digits,
    orderedSet,
    orderedSet_from_options,
    parse_filesize,
    preferredencoding,
    prepend_extension,
    register_socks_protocols,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    system_identifier,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_call,
    try_get,
    url_basename,
    variadic,
    version_tuple,
    windows_enable_vt_mode,
    write_json_file,
    write_string,
)
from .version import RELEASE_GIT_HEAD, VARIANT, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how to
    extract all the needed information (that is the task of the
    InfoExtractors), it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are video or any of the
                       items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. see "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. see "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home'
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    allowed_extractors: List of regexes to match against extractor names that are allowed
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playlist_items:    Specific indices of playlist to download.
    playlistrandom:    Download playlist items in random order.
    lazy_playlist:     Process playlist entries as they are received.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    clean_infojson:    Remove private fields from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages, e.g. ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to for the entire queue
    cookiefile:        File name or text stream from where cookies should be read and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, the name of the keyring,
                       and the container name, e.g. ('chrome', ) or
                       ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    client_certificate: Path to client certificate file in PEM format. May include the private key
    client_certificate_key: Path to private key file for client certificate
    client_certificate_password: Password for client certificate private key, if encrypted.
                       If not provided and the key is encrypted, yt-dlp will ask interactively
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       (Only supported by some extractors)
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Whether to resolve and process url_results further
                       * False: Always process (default)
                       * True: Never process
                       * 'in_playlist': Do not process inside playlist/multi_video
                       * 'discard': Always process, but don't return the result
                                    from inside playlist/multi_video
                       * 'discard_in_playlist': Same as "discard", but only for
                                    playlists (not multi_video)
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                              yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                               the entries of utils.POSTPROCESS_WHEN
                               Assumed to be 'post_process' if not given
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                                 Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: "/" separated list of extensions to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval:Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-1 alpha-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'
    retry_sleep_functions: Dictionary of functions that takes the number of attempts
                       as argument and returns the time to sleep in seconds.
                       Allowed keys are 'http', 'fragment', 'file_access'
    download_ranges:   A callback function that gets called for every video with
                       the signature (info_dict, ydl) -> Iterable[Section].
                       Only the returned sections will be downloaded.
                       Each Section is a dict with the following keys:
                       * start_time: Start time of the section in seconds
                       * end_time: End time of the section in seconds
                       * title: Section title (Optional)
                       * index: Section number (Optional)
    force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
    noprogress:        Do not print the progress bar
    live_from_start:   Whether to download livestream videos from the start

    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       E.g. {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    force_generic_extractor: Force downloader to use the generic extractor
                       - Use allowed_extractors = ['generic', 'default']
    playliststart:     - Use playlist_items
                       Playlist item to start at.
    playlistend:       - Use playlist_items
                       Playlist item to end at.
    playlistreverse:   - Use playlist_items
                       Download playlist items in reverse order.
    forceurl:          - Use forceprint
                       Force printing final URL.
    forcetitle:        - Use forceprint
                       Force printing title.
    forceid:           - Use forceprint
                       Force printing ID.
    forcethumbnail:    - Use forceprint
                       Force printing thumbnail URL.
    forcedescription:  - Use forceprint
                       Force printing description.
    forcefilename:     - Use forceprint
                       Force printing final filename.
    forceduration:     - Use forceprint
                       Force printing duration.
    allsubtitles:      - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    include_ads:       - Doesn't work
                       Download ads as well
    call_home:         - Not implemented
                       Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging.
    post_hooks:        - Register a custom postprocessor
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    prefer_ffmpeg:     - avconv support is deprecated
                       If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
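
    Example (a minimal usage sketch; the option value shown is illustrative,
    not a default):

        from yt_dlp import YoutubeDL

        ydl_opts = {'format': 'bestvideo+bestaudio/best'}
        with YoutubeDL(ydl_opts) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=...'])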
534 """
535
536 _NUMERIC_FIELDS = {
537 'width', 'height', 'asr', 'audio_channels', 'fps',
538 'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
539 'timestamp', 'release_timestamp',
540 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
541 'average_rating', 'comment_count', 'age_limit',
542 'start_time', 'end_time',
543 'chapter_number', 'season_number', 'episode_number',
544 'track_number', 'disc_number', 'release_year',
545 }
546
547 _format_fields = {
548 # NB: Keep in sync with the docstring of extractor/common.py
549 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
550 'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
551 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
552 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
553 'preference', 'language', 'language_preference', 'quality', 'source_preference',
554 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
555 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
556 }
557 _format_selection_exts = {
558 'audio': set(MEDIA_EXTENSIONS.common_audio),
559 'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
560 'storyboards': set(MEDIA_EXTENSIONS.storyboards),
561 }
562
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        windows_enable_vt_mode()
        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )
        self._allow_colors = Namespace(**{
            type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
            for type_, stream in self._out_files.items_ if type_ != 'console'
        })

        # The code is left like this to be reused for future deprecations
        MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
        current_version = sys.version_info[:2]
        if current_version < MIN_RECOMMENDED:
            msg = ('Support for Python version %d.%d has been deprecated. '
                   'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
                   '\n You will no longer receive updates on this version')
            if current_version < MIN_SUPPORTED:
                msg = 'Python version %d.%d is no longer supported'
            self.deprecation_warning(
                f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if auto_init:
            if auto_init != 'no_verbose_header':
                self.print_debug_header()
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        # Set http_headers defaults according to std_headers
        self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

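        # Each 'postprocessors' entry is a dict as described in the class
        # docstring; e.g. (an illustrative entry, mirroring the README's
        # embedding example): {'key': 'FFmpegExtractAudio', 'preferredcodec': 'm4a'}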
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        self._setup_opener()
        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            if fn is None:
                return False
            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key. It will try to get one from
        the _ies list; if there's no instance, it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
        all_ies['end'] = UnsupportedURLIE()
        try:
            ie_names = orderedSet_from_options(
                self.params.get('allowed_extractors', ['default']), {
                    'all': list(all_ies),
                    'default': [name for name, ie in all_ies.items() if ie._ENABLED],
                }, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
        for name in ie_names:
            self.add_info_extractor(all_ies[name])
        self.write_debug(f'Loaded {len(ie_names)} extractors')

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                     'Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                     'Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors, this method may raise an exception when errors
        are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message, *, stacklevel=0):
        deprecation_warning(
            message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)

    def deprecated_feature(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'Deprecated Feature: {message}')
        self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message to the logger, or print it to stderr'''
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        sanitize = IDENTITY
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
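        # e.g. (illustrative): a lone '%' as in 'yt-dlp 100%' is escaped to
        # '%%' so the template survives the '%' substitution step, while
        # proper keys like '%(title)s' are left untouched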
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
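        # e.g. (illustrative): validate_outtmpl('%(title)s - %(id)s.%(ext)s')
        # returns None, while a template that cannot be %-substituted returns
        # the ValueError raised by the test substitution below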
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        info_dict.pop('__pending_error', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
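
        Example (a minimal sketch; `ydl` is an initialized YoutubeDL instance):
            tmpl, tmpl_dict = ydl.prepare_outtmpl('%(title)s.%(ext)s', info_dict)
            filename = ydl.escape_outtmpl(tmpl) % tmpl_dict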
1101 """
1102
1103 info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
1104
1105 info_dict = self._copy_infodict(info_dict)
1106 info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
1107 formatSeconds(info_dict['duration'], '-' if sanitize else ':')
1108 if info_dict.get('duration', None) is not None
1109 else None)
1110 info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
1111 info_dict['video_autonumber'] = self._num_videos
1112 if info_dict.get('resolution') is None:
1113 info_dict['resolution'] = self.format_resolution(info_dict, default=None)
1114
1115 # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
1116 # of %(field)s to %(field)0Nd for backward compatibility
1117 field_size_compat_map = {
1118 'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
1119 'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
1120 'autonumber': self.params.get('autonumber_size') or 5,
1121 }
1122
1123 TMPL_DICT = {}
1124 EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
1125 MATH_FUNCTIONS = {
1126 '+': float.__add__,
1127 '-': float.__sub__,
1128 }
1129 # Field is of the form key1.key2...
1130 # where keys (except first) can be string, int or slice
1131 FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
1132 MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
1133 MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
1134 INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
1135 (?P<negate>-)?
1136 (?P<fields>{FIELD_RE})
1137 (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
1138 (?:>(?P<strf_format>.+?))?
1139 (?P<remaining>
1140 (?P<alternate>(?<!\\),[^|&)]+)?
1141 (?:&(?P<replacement>.*?))?
1142 (?:\|(?P<default>.*?))?
1143 )$''')
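        # e.g. (illustrative): '%(playlist_index+1)03d' parses as
        # fields='playlist_index' with maths='+1', while '%(upload_date>%Y-%m-%d)s'
        # parses as fields='upload_date' with strf_format='%Y-%m-%d'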

        def _traverse_infodict(k):
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
            if sanitize and value == '':
                value = None
            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params['compat_opts']
                else NO_DEFAULT))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = f'0{field_size_compat_map[key]:d}d'

            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'h':  # html
                value, fmt = escapeHTML(value), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode() % str(value).encode()
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitizer(initial_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
        return self.escape_outtmpl(outtmpl) % info_dict

    def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
        assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
        if outtmpl is None:
            outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
        try:
            outtmpl = self._outtmpl_expandpath(outtmpl)
            filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
            if not filename:
                return None

            if tmpl_type in ('', 'temp'):
                final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
                if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
                    filename = replace_extension(filename, ext, final_ext)
            elif tmpl_type:
                force_ext = OUTTMPL_TYPES[tmpl_type]
                if force_ext:
                    filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                no_ext, *ext = filename.rsplit('.', 2)
                filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
        """Generate the output filename"""
        if outtmpl:
            assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
            dir_type = None
        filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
        if not filename and dir_type not in ('', 'temp'):
            return ''

        if warn:
            if not self.params.get('paths'):
                pass
            elif filename == '-':
                self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
        if filename == '-' or not filename:
            return filename

        return self.get_output_path(dir_type, filename)
1334 def _match_entry(self, info_dict, incomplete=False, silent=False):
1335 """ Returns None if the file should be downloaded """
1336
1337 video_title = info_dict.get('title', info_dict.get('id', 'entry'))
1338
1339 def check_filter():
1340 if 'title' in info_dict:
1341 # This can happen when we're just evaluating the playlist
1342 title = info_dict['title']
1343 matchtitle = self.params.get('matchtitle', False)
1344 if matchtitle:
1345 if not re.search(matchtitle, title, re.IGNORECASE):
1346 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
1347 rejecttitle = self.params.get('rejecttitle', False)
1348 if rejecttitle:
1349 if re.search(rejecttitle, title, re.IGNORECASE):
1350 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
1351 date = info_dict.get('upload_date')
1352 if date is not None:
1353 dateRange = self.params.get('daterange', DateRange())
1354 if date not in dateRange:
1355 return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
1356 view_count = info_dict.get('view_count')
1357 if view_count is not None:
1358 min_views = self.params.get('min_views')
1359 if min_views is not None and view_count < min_views:
1360 return 'Skipping %s because it has not reached the minimum view count (%d/%d)' % (video_title, view_count, min_views)
1361 max_views = self.params.get('max_views')
1362 if max_views is not None and view_count > max_views:
1363 return 'Skipping %s because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
1364 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
1365 return 'Skipping "%s" because it is age restricted' % video_title
1366
1367 match_filter = self.params.get('match_filter')
1368 if match_filter is not None:
1369 try:
1370 ret = match_filter(info_dict, incomplete=incomplete)
1371 except TypeError:
1372 # For backward compatibility
1373 ret = None if incomplete else match_filter(info_dict)
1374 if ret is NO_DEFAULT:
1375 while True:
1376 filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
1377 reply = input(self._format_screen(
1378 f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
1379 if reply in {'y', ''}:
1380 return None
1381 elif reply == 'n':
1382 return f'Skipping {video_title}'
1383 elif ret is not None:
1384 return ret
1385 return None
1386
1387 if self.in_download_archive(info_dict):
1388 reason = '%s has already been recorded in the archive' % video_title
1389 break_opt, break_err = 'break_on_existing', ExistingVideoReached
1390 else:
1391 reason = check_filter()
1392 break_opt, break_err = 'break_on_reject', RejectedVideoReached
1393 if reason is not None:
1394 if not silent:
1395 self.to_screen('[download] ' + reason)
1396 if self.params.get(break_opt, False):
1397 raise break_err()
1398 return reason
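
# The `match_filter` parameter consulted above is any callable that takes the
# info dict (plus an `incomplete` keyword) and returns None to accept, a string
# reason to skip, or NO_DEFAULT to prompt interactively. A sketch using the
# helper that builds one from a --match-filters expression (assuming this
# version's single-argument signature):
#
#     from yt_dlp.utils import match_filter_func
#     ydl = YoutubeDL({'match_filter': match_filter_func('duration > 60 & !is_live')})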
1399
1400 @staticmethod
1401 def add_extra_info(info_dict, extra_info):
1402 '''Set the keys from extra_info in info_dict if they are missing'''
1403 for key, value in extra_info.items():
1404 info_dict.setdefault(key, value)
1405
1406 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1407 process=True, force_generic_extractor=False):
1408 """
1409 Extract the information for the given URL and return the resulting info dict.
1410
1411 Arguments:
1412 url -- URL to extract
1413
1414 Keyword arguments:
1415 download -- whether to download videos during extraction
1416 ie_key -- extractor key hint
1417 extra_info -- dictionary containing the extra values to add to each result
1418 process -- whether to resolve all unresolved references (URLs, playlist items),
1419 must be True for download to work.
1420 force_generic_extractor -- force using the generic extractor
1421 """
1422
1423 if extra_info is None:
1424 extra_info = {}
1425
1426 if not ie_key and force_generic_extractor:
1427 ie_key = 'Generic'
1428
1429 if ie_key:
1430 ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
1431 else:
1432 ies = self._ies
1433
1434 for key, ie in ies.items():
1435 if not ie.suitable(url):
1436 continue
1437
1438 if not ie.working():
1439 self.report_warning('The program functionality for this site has been marked as broken, '
1440 'and will probably not work.')
1441
1442 temp_id = ie.get_temp_id(url)
1443 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1444 self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
1445 if self.params.get('break_on_existing', False):
1446 raise ExistingVideoReached()
1447 break
1448 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1449 else:
1450 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1451 self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1452 tb=False if extractors_restricted else None)
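
# Typical embedding usage of extract_info, as a sketch (the URL is yt-dlp's
# well-known test video):
#
#     with YoutubeDL({'quiet': True}) as ydl:
#         info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc',
#                                 download=False)
#         print(info['id'], info['title'])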
1453
1454 def _handle_extraction_exceptions(func):
1455 @functools.wraps(func)
1456 def wrapper(self, *args, **kwargs):
1457 while True:
1458 try:
1459 return func(self, *args, **kwargs)
1460 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1461 raise
1462 except ReExtractInfo as e:
1463 if e.expected:
1464 self.to_screen(f'{e}; Re-extracting data')
1465 else:
1466 self.to_stderr('\r')
1467 self.report_warning(f'{e}; Re-extracting data')
1468 continue
1469 except GeoRestrictedError as e:
1470 msg = e.msg
1471 if e.countries:
1472 msg += '\nThis video is available in %s.' % ', '.join(
1473 map(ISO3166Utils.short2full, e.countries))
1474 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1475 self.report_error(msg)
1476 except ExtractorError as e: # An error we somewhat expected
1477 self.report_error(str(e), e.format_traceback())
1478 except Exception as e:
1479 if self.params.get('ignoreerrors'):
1480 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1481 else:
1482 raise
1483 break
1484 return wrapper
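
# The decorator above is a retry loop keyed on exception type: ReExtractInfo
# restarts the wrapped call, DownloadCancelled and the LazyList/PagedList index
# errors propagate, and other failures are reported (or re-raised when
# `ignoreerrors` is unset). A stripped-down analogue of the pattern:
#
#     def retry_on(exc_type):
#         def deco(func):
#             @functools.wraps(func)
#             def wrapper(*args, **kwargs):
#                 while True:
#                     try:
#                         return func(*args, **kwargs)
#                     except exc_type:
#                         continue  # re-run the wrapped extraction
#             return wrapper
#         return deco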
1485
1486 def _wait_for_video(self, ie_result={}):
1487 if (not self.params.get('wait_for_video')
1488 or ie_result.get('_type', 'video') != 'video'
1489 or ie_result.get('formats') or ie_result.get('url')):
1490 return
1491
1492 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1493 last_msg = ''
1494
1495 def progress(msg):
1496 nonlocal last_msg
1497 full_msg = f'{msg}\n'
1498 if not self.params.get('noprogress'):
1499 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1500 elif last_msg:
1501 return
1502 self.to_screen(full_msg, skip_eol=True)
1503 last_msg = msg
1504
1505 min_wait, max_wait = self.params.get('wait_for_video')
1506 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1507 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1508 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1509 self.report_warning('Release time of video is not known')
1510 elif ie_result and (diff or 0) <= 0:
1511 self.report_warning('Video should already be available according to extracted info')
1512 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1513 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1514
1515 wait_till = time.time() + diff
1516 try:
1517 while True:
1518 diff = wait_till - time.time()
1519 if diff <= 0:
1520 progress('')
1521 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1522 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1523 time.sleep(1)
1524 except KeyboardInterrupt:
1525 progress('')
1526 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1527 except BaseException as e:
1528 if not isinstance(e, ReExtractInfo):
1529 self.to_screen('')
1530 raise
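
# `wait_for_video` is a (min, max) tuple of seconds. When release_timestamp is
# known, the exact remaining time is used, clamped to that range; otherwise a
# random delay within the range is picked. For example:
#
#     ydl = YoutubeDL({'wait_for_video': (60, 600)})  # retry upcoming videos after 1-10 min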
1531
1532 @_handle_extraction_exceptions
1533 def __extract_info(self, url, ie, download, extra_info, process):
1534 try:
1535 ie_result = ie.extract(url)
1536 except UserNotLive as e:
1537 if process:
1538 if self.params.get('wait_for_video'):
1539 self.report_warning(e)
1540 self._wait_for_video()
1541 raise
1542 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1543 self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1544 return
1545 if isinstance(ie_result, list):
1546 # Backwards compatibility: old IE result format
1547 ie_result = {
1548 '_type': 'compat_list',
1549 'entries': ie_result,
1550 }
1551 if extra_info.get('original_url'):
1552 ie_result.setdefault('original_url', extra_info['original_url'])
1553 self.add_default_extra_info(ie_result, ie, url)
1554 if process:
1555 self._wait_for_video(ie_result)
1556 return self.process_ie_result(ie_result, download, extra_info)
1557 else:
1558 return ie_result
1559
1560 def add_default_extra_info(self, ie_result, ie, url):
1561 if url is not None:
1562 self.add_extra_info(ie_result, {
1563 'webpage_url': url,
1564 'original_url': url,
1565 })
1566 webpage_url = ie_result.get('webpage_url')
1567 if webpage_url:
1568 self.add_extra_info(ie_result, {
1569 'webpage_url_basename': url_basename(webpage_url),
1570 'webpage_url_domain': get_domain(webpage_url),
1571 })
1572 if ie is not None:
1573 self.add_extra_info(ie_result, {
1574 'extractor': ie.IE_NAME,
1575 'extractor_key': ie.ie_key(),
1576 })
1577
1578 def process_ie_result(self, ie_result, download=True, extra_info=None):
1579 """
1580 Take the result of the ie (which may be modified) and resolve all unresolved
1581 references (URLs, playlist items).
1582
1583 It will also download the videos if 'download'.
1584 Returns the resolved ie_result.
1585 """
1586 if extra_info is None:
1587 extra_info = {}
1588 result_type = ie_result.get('_type', 'video')
1589
1590 if result_type in ('url', 'url_transparent'):
1591 ie_result['url'] = sanitize_url(
1592 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
1593 if ie_result.get('original_url'):
1594 extra_info.setdefault('original_url', ie_result['original_url'])
1595
1596 extract_flat = self.params.get('extract_flat', False)
1597 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1598 or extract_flat is True):
1599 info_copy = ie_result.copy()
1600 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1601 if ie and not ie_result.get('id'):
1602 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1603 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1604 self.add_extra_info(info_copy, extra_info)
1605 info_copy, _ = self.pre_process(info_copy)
1606 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1607 self._raise_pending_errors(info_copy)
1608 if self.params.get('force_write_download_archive', False):
1609 self.record_download_archive(info_copy)
1610 return ie_result
1611
1612 if result_type == 'video':
1613 self.add_extra_info(ie_result, extra_info)
1614 ie_result = self.process_video_result(ie_result, download=download)
1615 self._raise_pending_errors(ie_result)
1616 additional_urls = (ie_result or {}).get('additional_urls')
1617 if additional_urls:
1618 # TODO: Improve MetadataParserPP to allow setting a list
1619 if isinstance(additional_urls, str):
1620 additional_urls = [additional_urls]
1621 self.to_screen(
1622 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1623 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1624 ie_result['additional_entries'] = [
1625 self.extract_info(
1626 url, download, extra_info=extra_info,
1627 force_generic_extractor=self.params.get('force_generic_extractor'))
1628 for url in additional_urls
1629 ]
1630 return ie_result
1631 elif result_type == 'url':
1632 # We have to add extra_info to the results because it may be
1633 # contained in a playlist
1634 return self.extract_info(
1635 ie_result['url'], download,
1636 ie_key=ie_result.get('ie_key'),
1637 extra_info=extra_info)
1638 elif result_type == 'url_transparent':
1639 # Use the information from the embedding page
1640 info = self.extract_info(
1641 ie_result['url'], ie_key=ie_result.get('ie_key'),
1642 extra_info=extra_info, download=False, process=False)
1643
1644 # extract_info may return None when ignoreerrors is enabled and
1645 # extraction failed with an error, don't crash and return early
1646 # in this case
1647 if not info:
1648 return info
1649
1650 exempted_fields = {'_type', 'url', 'ie_key'}
1651 if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1652 # For video clips, the id etc of the clip extractor should be used
1653 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1654
1655 new_result = info.copy()
1656 new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1657
1658 # Extracted info may not be a video result (i.e.
1659 # info.get('_type', 'video') != 'video') but rather a url or
1660 # url_transparent result. In such cases, the outer metadata (from ie_result)
1661 # should be propagated to the inner one (info). For this to happen,
1662 # the _type of info should be overridden with url_transparent. This
1663 # fixes the issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1664 if new_result.get('_type') == 'url':
1665 new_result['_type'] = 'url_transparent'
1666
1667 return self.process_ie_result(
1668 new_result, download=download, extra_info=extra_info)
1669 elif result_type in ('playlist', 'multi_video'):
1670 # Protect from infinite recursion due to recursively nested playlists
1671 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1672 webpage_url = ie_result['webpage_url']
1673 if webpage_url in self._playlist_urls:
1674 self.to_screen(
1675 '[download] Skipping already downloaded playlist: %s'
1676 % (ie_result.get('title') or ie_result.get('id')))
1677 return
1678
1679 self._playlist_level += 1
1680 self._playlist_urls.add(webpage_url)
1681 self._fill_common_fields(ie_result, False)
1682 self._sanitize_thumbnails(ie_result)
1683 try:
1684 return self.__process_playlist(ie_result, download)
1685 finally:
1686 self._playlist_level -= 1
1687 if not self._playlist_level:
1688 self._playlist_urls.clear()
1689 elif result_type == 'compat_list':
1690 self.report_warning(
1691 'Extractor %s returned a compat_list result. '
1692 'It needs to be updated.' % ie_result.get('extractor'))
1693
1694 def _fixup(r):
1695 self.add_extra_info(r, {
1696 'extractor': ie_result['extractor'],
1697 'webpage_url': ie_result['webpage_url'],
1698 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1699 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1700 'extractor_key': ie_result['extractor_key'],
1701 })
1702 return r
1703 ie_result['entries'] = [
1704 self.process_ie_result(_fixup(r), download, extra_info)
1705 for r in ie_result['entries']
1706 ]
1707 return ie_result
1708 else:
1709 raise Exception('Invalid result type: %s' % result_type)
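
# For reference, the `_type` dispatch above is driven by the shape of ie_result.
# An illustrative url_transparent result, whose non-None fields are merged over
# the inner extraction (the field values are made up):
#
#     {'_type': 'url_transparent',
#      'url': 'https://example.com/real-video',
#      'ie_key': 'SomeIE',
#      'title': 'Title taken from the embedding page'}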
1710
1711 def _ensure_dir_exists(self, path):
1712 return make_dir(path, self.report_error)
1713
1714 @staticmethod
1715 def _playlist_infodict(ie_result, strict=False, **kwargs):
1716 info = {
1717 'playlist_count': ie_result.get('playlist_count'),
1718 'playlist': ie_result.get('title') or ie_result.get('id'),
1719 'playlist_id': ie_result.get('id'),
1720 'playlist_title': ie_result.get('title'),
1721 'playlist_uploader': ie_result.get('uploader'),
1722 'playlist_uploader_id': ie_result.get('uploader_id'),
1723 **kwargs,
1724 }
1725 if strict:
1726 return info
1727 return {
1728 **info,
1729 'playlist_index': 0,
1730 '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
1731 'extractor': ie_result['extractor'],
1732 'webpage_url': ie_result['webpage_url'],
1733 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1734 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1735 'extractor_key': ie_result['extractor_key'],
1736 }
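
# The playlist_* fields assembled above become available to the output templates
# of the entries, e.g. (an illustrative outtmpl):
#
#     '%(playlist_title)s/%(playlist_index)03d - %(title)s.%(ext)s'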
1737
1738 def __process_playlist(self, ie_result, download):
1739 """Process each entry in the playlist"""
1740 assert ie_result['_type'] in ('playlist', 'multi_video')
1741
1742 common_info = self._playlist_infodict(ie_result, strict=True)
1743 title = common_info.get('playlist') or '<Untitled>'
1744 if self._match_entry(common_info, incomplete=True) is not None:
1745 return
1746 self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
1747
1748 all_entries = PlaylistEntries(self, ie_result)
1749 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1750
1751 lazy = self.params.get('lazy_playlist')
1752 if lazy:
1753 resolved_entries, n_entries = [], 'N/A'
1754 ie_result['requested_entries'], ie_result['entries'] = None, None
1755 else:
1756 entries = resolved_entries = list(entries)
1757 n_entries = len(resolved_entries)
1758 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1759 if not ie_result.get('playlist_count'):
1760 # Better to do this after potentially exhausting entries
1761 ie_result['playlist_count'] = all_entries.get_full_count()
1762
1763 extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1764 ie_copy = collections.ChainMap(ie_result, extra)
1765
1766 _infojson_written = False
1767 write_playlist_files = self.params.get('allow_playlist_files', True)
1768 if write_playlist_files and self.params.get('list_thumbnails'):
1769 self.list_thumbnails(ie_result)
1770 if write_playlist_files and not self.params.get('simulate'):
1771 _infojson_written = self._write_info_json(
1772 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1773 if _infojson_written is None:
1774 return
1775 if self._write_description('playlist', ie_result,
1776 self.prepare_filename(ie_copy, 'pl_description')) is None:
1777 return
1778 # TODO: This should be passed to ThumbnailsConvertor if necessary
1779 self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1780
1781 if lazy:
1782 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1783 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1784 elif self.params.get('playlistreverse'):
1785 entries.reverse()
1786 elif self.params.get('playlistrandom'):
1787 random.shuffle(entries)
1788
1789 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
1790 f'{format_field(ie_result, "playlist_count", " of %s")}')
1791
1792 keep_resolved_entries = self.params.get('extract_flat') != 'discard'
1793 if self.params.get('extract_flat') == 'discard_in_playlist':
1794 keep_resolved_entries = ie_result['_type'] != 'playlist'
1795 if keep_resolved_entries:
1796 self.write_debug('The information of all playlist entries will be held in memory')
1797
1798 failures = 0
1799 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1800 for i, (playlist_index, entry) in enumerate(entries):
1801 if lazy:
1802 resolved_entries.append((playlist_index, entry))
1803 if not entry:
1804 continue
1805
1806 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
1807 if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
1808 playlist_index = ie_result['requested_entries'][i]
1809
1810 entry_copy = collections.ChainMap(entry, {
1811 **common_info,
1812 'n_entries': int_or_none(n_entries),
1813 'playlist_index': playlist_index,
1814 'playlist_autonumber': i + 1,
1815 })
1816
1817 if self._match_entry(entry_copy, incomplete=True) is not None:
1818 # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
1819 resolved_entries[i] = (playlist_index, NO_DEFAULT)
1820 continue
1821
1822 self.to_screen('[download] Downloading video %s of %s' % (
1823 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
1824
1825 extra.update({
1826 'playlist_index': playlist_index,
1827 'playlist_autonumber': i + 1,
1828 })
1829 entry_result = self.__process_iterable_entry(entry, download, extra)
1830 if not entry_result:
1831 failures += 1
1832 if failures >= max_failures:
1833 self.report_error(
1834 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
1835 break
1836 if keep_resolved_entries:
1837 resolved_entries[i] = (playlist_index, entry_result)
1838
1839 # Update with processed data
1840 ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
1841 ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
1842
1843 # Write the updated info to json
1844 if _infojson_written is True and self._write_info_json(
1845 'updated playlist', ie_result,
1846 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1847 return
1848
1849 ie_result = self.run_all_pps('playlist', ie_result)
1850 self.to_screen(f'[download] Finished downloading playlist: {title}')
1851 return ie_result
1852
1853 @_handle_extraction_exceptions
1854 def __process_iterable_entry(self, entry, download, extra_info):
1855 return self.process_ie_result(
1856 entry, download=download, extra_info=extra_info)
1857
1858 def _build_format_filter(self, filter_spec):
1859 " Returns a function to filter the formats according to the filter_spec "
1860
1861 OPERATORS = {
1862 '<': operator.lt,
1863 '<=': operator.le,
1864 '>': operator.gt,
1865 '>=': operator.ge,
1866 '=': operator.eq,
1867 '!=': operator.ne,
1868 }
1869 operator_rex = re.compile(r'''(?x)\s*
1870 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1871 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1872 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1873 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1874 m = operator_rex.fullmatch(filter_spec)
1875 if m:
1876 try:
1877 comparison_value = int(m.group('value'))
1878 except ValueError:
1879 comparison_value = parse_filesize(m.group('value'))
1880 if comparison_value is None:
1881 comparison_value = parse_filesize(m.group('value') + 'B')
1882 if comparison_value is None:
1883 raise ValueError(
1884 'Invalid value %r in format specification %r' % (
1885 m.group('value'), filter_spec))
1886 op = OPERATORS[m.group('op')]
1887
1888 if not m:
1889 STR_OPERATORS = {
1890 '=': operator.eq,
1891 '^=': lambda attr, value: attr.startswith(value),
1892 '$=': lambda attr, value: attr.endswith(value),
1893 '*=': lambda attr, value: value in attr,
1894 '~=': lambda attr, value: value.search(attr) is not None
1895 }
1896 str_operator_rex = re.compile(r'''(?x)\s*
1897 (?P<key>[a-zA-Z0-9._-]+)\s*
1898 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1899 (?P<quote>["'])?
1900 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1901 (?(quote)(?P=quote))\s*
1902 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1903 m = str_operator_rex.fullmatch(filter_spec)
1904 if m:
1905 if m.group('op') == '~=':
1906 comparison_value = re.compile(m.group('value'))
1907 else:
1908 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1909 str_op = STR_OPERATORS[m.group('op')]
1910 if m.group('negation'):
1911 op = lambda attr, value: not str_op(attr, value)
1912 else:
1913 op = str_op
1914
1915 if not m:
1916 raise SyntaxError('Invalid filter specification %r' % filter_spec)
1917
1918 def _filter(f):
1919 actual_value = f.get(m.group('key'))
1920 if actual_value is None:
1921 return m.group('none_inclusive')
1922 return op(actual_value, comparison_value)
1923 return _filter
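
# Filter specs accepted above, for example (a `?` after the operator makes the
# filter pass when the field is missing; `!` before a string operator negates it):
#
#     ydl._build_format_filter('height<=720')            # numeric comparison
#     ydl._build_format_filter('filesize<50M')           # sizes parsed via parse_filesize
#     ydl._build_format_filter('ext=mp4')                # string equality
#     ydl._build_format_filter('format_note!~=premium')  # negated regex match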
1924
1925 def _check_formats(self, formats):
1926 for f in formats:
1927 self.to_screen('[info] Testing format %s' % f['format_id'])
1928 path = self.get_output_path('temp')
1929 if not self._ensure_dir_exists(f'{path}/'):
1930 continue
1931 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1932 temp_file.close()
1933 try:
1934 success, _ = self.dl(temp_file.name, f, test=True)
1935 except (DownloadError, OSError, ValueError) + network_exceptions:
1936 success = False
1937 finally:
1938 if os.path.exists(temp_file.name):
1939 try:
1940 os.remove(temp_file.name)
1941 except OSError:
1942 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1943 if success:
1944 yield f
1945 else:
1946 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1947
1948 def _default_format_spec(self, info_dict, download=True):
1949
1950 def can_merge():
1951 merger = FFmpegMergerPP(self)
1952 return merger.available and merger.can_merge()
1953
1954 prefer_best = (
1955 not self.params.get('simulate')
1956 and download
1957 and (
1958 not can_merge()
1959 or info_dict.get('is_live') and not self.params.get('live_from_start')
1960 or self.params['outtmpl']['default'] == '-'))
1961 compat = (
1962 prefer_best
1963 or self.params.get('allow_multiple_audio_streams', False)
1964 or 'format-spec' in self.params['compat_opts'])
1965
1966 return (
1967 'best/bestvideo+bestaudio' if prefer_best
1968 else 'bestvideo*+bestaudio/best' if not compat
1969 else 'bestvideo+bestaudio/best')
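
# In effect: when merging is possible (ffmpeg available, not a live stream taken
# from now, not streaming to stdout), the default is 'bestvideo*+bestaudio/best';
# under the 'format-spec' compat option or with multiple audio streams allowed it
# is 'bestvideo+bestaudio/best'; otherwise 'best/bestvideo+bestaudio'.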
1970
1971 def build_format_selector(self, format_spec):
1972 def syntax_error(note, start):
1973 message = (
1974 'Invalid format specification: '
1975 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
1976 return SyntaxError(message)
1977
1978 PICKFIRST = 'PICKFIRST'
1979 MERGE = 'MERGE'
1980 SINGLE = 'SINGLE'
1981 GROUP = 'GROUP'
1982 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
1983
1984 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1985 'video': self.params.get('allow_multiple_video_streams', False)}
1986
1987 check_formats = self.params.get('check_formats') == 'selected'
1988
1989 def _parse_filter(tokens):
1990 filter_parts = []
1991 for type, string, start, _, _ in tokens:
1992 if type == tokenize.OP and string == ']':
1993 return ''.join(filter_parts)
1994 else:
1995 filter_parts.append(string)
1996
1997 def _remove_unused_ops(tokens):
1998 # Remove operators that we don't use and join them with the surrounding strings.
1999 # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
2000 ALLOWED_OPS = ('/', '+', ',', '(', ')')
2001 last_string, last_start, last_end, last_line = None, None, None, None
2002 for type, string, start, end, line in tokens:
2003 if type == tokenize.OP and string == '[':
2004 if last_string:
2005 yield tokenize.NAME, last_string, last_start, last_end, last_line
2006 last_string = None
2007 yield type, string, start, end, line
2008 # everything inside brackets will be handled by _parse_filter
2009 for type, string, start, end, line in tokens:
2010 yield type, string, start, end, line
2011 if type == tokenize.OP and string == ']':
2012 break
2013 elif type == tokenize.OP and string in ALLOWED_OPS:
2014 if last_string:
2015 yield tokenize.NAME, last_string, last_start, last_end, last_line
2016 last_string = None
2017 yield type, string, start, end, line
2018 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2019 if not last_string:
2020 last_string = string
2021 last_start = start
2022 last_end = end
2023 else:
2024 last_string += string
2025 if last_string:
2026 yield tokenize.NAME, last_string, last_start, last_end, last_line
2027
2028 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
2029 selectors = []
2030 current_selector = None
2031 for type, string, start, _, _ in tokens:
2032 # ENCODING is only defined in python 3.x
2033 if type == getattr(tokenize, 'ENCODING', None):
2034 continue
2035 elif type in [tokenize.NAME, tokenize.NUMBER]:
2036 current_selector = FormatSelector(SINGLE, string, [])
2037 elif type == tokenize.OP:
2038 if string == ')':
2039 if not inside_group:
2040 # ')' will be handled by the parentheses group
2041 tokens.restore_last_token()
2042 break
2043 elif inside_merge and string in ['/', ',']:
2044 tokens.restore_last_token()
2045 break
2046 elif inside_choice and string == ',':
2047 tokens.restore_last_token()
2048 break
2049 elif string == ',':
2050 if not current_selector:
2051 raise syntax_error('"," must follow a format selector', start)
2052 selectors.append(current_selector)
2053 current_selector = None
2054 elif string == '/':
2055 if not current_selector:
2056 raise syntax_error('"/" must follow a format selector', start)
2057 first_choice = current_selector
2058 second_choice = _parse_format_selection(tokens, inside_choice=True)
2059 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2060 elif string == '[':
2061 if not current_selector:
2062 current_selector = FormatSelector(SINGLE, 'best', [])
2063 format_filter = _parse_filter(tokens)
2064 current_selector.filters.append(format_filter)
2065 elif string == '(':
2066 if current_selector:
2067 raise syntax_error('Unexpected "("', start)
2068 group = _parse_format_selection(tokens, inside_group=True)
2069 current_selector = FormatSelector(GROUP, group, [])
2070 elif string == '+':
2071 if not current_selector:
2072 raise syntax_error('Unexpected "+"', start)
2073 selector_1 = current_selector
2074 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2075 if not selector_2:
2076 raise syntax_error('Expected a selector', start)
2077 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2078 else:
2079 raise syntax_error(f'Operator not recognized: "{string}"', start)
2080 elif type == tokenize.ENDMARKER:
2081 break
2082 if current_selector:
2083 selectors.append(current_selector)
2084 return selectors
2085
2086 def _merge(formats_pair):
2087 format_1, format_2 = formats_pair
2088
2089 formats_info = []
2090 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2091 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2092
2093 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2094 get_no_more = {'video': False, 'audio': False}
2095 for (i, fmt_info) in enumerate(formats_info):
2096 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2097 formats_info.pop(i)
2098 continue
2099 for aud_vid in ['audio', 'video']:
2100 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2101 if get_no_more[aud_vid]:
2102 formats_info.pop(i)
2103 break
2104 get_no_more[aud_vid] = True
2105
2106 if len(formats_info) == 1:
2107 return formats_info[0]
2108
2109 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2110 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2111
2112 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2113 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2114
2115 output_ext = get_compatible_ext(
2116 vcodecs=[f.get('vcodec') for f in video_fmts],
2117 acodecs=[f.get('acodec') for f in audio_fmts],
2118 vexts=[f['ext'] for f in video_fmts],
2119 aexts=[f['ext'] for f in audio_fmts],
2120 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2121 or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
2122
2123 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2124
2125 new_dict = {
2126 'requested_formats': formats_info,
2127 'format': '+'.join(filtered('format')),
2128 'format_id': '+'.join(filtered('format_id')),
2129 'ext': output_ext,
2130 'protocol': '+'.join(map(determine_protocol, formats_info)),
2131 'language': '+'.join(orderedSet(filtered('language'))) or None,
2132 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2133 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2134 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2135 }
2136
2137 if the_only_video:
2138 new_dict.update({
2139 'width': the_only_video.get('width'),
2140 'height': the_only_video.get('height'),
2141 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2142 'fps': the_only_video.get('fps'),
2143 'dynamic_range': the_only_video.get('dynamic_range'),
2144 'vcodec': the_only_video.get('vcodec'),
2145 'vbr': the_only_video.get('vbr'),
2146 'stretched_ratio': the_only_video.get('stretched_ratio'),
2147 })
2148
2149 if the_only_audio:
2150 new_dict.update({
2151 'acodec': the_only_audio.get('acodec'),
2152 'abr': the_only_audio.get('abr'),
2153 'asr': the_only_audio.get('asr'),
2154 'audio_channels': the_only_audio.get('audio_channels')
2155 })
2156
2157 return new_dict
2158
2159 def _check_formats(formats):
2160 if not check_formats:
2161 yield from formats
2162 return
2163 yield from self._check_formats(formats)
2164
2165 def _build_selector_function(selector):
2166 if isinstance(selector, list): # ,
2167 fs = [_build_selector_function(s) for s in selector]
2168
2169 def selector_function(ctx):
2170 for f in fs:
2171 yield from f(ctx)
2172 return selector_function
2173
2174 elif selector.type == GROUP: # ()
2175 selector_function = _build_selector_function(selector.selector)
2176
2177 elif selector.type == PICKFIRST: # /
2178 fs = [_build_selector_function(s) for s in selector.selector]
2179
2180 def selector_function(ctx):
2181 for f in fs:
2182 picked_formats = list(f(ctx))
2183 if picked_formats:
2184 return picked_formats
2185 return []
2186
2187 elif selector.type == MERGE: # +
2188 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2189
2190 def selector_function(ctx):
2191 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2192 yield _merge(pair)
2193
2194 elif selector.type == SINGLE: # atom
2195 format_spec = selector.selector or 'best'
2196
2197 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2198 if format_spec == 'all':
2199 def selector_function(ctx):
2200 yield from _check_formats(ctx['formats'][::-1])
2201 elif format_spec == 'mergeall':
2202 def selector_function(ctx):
2203 formats = list(_check_formats(
2204 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2205 if not formats:
2206 return
2207 merged_format = formats[-1]
2208 for f in formats[-2::-1]:
2209 merged_format = _merge((merged_format, f))
2210 yield merged_format
2211
2212 else:
2213 format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
2214 mobj = re.match(
2215 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2216 format_spec)
2217 if mobj is not None:
2218 format_idx = int_or_none(mobj.group('n'), default=1)
2219 format_reverse = mobj.group('bw')[0] == 'b'
2220 format_type = (mobj.group('type') or [None])[0]
2221 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2222 format_modified = mobj.group('mod') is not None
2223
2224 format_fallback = not format_type and not format_modified # for b, w
2225 _filter_f = (
2226 (lambda f: f.get('%scodec' % format_type) != 'none')
2227 if format_type and format_modified # bv*, ba*, wv*, wa*
2228 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2229 if format_type # bv, ba, wv, wa
2230 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2231 if not format_modified # b, w
2232 else lambda f: True) # b*, w*
2233 filter_f = lambda f: _filter_f(f) and (
2234 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2235 else:
2236 if format_spec in self._format_selection_exts['audio']:
2237 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2238 elif format_spec in self._format_selection_exts['video']:
2239 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2240 separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2241 elif format_spec in self._format_selection_exts['storyboards']:
2242 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2243 else:
2244 filter_f = lambda f: f.get('format_id') == format_spec # id
2245
2246 def selector_function(ctx):
2247 formats = list(ctx['formats'])
2248 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2249 if not matches:
2250 if format_fallback and ctx['incomplete_formats']:
2251 # for extractors with incomplete formats (audio-only (soundcloud)
2252 # or video-only (imgur)), best/worst will fall back to the
2253 # best/worst {video,audio}-only format
2254 matches = formats
2255 elif separate_fallback and not ctx['has_merged_format']:
2256 # for compatibility with youtube-dl when there is no pre-merged format
2257 matches = list(filter(separate_fallback, formats))
2258 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2259 try:
2260 yield matches[format_idx - 1]
2261 except LazyList.IndexError:
2262 return
2263
2264 filters = [self._build_format_filter(f) for f in selector.filters]
2265
2266 def final_selector(ctx):
2267 ctx_copy = dict(ctx)
2268 for _filter in filters:
2269 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2270 return selector_function(ctx_copy)
2271 return final_selector
2272
2273 stream = io.BytesIO(format_spec.encode())
2274 try:
2275 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2276 except tokenize.TokenError:
2277 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2278
2279 class TokenIterator:
2280 def __init__(self, tokens):
2281 self.tokens = tokens
2282 self.counter = 0
2283
2284 def __iter__(self):
2285 return self
2286
2287 def __next__(self):
2288 if self.counter >= len(self.tokens):
2289 raise StopIteration()
2290 value = self.tokens[self.counter]
2291 self.counter += 1
2292 return value
2293
2294 next = __next__
2295
2296 def restore_last_token(self):
2297 self.counter -= 1
2298
2299 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2300 return _build_selector_function(parsed_selector)
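
# A sketch of driving the compiled selector directly (this normally happens
# inside process_video_result; `formats` is a pre-sorted list of format dicts):
#
#     selector = ydl.build_format_selector('bestvideo[height<=1080]+bestaudio/best')
#     chosen = list(selector({'formats': formats,
#                             'has_merged_format': False,
#                             'incomplete_formats': False}))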
2301
2302 def _calc_headers(self, info_dict):
2303 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
2304
2305 cookies = self._calc_cookies(info_dict['url'])
2306 if cookies:
2307 res['Cookie'] = cookies
2308
2309 if 'X-Forwarded-For' not in res:
2310 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2311 if x_forwarded_for_ip:
2312 res['X-Forwarded-For'] = x_forwarded_for_ip
2313
2314 return res
2315
2316 def _calc_cookies(self, url):
2317 pr = sanitized_Request(url)
2318 self.cookiejar.add_cookie_header(pr)
2319 return pr.get_header('Cookie')
2320
2321 def _sort_thumbnails(self, thumbnails):
2322 thumbnails.sort(key=lambda t: (
2323 t.get('preference') if t.get('preference') is not None else -1,
2324 t.get('width') if t.get('width') is not None else -1,
2325 t.get('height') if t.get('height') is not None else -1,
2326 t.get('id') if t.get('id') is not None else '',
2327 t.get('url')))
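
# Note: the ascending sort above leaves the "best" thumbnail last, which is what
# process_video_result relies on when it falls back to thumbnails[-1]['url'].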
2328
2329 def _sanitize_thumbnails(self, info_dict):
2330 thumbnails = info_dict.get('thumbnails')
2331 if thumbnails is None:
2332 thumbnail = info_dict.get('thumbnail')
2333 if thumbnail:
2334 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2335 if not thumbnails:
2336 return
2337
2338 def check_thumbnails(thumbnails):
2339 for t in thumbnails:
2340 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2341 try:
2342 self.urlopen(HEADRequest(t['url']))
2343 except network_exceptions as err:
2344 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2345 continue
2346 yield t
2347
2348 self._sort_thumbnails(thumbnails)
2349 for i, t in enumerate(thumbnails):
2350 if t.get('id') is None:
2351 t['id'] = '%d' % i
2352 if t.get('width') and t.get('height'):
2353 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2354 t['url'] = sanitize_url(t['url'])
2355
2356 if self.params.get('check_formats') is True:
2357 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2358 else:
2359 info_dict['thumbnails'] = thumbnails
2360
2361 def _fill_common_fields(self, info_dict, is_video=True):
2362 # TODO: move sanitization here
2363 if is_video:
2364 # playlists are allowed to lack "title"
2365 title = info_dict.get('title', NO_DEFAULT)
2366 if title is NO_DEFAULT:
2367 raise ExtractorError('Missing "title" field in extractor result',
2368 video_id=info_dict['id'], ie=info_dict['extractor'])
2369 info_dict['fulltitle'] = title
2370 if not title:
2371 if title == '':
2372 self.write_debug('Extractor gave empty title. Creating a generic title')
2373 else:
2374 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2375 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2376
2377 if info_dict.get('duration') is not None:
2378 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2379
2380 for ts_key, date_key in (
2381 ('timestamp', 'upload_date'),
2382 ('release_timestamp', 'release_date'),
2383 ('modified_timestamp', 'modified_date'),
2384 ):
2385 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2386 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2387 # see http://bugs.python.org/issue1646728)
2388 with contextlib.suppress(ValueError, OverflowError, OSError):
2389 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2390 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2391
2392 live_keys = ('is_live', 'was_live')
2393 live_status = info_dict.get('live_status')
2394 if live_status is None:
2395 for key in live_keys:
2396 if info_dict.get(key) is False:
2397 continue
2398 if info_dict.get(key):
2399 live_status = key
2400 break
2401 if all(info_dict.get(key) is False for key in live_keys):
2402 live_status = 'not_live'
2403 if live_status:
2404 info_dict['live_status'] = live_status
2405 for key in live_keys:
2406 if info_dict.get(key) is None:
2407 info_dict[key] = (live_status == key)
2408
2409 # Auto generate title fields corresponding to the *_number fields when missing
2410 # in order to always have clean titles. This is very common for TV series.
2411 for field in ('chapter', 'season', 'episode'):
2412 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2413 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
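
# A small worked example of the timestamp-to-date derivation above (UTC):
#
#     info = {'id': 'x', 'extractor': 'generic', 'title': 'T',
#             'timestamp': 1577836800}  # 2020-01-01 00:00:00 UTC
#     ydl._fill_common_fields(info)
#     info['upload_date']  # -> '20200101'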
2414
2415 def _raise_pending_errors(self, info):
2416 err = info.pop('__pending_error', None)
2417 if err:
2418 self.report_error(err, tb=False)
2419
2420 def process_video_result(self, info_dict, download=True):
2421 assert info_dict.get('_type', 'video') == 'video'
2422 self._num_videos += 1
2423
2424 if 'id' not in info_dict:
2425 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2426 elif not info_dict.get('id'):
2427 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2428
2429 def report_force_conversion(field, field_not, conversion):
2430 self.report_warning(
2431 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2432 % (field, field_not, conversion))
2433
2434 def sanitize_string_field(info, string_field):
2435 field = info.get(string_field)
2436 if field is None or isinstance(field, str):
2437 return
2438 report_force_conversion(string_field, 'a string', 'string')
2439 info[string_field] = str(field)
2440
2441 def sanitize_numeric_fields(info):
2442 for numeric_field in self._NUMERIC_FIELDS:
2443 field = info.get(numeric_field)
2444 if field is None or isinstance(field, (int, float)):
2445 continue
2446 report_force_conversion(numeric_field, 'numeric', 'int')
2447 info[numeric_field] = int_or_none(field)
2448
2449 sanitize_string_field(info_dict, 'id')
2450 sanitize_numeric_fields(info_dict)
2451 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2452 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
2453 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2454 self.report_warning('"duration" field is negative, there is an error in extractor')
2455
2456 chapters = info_dict.get('chapters') or []
2457 if chapters and chapters[0].get('start_time'):
2458 chapters.insert(0, {'start_time': 0})
2459
2460 dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
2461 for idx, (prev, current, next_) in enumerate(zip(
2462 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
2463 if current.get('start_time') is None:
2464 current['start_time'] = prev.get('end_time')
2465 if not current.get('end_time'):
2466 current['end_time'] = next_.get('start_time')
2467 if not current.get('title'):
2468 current['title'] = f'<Untitled Chapter {idx}>'
2469
2470 if 'playlist' not in info_dict:
2471 # It isn't part of a playlist
2472 info_dict['playlist'] = None
2473 info_dict['playlist_index'] = None
2474
2475 self._sanitize_thumbnails(info_dict)
2476
2477 thumbnail = info_dict.get('thumbnail')
2478 thumbnails = info_dict.get('thumbnails')
2479 if thumbnail:
2480 info_dict['thumbnail'] = sanitize_url(thumbnail)
2481 elif thumbnails:
2482 info_dict['thumbnail'] = thumbnails[-1]['url']
2483
2484 if info_dict.get('display_id') is None and 'id' in info_dict:
2485 info_dict['display_id'] = info_dict['id']
2486
2487 self._fill_common_fields(info_dict)
2488
2489 for cc_kind in ('subtitles', 'automatic_captions'):
2490 cc = info_dict.get(cc_kind)
2491 if cc:
2492 for _, subtitle in cc.items():
2493 for subtitle_format in subtitle:
2494 if subtitle_format.get('url'):
2495 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2496 if subtitle_format.get('ext') is None:
2497 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2498
2499 automatic_captions = info_dict.get('automatic_captions')
2500 subtitles = info_dict.get('subtitles')
2501
2502 info_dict['requested_subtitles'] = self.process_subtitles(
2503 info_dict['id'], subtitles, automatic_captions)
2504
2505 if info_dict.get('formats') is None:
2506 # There's only one format available
2507 formats = [info_dict]
2508 else:
2509 formats = info_dict['formats']
2510
2511 # or None ensures --clean-infojson removes it
2512 info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
2513 if not self.params.get('allow_unplayable_formats'):
2514 formats = [f for f in formats if not f.get('has_drm')]
2515 if info_dict['_has_drm'] and formats and all(
2516 f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2517 self.report_warning(
2518 'This video is DRM protected and only images are available for download. '
2519 'Use --list-formats to see them')
2520
2521 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2522 if not get_from_start:
2523 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2524 if info_dict.get('is_live') and formats:
2525 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2526 if get_from_start and not formats:
2527 self.raise_no_formats(info_dict, msg=(
2528 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2529 'If you want to download from the current time, use --no-live-from-start'))
2530
2531 def is_wellformed(f):
2532 url = f.get('url')
2533 if not url:
2534 self.report_warning(
2535 '"url" field is missing or empty - skipping format, '
2536 'there is an error in extractor')
2537 return False
2538 if isinstance(url, bytes):
2539 sanitize_string_field(f, 'url')
2540 return True
2541
2542 # Filter out malformed formats for better extraction robustness
2543 formats = list(filter(is_wellformed, formats or []))
2544
2545 if not formats:
2546 self.raise_no_formats(info_dict)
2547
2548 formats_dict = {}
2549
2550 # We check that all the formats have the format and format_id fields
2551 for i, format in enumerate(formats):
2552 sanitize_string_field(format, 'format_id')
2553 sanitize_numeric_fields(format)
2554 format['url'] = sanitize_url(format['url'])
2555 if not format.get('format_id'):
2556 format['format_id'] = str(i)
2557 else:
2558 # Sanitize format_id from characters used in format selector expression
2559 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2560 format_id = format['format_id']
2561 if format_id not in formats_dict:
2562 formats_dict[format_id] = []
2563 formats_dict[format_id].append(format)
2564
2565 # Make sure all formats have unique format_id
2566 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2567 for format_id, ambiguous_formats in formats_dict.items():
2568 ambiguous_id = len(ambiguous_formats) > 1
2569 for i, format in enumerate(ambiguous_formats):
2570 if ambiguous_id:
2571 format['format_id'] = '%s-%d' % (format_id, i)
2572 if format.get('ext') is None:
2573 format['ext'] = determine_ext(format['url']).lower()
2574 # Ensure there is no conflict between id and ext in format selection
2575 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2576 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2577 format['format_id'] = 'f%s' % format['format_id']
2578
2579 for i, format in enumerate(formats):
2580 if format.get('format') is None:
2581 format['format'] = '{id} - {res}{note}'.format(
2582 id=format['format_id'],
2583 res=self.format_resolution(format),
2584 note=format_field(format, 'format_note', ' (%s)'),
2585 )
2586 if format.get('protocol') is None:
2587 format['protocol'] = determine_protocol(format)
2588 if format.get('resolution') is None:
2589 format['resolution'] = self.format_resolution(format, default=None)
2590 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2591 format['dynamic_range'] = 'SDR'
2592 if (info_dict.get('duration') and format.get('tbr')
2593 and not format.get('filesize') and not format.get('filesize_approx')):
2594 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
2595
2596 # Add HTTP headers, so that external programs can use them from the
2597 # json output
2598 full_format_info = info_dict.copy()
2599 full_format_info.update(format)
2600 format['http_headers'] = self._calc_headers(full_format_info)
2601 # Remove private housekeeping stuff
2602 if '__x_forwarded_for_ip' in info_dict:
2603 del info_dict['__x_forwarded_for_ip']
2604
2605 if self.params.get('check_formats') is True:
2606 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2607
2608 if not formats or formats[0] is not info_dict:
2609 # only set the 'formats' field if the original info_dict lists them;
2610 # otherwise we end up with a circular reference: the first (and only)
2611 # element of the 'formats' field in info_dict would be info_dict itself,
2612 # which can't be exported to json
2613 info_dict['formats'] = formats
2614
2615 info_dict, _ = self.pre_process(info_dict)
2616
2617 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2618 return info_dict
2619
2620 self.post_extract(info_dict)
2621 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2622
2623 # The pre-processors may have modified the formats
2624 formats = info_dict.get('formats', [info_dict])
2625
2626 list_only = self.params.get('simulate') is None and (
2627 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2628 interactive_format_selection = not list_only and self.format_selector == '-'
2629 if self.params.get('list_thumbnails'):
2630 self.list_thumbnails(info_dict)
2631 if self.params.get('listsubtitles'):
2632 if 'automatic_captions' in info_dict:
2633 self.list_subtitles(
2634 info_dict['id'], automatic_captions, 'automatic captions')
2635 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2636 if self.params.get('listformats') or interactive_format_selection:
2637 self.list_formats(info_dict)
2638 if list_only:
2639 # Without this printing, -F --print-json will not work
2640 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2641 return info_dict
2642
2643 format_selector = self.format_selector
2644 if format_selector is None:
2645 req_format = self._default_format_spec(info_dict, download=download)
2646 self.write_debug('Default format spec: %s' % req_format)
2647 format_selector = self.build_format_selector(req_format)
2648
2649 while True:
2650 if interactive_format_selection:
2651 req_format = input(
2652 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2653 try:
2654 format_selector = self.build_format_selector(req_format)
2655 except SyntaxError as err:
2656 self.report_error(err, tb=False, is_error=False)
2657 continue
2658
2659 formats_to_download = list(format_selector({
2660 'formats': formats,
2661 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2662 'incomplete_formats': (
2663 # All formats are video-only or
2664 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2665 # all formats are audio-only
2666 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2667 }))
2668 if interactive_format_selection and not formats_to_download:
2669 self.report_error('Requested format is not available', tb=False, is_error=False)
2670 continue
2671 break
2672
2673 if not formats_to_download:
2674 if not self.params.get('ignore_no_formats_error'):
2675 raise ExtractorError(
2676 'Requested format is not available. Use --list-formats for a list of available formats',
2677 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2678 self.report_warning('Requested format is not available')
2679 # Process what we can, even without any available formats.
2680 formats_to_download = [{}]
2681
2682 requested_ranges = self.params.get('download_ranges')
2683 if requested_ranges:
2684 requested_ranges = tuple(requested_ranges(info_dict, self))
2685
2686 best_format, downloaded_formats = formats_to_download[-1], []
2687 if download:
2688 if best_format:
2689 def to_screen(*msg):
2690 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2691
2692 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2693 (f['format_id'] for f in formats_to_download))
2694 if requested_ranges:
2695 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2696 (f'{int(c["start_time"])}-{int(c["end_time"])}' for c in requested_ranges))
2697 max_downloads_reached = False
2698
2699 for fmt, chapter in itertools.product(formats_to_download, requested_ranges or [{}]):
2700 new_info = self._copy_infodict(info_dict)
2701 new_info.update(fmt)
2702 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
2703 if chapter or offset:
2704 new_info.update({
2705 'section_start': offset + chapter.get('start_time', 0),
2706 'section_end': offset + min(chapter.get('end_time', duration), duration),
2707 'section_title': chapter.get('title'),
2708 'section_number': chapter.get('index'),
2709 })
2710 downloaded_formats.append(new_info)
2711 try:
2712 self.process_info(new_info)
2713 except MaxDownloadsReached:
2714 max_downloads_reached = True
2715 self._raise_pending_errors(new_info)
2716 # Remove copied info
2717 for key, val in tuple(new_info.items()):
2718 if info_dict.get(key) == val:
2719 new_info.pop(key)
2720 if max_downloads_reached:
2721 break
2722
2723 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
2724 assert write_archive.issubset({True, False, 'ignore'})
2725 if True in write_archive and False not in write_archive:
2726 self.record_download_archive(info_dict)
2727
2728 info_dict['requested_downloads'] = downloaded_formats
2729 info_dict = self.run_all_pps('after_video', info_dict)
2730 if max_downloads_reached:
2731 raise MaxDownloadsReached()
2732
2733 # We update the info dict with the selected best quality format (backwards compatibility)
2734 info_dict.update(best_format)
2735 return info_dict
2736
2737 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2738 """Select the requested subtitles and their format"""
2739 available_subs, normal_sub_langs = {}, []
2740 if normal_subtitles and self.params.get('writesubtitles'):
2741 available_subs.update(normal_subtitles)
2742 normal_sub_langs = tuple(normal_subtitles.keys())
2743 if automatic_captions and self.params.get('writeautomaticsub'):
2744 for lang, cap_info in automatic_captions.items():
2745 if lang not in available_subs:
2746 available_subs[lang] = cap_info
2747
2748 if not available_subs or (
2749 not self.params.get('writesubtitles')
2750 and not self.params.get('writeautomaticsub')):
2751 return None
2752
2753 all_sub_langs = tuple(available_subs.keys())
2754 if self.params.get('allsubtitles', False):
2755 requested_langs = all_sub_langs
2756 elif self.params.get('subtitleslangs', False):
2757 try:
2758 requested_langs = orderedSet_from_options(
2759 self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
2760 except re.error as e:
2761 raise ValueError(f'Wrong regex for subtitleslangs: {e.pattern}')
2762 elif normal_sub_langs:
2763 requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
2764 else:
2765 requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
2766 if requested_langs:
2767 self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
2768
2769 formats_query = self.params.get('subtitlesformat', 'best')
2770 formats_preference = formats_query.split('/') if formats_query else []
2771 subs = {}
2772 for lang in requested_langs:
2773 formats = available_subs.get(lang)
2774 if formats is None:
2775 self.report_warning(f'{lang} subtitles not available for {video_id}')
2776 continue
2777 for ext in formats_preference:
2778 if ext == 'best':
2779 f = formats[-1]
2780 break
2781 matches = list(filter(lambda f: f['ext'] == ext, formats))
2782 if matches:
2783 f = matches[-1]
2784 break
2785 else:
2786 f = formats[-1]
2787 self.report_warning(
2788 'No subtitle format found matching "%s" for language %s, '
2789 'using %s' % (formats_query, lang, f['ext']))
2790 subs[lang] = f
2791 return subs
2792
2793 def _forceprint(self, key, info_dict):
2794 if info_dict is None:
2795 return
2796 info_copy = info_dict.copy()
2797 info_copy['formats_table'] = self.render_formats_table(info_dict)
2798 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2799 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2800 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2801
2802 def format_tmpl(tmpl):
2803 mobj = re.match(r'\w+(=?)$', tmpl)
2804 if mobj and mobj.group(1):
2805 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2806 elif mobj:
2807 return f'%({tmpl})s'
2808 return tmpl
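# Sketch of the shorthand: 'id' -> '%(id)s', 'id=' -> 'id = %(id)r';
# anything that is not a bare field name passes through unchanged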
2809
2810 for tmpl in self.params['forceprint'].get(key, []):
2811 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2812
2813 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2814 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
2815 tmpl = format_tmpl(tmpl)
2816 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
2817 if self._ensure_dir_exists(filename):
2818 with open(filename, 'a', encoding='utf-8') as f:
2819 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
2820
2821 def __forced_printings(self, info_dict, filename, incomplete):
2822 def print_mandatory(field, actual_field=None):
2823 if actual_field is None:
2824 actual_field = field
2825 if (self.params.get('force%s' % field, False)
2826 and (not incomplete or info_dict.get(actual_field) is not None)):
2827 self.to_stdout(info_dict[actual_field])
2828
2829 def print_optional(field):
2830 if (self.params.get('force%s' % field, False)
2831 and info_dict.get(field) is not None):
2832 self.to_stdout(info_dict[field])
2833
2834 info_dict = info_dict.copy()
2835 if filename is not None:
2836 info_dict['filename'] = filename
2837 if info_dict.get('requested_formats') is not None:
2838 # For RTMP URLs, also include the playpath
2839 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2840 elif info_dict.get('url'):
2841 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2842
2843 if (self.params.get('forcejson')
2844 or self.params['forceprint'].get('video')
2845 or self.params['print_to_file'].get('video')):
2846 self.post_extract(info_dict)
2847 self._forceprint('video', info_dict)
2848
2849 print_mandatory('title')
2850 print_mandatory('id')
2851 print_mandatory('url', 'urls')
2852 print_optional('thumbnail')
2853 print_optional('description')
2854 print_optional('filename')
2855 if self.params.get('forceduration') and info_dict.get('duration') is not None:
2856 self.to_stdout(formatSeconds(info_dict['duration']))
2857 print_mandatory('format')
2858
2859 if self.params.get('forcejson'):
2860 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2861
2862 def dl(self, name, info, subtitle=False, test=False):
2863 if not info.get('url'):
2864 self.raise_no_formats(info, True)
2865
2866 if test:
2867 verbose = self.params.get('verbose')
2868 params = {
2869 'test': True,
2870 'quiet': self.params.get('quiet') or not verbose,
2871 'verbose': verbose,
2872 'noprogress': not verbose,
2873 'nopart': True,
2874 'skip_unavailable_fragments': False,
2875 'keep_fragments': False,
2876 'overwrites': True,
2877 '_no_ytdl_file': True,
2878 }
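# Sketch of test mode: the downloader's 'test' flag fetches only a small
# portion of the file, .part and .ytdl resume files are suppressed, and
# any existing file is overwritten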
2879 else:
2880 params = self.params
2881 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2882 if not test:
2883 for ph in self._progress_hooks:
2884 fd.add_progress_hook(ph)
2885 urls = '", "'.join(
2886 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2887 for f in info.get('requested_formats', []) or [info])
2888 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
2889
2890 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
2891 # But it may contain objects that are not deep-copyable
2892 new_info = self._copy_infodict(info)
2893 if new_info.get('http_headers') is None:
2894 new_info['http_headers'] = self._calc_headers(new_info)
2895 return fd.download(name, new_info, subtitle)
2896
2897 def existing_file(self, filepaths, *, default_overwrite=True):
2898 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2899 if existing_files and not self.params.get('overwrites', default_overwrite):
2900 return existing_files[0]
2901
2902 for file in existing_files:
2903 self.report_file_delete(file)
2904 os.remove(file)
2905 return None
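# Usage sketch: existing_file(['a.mkv', 'a.mp4']) returns the first path
# found on disk when overwrites are disabled; otherwise every existing
# candidate is deleted and None is returned so the caller re-downloads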
2906
2907 def process_info(self, info_dict):
2908 """Process a single resolved IE result. (Modifies it in-place)"""
2909
2910 assert info_dict.get('_type', 'video') == 'video'
2911 original_infodict = info_dict
2912
2913 if 'format' not in info_dict and 'ext' in info_dict:
2914 info_dict['format'] = info_dict['ext']
2915
2916 # This is mostly just for backward compatibility of process_info
2917 # As a side-effect, this allows for format-specific filters
2918 if self._match_entry(info_dict) is not None:
2919 info_dict['__write_download_archive'] = 'ignore'
2920 return
2921
2922 # Does nothing under normal operation - for backward compatibility of process_info
2923 self.post_extract(info_dict)
2924 self._num_downloads += 1
2925
2926 # info_dict['_filename'] needs to be set for backward compatibility
2927 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2928 temp_filename = self.prepare_filename(info_dict, 'temp')
2929 files_to_move = {}
2930
2931 # Forced printings
2932 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2933
2934 def check_max_downloads():
2935 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
2936 raise MaxDownloadsReached()
2937
2938 if self.params.get('simulate'):
2939 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2940 check_max_downloads()
2941 return
2942
2943 if full_filename is None:
2944 return
2945 if not self._ensure_dir_exists(encodeFilename(full_filename)):
2946 return
2947 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2948 return
2949
2950 if self._write_description('video', info_dict,
2951 self.prepare_filename(info_dict, 'description')) is None:
2952 return
2953
2954 sub_files = self._write_subtitles(info_dict, temp_filename)
2955 if sub_files is None:
2956 return
2957 files_to_move.update(dict(sub_files))
2958
2959 thumb_files = self._write_thumbnails(
2960 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2961 if thumb_files is None:
2962 return
2963 files_to_move.update(dict(thumb_files))
2964
2965 infofn = self.prepare_filename(info_dict, 'infojson')
2966 _infojson_written = self._write_info_json('video', info_dict, infofn)
2967 if _infojson_written:
2968 info_dict['infojson_filename'] = infofn
2969 # For backward compatibility, even though it was a private field
2970 info_dict['__infojson_filename'] = infofn
2971 elif _infojson_written is None:
2972 return
2973
2974 # Note: Annotations are deprecated
2975 annofn = None
2976 if self.params.get('writeannotations', False):
2977 annofn = self.prepare_filename(info_dict, 'annotation')
2978 if annofn:
2979 if not self._ensure_dir_exists(encodeFilename(annofn)):
2980 return
2981 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2982 self.to_screen('[info] Video annotations are already present')
2983 elif not info_dict.get('annotations'):
2984 self.report_warning('There are no annotations to write.')
2985 else:
2986 try:
2987 self.to_screen('[info] Writing video annotations to: ' + annofn)
2988 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2989 annofile.write(info_dict['annotations'])
2990 except (KeyError, TypeError):
2991 self.report_warning('There are no annotations to write.')
2992 except OSError:
2993 self.report_error('Cannot write annotations file: ' + annofn)
2994 return
2995
2996 # Write internet shortcut files
2997 def _write_link_file(link_type):
2998 url = try_get(info_dict['webpage_url'], iri_to_uri)
2999 if not url:
3000 self.report_warning(
3001 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3002 return True
3003 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
3004 if not self._ensure_dir_exists(encodeFilename(linkfn)):
3005 return False
3006 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
3007 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3008 return True
3009 try:
3010 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3011 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3012 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
3013 template_vars = {'url': url}
3014 if link_type == 'desktop':
3015 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3016 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
3017 except OSError:
3018 self.report_error(f'Cannot write internet shortcut {linkfn}')
3019 return False
3020 return True
3021
3022 write_links = {
3023 'url': self.params.get('writeurllink'),
3024 'webloc': self.params.get('writewebloclink'),
3025 'desktop': self.params.get('writedesktoplink'),
3026 }
3027 if self.params.get('writelink'):
3028 link_type = ('webloc' if sys.platform == 'darwin'
3029 else 'desktop' if sys.platform.startswith('linux')
3030 else 'url')
3031 write_links[link_type] = True
3032
3033 if any(should_write and not _write_link_file(link_type)
3034 for link_type, should_write in write_links.items()):
3035 return
3036
3037 def replace_info_dict(new_info):
3038 nonlocal info_dict
3039 if new_info == info_dict:
3040 return
3041 info_dict.clear()
3042 info_dict.update(new_info)
3043
3044 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3045 replace_info_dict(new_info)
3046
3047 if self.params.get('skip_download'):
3048 info_dict['filepath'] = temp_filename
3049 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3050 info_dict['__files_to_move'] = files_to_move
3051 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
3052 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3053 else:
3054 # Download
3055 info_dict.setdefault('__postprocessors', [])
3056 try:
3057
3058 def existing_video_file(*filepaths):
3059 ext = info_dict.get('ext')
3060 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3061 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3062 default_overwrite=False)
3063 if file:
3064 info_dict['ext'] = os.path.splitext(file)[1][1:]
3065 return file
3066
3067 fd, success = None, True
3068 if info_dict.get('protocol') or info_dict.get('url'):
3069 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3070 if fd is not FFmpegFD and (
3071 info_dict.get('section_start') or info_dict.get('section_end')):
3072 msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
3073 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3074 self.report_error(f'{msg}. Aborting')
3075 return
3076
3077 if info_dict.get('requested_formats') is not None:
3078 requested_formats = info_dict['requested_formats']
3079 old_ext = info_dict['ext']
3080 if self.params.get('merge_output_format') is None:
3081 if (info_dict['ext'] == 'webm'
3082 and info_dict.get('thumbnails')
3083 # check with type instead of pp_key, __name__, or isinstance
3084 # since we don't want any custom PPs to trigger this
3085 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
3086 info_dict['ext'] = 'mkv'
3087 self.report_warning(
3088 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3089 new_ext = info_dict['ext']
3090
3091 def correct_ext(filename, ext=new_ext):
3092 if filename == '-':
3093 return filename
3094 filename_real_ext = os.path.splitext(filename)[1][1:]
3095 filename_wo_ext = (
3096 os.path.splitext(filename)[0]
3097 if filename_real_ext in (old_ext, new_ext)
3098 else filename)
3099 return f'{filename_wo_ext}.{ext}'
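# e.g. (sketch): with old_ext='webm' and new_ext='mkv',
# correct_ext('video.webm') -> 'video.mkv', while an unrelated extension
# is kept and suffixed: correct_ext('video.part') -> 'video.part.mkv'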
3100
3101 # Ensure filename always has a correct extension for successful merge
3102 full_filename = correct_ext(full_filename)
3103 temp_filename = correct_ext(temp_filename)
3104 dl_filename = existing_video_file(full_filename, temp_filename)
3105 info_dict['__real_download'] = False
3106
3107 merger = FFmpegMergerPP(self)
3108 downloaded = []
3109 if dl_filename is not None:
3110 self.report_file_already_downloaded(dl_filename)
3111 elif fd:
3112 for f in (requested_formats if fd != FFmpegFD else []):
3113 f['filepath'] = fname = prepend_extension(
3114 correct_ext(temp_filename, info_dict['ext']),
3115 'f%s' % f['format_id'], info_dict['ext'])
3116 downloaded.append(fname)
3117 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3118 success, real_download = self.dl(temp_filename, info_dict)
3119 info_dict['__real_download'] = real_download
3120 else:
3121 if self.params.get('allow_unplayable_formats'):
3122 self.report_warning(
3123 'You have requested merging of multiple formats '
3124 'while also allowing unplayable formats to be downloaded. '
3125 'The formats won\'t be merged to prevent data corruption.')
3126 elif not merger.available:
3127 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3128 if not self.params.get('ignoreerrors'):
3129 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3130 return
3131 self.report_warning(f'{msg}. The formats won\'t be merged')
3132
3133 if temp_filename == '-':
3134 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3135 else 'but the formats are incompatible for simultaneous download' if merger.available
3136 else 'but ffmpeg is not installed')
3137 self.report_warning(
3138 f'You have requested downloading multiple formats to stdout {reason}. '
3139 'The formats will be streamed one after the other')
3140 fname = temp_filename
3141 for f in requested_formats:
3142 new_info = dict(info_dict)
3143 del new_info['requested_formats']
3144 new_info.update(f)
3145 if temp_filename != '-':
3146 fname = prepend_extension(
3147 correct_ext(temp_filename, new_info['ext']),
3148 'f%s' % f['format_id'], new_info['ext'])
3149 if not self._ensure_dir_exists(fname):
3150 return
3151 f['filepath'] = fname
3152 downloaded.append(fname)
3153 partial_success, real_download = self.dl(fname, new_info)
3154 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3155 success = success and partial_success
3156
3157 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3158 info_dict['__postprocessors'].append(merger)
3159 info_dict['__files_to_merge'] = downloaded
3160 # Even if there were no downloads, the file is only being merged now
3161 info_dict['__real_download'] = True
3162 else:
3163 for file in downloaded:
3164 files_to_move[file] = None
3165 else:
3166 # Just a single file
3167 dl_filename = existing_video_file(full_filename, temp_filename)
3168 if dl_filename is None or dl_filename == temp_filename:
3169 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3170 # So we should try to resume the download
3171 success, real_download = self.dl(temp_filename, info_dict)
3172 info_dict['__real_download'] = real_download
3173 else:
3174 self.report_file_already_downloaded(dl_filename)
3175
3176 dl_filename = dl_filename or temp_filename
3177 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3178
3179 except network_exceptions as err:
3180 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3181 return
3182 except OSError as err:
3183 raise UnavailableVideoError(err)
3184 except (ContentTooShortError, ) as err:
3185 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3186 return
3187
3188 self._raise_pending_errors(info_dict)
3189 if success and full_filename != '-':
3190
3191 def fixup():
3192 do_fixup = True
3193 fixup_policy = self.params.get('fixup')
3194 vid = info_dict['id']
3195
3196 if fixup_policy in ('ignore', 'never'):
3197 return
3198 elif fixup_policy == 'warn':
3199 do_fixup = 'warn'
3200 elif fixup_policy != 'force':
3201 assert fixup_policy in ('detect_or_warn', None)
3202 if not info_dict.get('__real_download'):
3203 do_fixup = False
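# Policy summary (sketch): 'never'/'ignore' -> do nothing; 'warn' ->
# report only; 'detect_or_warn' (the default) -> fix only files that were
# actually downloaded this run; 'force' -> always run the fixup PP when
# its condition holds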
3204
3205 def ffmpeg_fixup(cndn, msg, cls):
3206 if not (do_fixup and cndn):
3207 return
3208 elif do_fixup == 'warn':
3209 self.report_warning(f'{vid}: {msg}')
3210 return
3211 pp = cls(self)
3212 if pp.available:
3213 info_dict['__postprocessors'].append(pp)
3214 else:
3215 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3216
3217 stretched_ratio = info_dict.get('stretched_ratio')
3218 ffmpeg_fixup(stretched_ratio not in (1, None),
3219 f'Non-uniform pixel ratio {stretched_ratio}',
3220 FFmpegFixupStretchedPP)
3221
3222 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3223 downloader = downloader.FD_NAME if downloader else None
3224
3225 ext = info_dict.get('ext')
3226 postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3227 isinstance(pp, FFmpegVideoConvertorPP)
3228 and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3229 ) for pp in self._pps['post_process'])
3230
3231 if not postprocessed_by_ffmpeg:
3232 ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
3233 'writing DASH m4a. Only some players support this container',
3234 FFmpegFixupM4aPP)
3235 ffmpeg_fixup((downloader == 'hlsnative' and not self.params.get('hls_use_mpegts'))
3236 or (info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None),
3237 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3238 FFmpegFixupM3u8PP)
3239 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3240 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3241
3242 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3243 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3244
3245 fixup()
3246 try:
3247 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3248 except PostProcessingError as err:
3249 self.report_error('Postprocessing: %s' % str(err))
3250 return
3251 try:
3252 for ph in self._post_hooks:
3253 ph(info_dict['filepath'])
3254 except Exception as err:
3255 self.report_error('post hooks: %s' % str(err))
3256 return
3257 info_dict['__write_download_archive'] = True
3258
3259 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
3260 if self.params.get('force_write_download_archive'):
3261 info_dict['__write_download_archive'] = True
3262 check_max_downloads()
3263
3264 def __download_wrapper(self, func):
3265 @functools.wraps(func)
3266 def wrapper(*args, **kwargs):
3267 try:
3268 res = func(*args, **kwargs)
3269 except UnavailableVideoError as e:
3270 self.report_error(e)
3271 except DownloadCancelled as e:
3272 self.to_screen(f'[info] {e}')
3273 if not self.params.get('break_per_url'):
3274 raise
3275 self._num_downloads = 0
3276 else:
3277 if self.params.get('dump_single_json', False):
3278 self.post_extract(res)
3279 self.to_stdout(json.dumps(self.sanitize_info(res)))
3280 return wrapper
3281
3282 def download(self, url_list):
3283 """Download a given list of URLs."""
3284 url_list = variadic(url_list) # Passing a single URL is a common mistake
3285 outtmpl = self.params['outtmpl']['default']
3286 if (len(url_list) > 1
3287 and outtmpl != '-'
3288 and '%' not in outtmpl
3289 and self.params.get('max_downloads') != 1):
3290 raise SameFileError(outtmpl)
3291
3292 for url in url_list:
3293 self.__download_wrapper(self.extract_info)(
3294 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3295
3296 return self._download_retcode
3297
3298 def download_with_info_file(self, info_filename):
3299 with contextlib.closing(fileinput.FileInput(
3300 [info_filename], mode='r',
3301 openhook=fileinput.hook_encoded('utf-8'))) as f:
3302 # FileInput doesn't have a read method, so we can't call json.load
3303 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3304 try:
3305 self.__download_wrapper(self.process_ie_result)(info, download=True)
3306 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3307 if not isinstance(e, EntryNotInPlaylist):
3308 self.to_stderr('\r')
3309 webpage_url = info.get('webpage_url')
3310 if webpage_url is not None:
3311 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3312 return self.download([webpage_url])
3313 else:
3314 raise
3315 return self._download_retcode
3316
3317 @staticmethod
3318 def sanitize_info(info_dict, remove_private_keys=False):
3319 ''' Sanitize the infodict for converting to json '''
3320 if info_dict is None:
3321 return info_dict
3322 info_dict.setdefault('epoch', int(time.time()))
3323 info_dict.setdefault('_type', 'video')
3324 info_dict.setdefault('_version', {
3325 'version': __version__,
3326 'current_git_head': current_git_head(),
3327 'release_git_head': RELEASE_GIT_HEAD,
3328 'repository': REPOSITORY,
3329 })
3330
3331 if remove_private_keys:
3332 reject = lambda k, v: v is None or k.startswith('__') or k in {
3333 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3334 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
3335 }
3336 else:
3337 reject = lambda k, v: False
3338
3339 def filter_fn(obj):
3340 if isinstance(obj, dict):
3341 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3342 elif isinstance(obj, (list, tuple, set, LazyList)):
3343 return list(map(filter_fn, obj))
3344 elif obj is None or isinstance(obj, (str, int, float, bool)):
3345 return obj
3346 else:
3347 return repr(obj)
3348
3349 return filter_fn(info_dict)
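# Filtering sketch (hypothetical infodict): with remove_private_keys=True,
#   {'id': 'x', '__postprocessors': [], 'filepath': 'a.mp4', 'fmt': object()}
# reduces to {'id': 'x', 'fmt': '<object ...>'} -- dunder/internal keys are
# dropped and non-JSON-serializable values are stringified via repr()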
3350
3351 @staticmethod
3352 def filter_requested_info(info_dict, actually_filter=True):
3353 ''' Alias of sanitize_info for backward compatibility '''
3354 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3355
3356 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3357 for filename in set(filter(None, files_to_delete)):
3358 if msg:
3359 self.to_screen(msg % filename)
3360 try:
3361 os.remove(filename)
3362 except OSError:
3363 self.report_warning(f'Unable to delete file {filename}')
3364 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3365 del info['__files_to_move'][filename]
3366
3367 @staticmethod
3368 def post_extract(info_dict):
3369 def actual_post_extract(info_dict):
3370 if info_dict.get('_type') in ('playlist', 'multi_video'):
3371 for video_dict in info_dict.get('entries', {}):
3372 actual_post_extract(video_dict or {})
3373 return
3374
3375 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3376 info_dict.update(post_extractor())
3377
3378 actual_post_extract(info_dict or {})
3379
3380 def run_pp(self, pp, infodict):
3381 files_to_delete = []
3382 if '__files_to_move' not in infodict:
3383 infodict['__files_to_move'] = {}
3384 try:
3385 files_to_delete, infodict = pp.run(infodict)
3386 except PostProcessingError as e:
3387 # Must be True and not 'only_download'
3388 if self.params.get('ignoreerrors') is True:
3389 self.report_error(e)
3390 return infodict
3391 raise
3392
3393 if not files_to_delete:
3394 return infodict
3395 if self.params.get('keepvideo', False):
3396 for f in files_to_delete:
3397 infodict['__files_to_move'].setdefault(f, '')
3398 else:
3399 self._delete_downloaded_files(
3400 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3401 return infodict
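# Contract sketch: pp.run(info) returns (files_to_delete, info); with
# --keepvideo those files are queued in __files_to_move instead of being
# deleted here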
3402
3403 def run_all_pps(self, key, info, *, additional_pps=None):
3404 self._forceprint(key, info)
3405 for pp in (additional_pps or []) + self._pps[key]:
3406 info = self.run_pp(pp, info)
3407 return info
3408
3409 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3410 info = dict(ie_info)
3411 info['__files_to_move'] = files_to_move or {}
3412 try:
3413 info = self.run_all_pps(key, info)
3414 except PostProcessingError as err:
3415 msg = f'Preprocessing: {err}'
3416 info.setdefault('__pending_error', msg)
3417 self.report_error(msg, is_error=False)
3418 return info, info.pop('__files_to_move', None)
3419
3420 def post_process(self, filename, info, files_to_move=None):
3421 """Run all the postprocessors on the given file."""
3422 info['filepath'] = filename
3423 info['__files_to_move'] = files_to_move or {}
3424 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3425 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3426 del info['__files_to_move']
3427 return self.run_all_pps('after_move', info)
3428
3429 def _make_archive_id(self, info_dict):
3430 video_id = info_dict.get('id')
3431 if not video_id:
3432 return
3433 # Future-proof against any change in case
3434 # and for backwards compatibility with prior versions
3435 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3436 if extractor is None:
3437 url = str_or_none(info_dict.get('url'))
3438 if not url:
3439 return
3440 # Try to find matching extractor for the URL and take its ie_key
3441 for ie_key, ie in self._ies.items():
3442 if ie.suitable(url):
3443 extractor = ie_key
3444 break
3445 else:
3446 return
3447 return make_archive_id(extractor, video_id)
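# Sketch: make_archive_id('Youtube', 'abc123') -> 'youtube abc123'; this
# lowercased '<extractor> <id>' line is what in_download_archive() matches
# and record_download_archive() appends to the archive file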
3448
3449 def in_download_archive(self, info_dict):
3450 fn = self.params.get('download_archive')
3451 if fn is None:
3452 return False
3453
3454 vid_ids = [self._make_archive_id(info_dict)]
3455 vid_ids.extend(info_dict.get('_old_archive_ids') or [])
3456 return any(id_ in self.archive for id_ in vid_ids)
3457
3458 def record_download_archive(self, info_dict):
3459 fn = self.params.get('download_archive')
3460 if fn is None:
3461 return
3462 vid_id = self._make_archive_id(info_dict)
3463 assert vid_id
3464 self.write_debug(f'Adding to archive: {vid_id}')
3465 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3466 archive_file.write(vid_id + '\n')
3467 self.archive.add(vid_id)
3468
3469 @staticmethod
3470 def format_resolution(format, default='unknown'):
3471 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3472 return 'audio only'
3473 if format.get('resolution') is not None:
3474 return format['resolution']
3475 if format.get('width') and format.get('height'):
3476 return '%dx%d' % (format['width'], format['height'])
3477 elif format.get('height'):
3478 return '%sp' % format['height']
3479 elif format.get('width'):
3480 return '%dx?' % format['width']
3481 return default
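# e.g. (sketch): width=1920/height=1080 -> '1920x1080'; height=720 only
# -> '720p'; width=640 only -> '640x?'; audio-only formats -> 'audio only'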
3482
3483 def _list_format_headers(self, *headers):
3484 if self.params.get('listformats_table', True) is not False:
3485 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3486 return headers
3487
3488 def _format_note(self, fdict):
3489 res = ''
3490 if fdict.get('ext') in ['f4f', 'f4m']:
3491 res += '(unsupported)'
3492 if fdict.get('language'):
3493 if res:
3494 res += ' '
3495 res += '[%s]' % fdict['language']
3496 if fdict.get('format_note') is not None:
3497 if res:
3498 res += ' '
3499 res += fdict['format_note']
3500 if fdict.get('tbr') is not None:
3501 if res:
3502 res += ', '
3503 res += '%4dk' % fdict['tbr']
3504 if fdict.get('container') is not None:
3505 if res:
3506 res += ', '
3507 res += '%s container' % fdict['container']
3508 if (fdict.get('vcodec') is not None
3509 and fdict.get('vcodec') != 'none'):
3510 if res:
3511 res += ', '
3512 res += fdict['vcodec']
3513 if fdict.get('vbr') is not None:
3514 res += '@'
3515 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3516 res += 'video@'
3517 if fdict.get('vbr') is not None:
3518 res += '%4dk' % fdict['vbr']
3519 if fdict.get('fps') is not None:
3520 if res:
3521 res += ', '
3522 res += '%sfps' % fdict['fps']
3523 if fdict.get('acodec') is not None:
3524 if res:
3525 res += ', '
3526 if fdict['acodec'] == 'none':
3527 res += 'video only'
3528 else:
3529 res += '%-5s' % fdict['acodec']
3530 elif fdict.get('abr') is not None:
3531 if res:
3532 res += ', '
3533 res += 'audio'
3534 if fdict.get('abr') is not None:
3535 res += '@%3dk' % fdict['abr']
3536 if fdict.get('asr') is not None:
3537 res += ' (%5dHz)' % fdict['asr']
3538 if fdict.get('filesize') is not None:
3539 if res:
3540 res += ', '
3541 res += format_bytes(fdict['filesize'])
3542 elif fdict.get('filesize_approx') is not None:
3543 if res:
3544 res += ', '
3545 res += '~' + format_bytes(fdict['filesize_approx'])
3546 return res
3547
3548 def render_formats_table(self, info_dict):
3549 if not info_dict.get('formats') and not info_dict.get('url'):
3550 return None
3551
3552 formats = info_dict.get('formats', [info_dict])
3553 if self.params.get('listformats_table', True) is False:
3554 table = [
3555 [
3556 format_field(f, 'format_id'),
3557 format_field(f, 'ext'),
3558 self.format_resolution(f),
3559 self._format_note(f)
3560 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3561 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3562
3563 def simplified_codec(f, field):
3564 assert field in ('acodec', 'vcodec')
3565 codec = f.get(field, 'unknown')
3566 if not codec:
3567 return 'unknown'
3568 elif codec != 'none':
3569 return '.'.join(codec.split('.')[:4])
3570
3571 if field == 'vcodec' and f.get('acodec') == 'none':
3572 return 'images'
3573 elif field == 'acodec' and f.get('vcodec') == 'none':
3574 return ''
3575 return self._format_out('audio only' if field == 'vcodec' else 'video only',
3576 self.Styles.SUPPRESS)
3577
3578 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3579 table = [
3580 [
3581 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3582 format_field(f, 'ext'),
3583 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3584 format_field(f, 'fps', '\t%d', func=round),
3585 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3586 format_field(f, 'audio_channels', '\t%s'),
3587 delim,
3588 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3589 format_field(f, 'tbr', '\t%dk', func=round),
3590 shorten_protocol_name(f.get('protocol', '')),
3591 delim,
3592 simplified_codec(f, 'vcodec'),
3593 format_field(f, 'vbr', '\t%dk', func=round),
3594 simplified_codec(f, 'acodec'),
3595 format_field(f, 'abr', '\t%dk', func=round),
3596 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
3597 join_nonempty(
3598 self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3599 format_field(f, 'language', '[%s]'),
3600 join_nonempty(format_field(f, 'format_note'),
3601 format_field(f, 'container', ignore=(None, f.get('ext'))),
3602 delim=', '),
3603 delim=' '),
3604 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3605 header_line = self._list_format_headers(
3606 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3607 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3608
3609 return render_table(
3610 header_line, table, hide_empty=True,
3611 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3612
3613 def render_thumbnails_table(self, info_dict):
3614 thumbnails = list(info_dict.get('thumbnails') or [])
3615 if not thumbnails:
3616 return None
3617 return render_table(
3618 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3619 [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3620
3621 def render_subtitles_table(self, video_id, subtitles):
3622 def _row(lang, formats):
3623 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3624 if len(set(names)) == 1:
3625 names = [] if names[0] == 'unknown' else names[:1]
3626 return [lang, ', '.join(names), ', '.join(exts)]
3627
3628 if not subtitles:
3629 return None
3630 return render_table(
3631 self._list_format_headers('Language', 'Name', 'Formats'),
3632 [_row(lang, formats) for lang, formats in subtitles.items()],
3633 hide_empty=True)
3634
3635 def __list_table(self, video_id, name, func, *args):
3636 table = func(*args)
3637 if not table:
3638 self.to_screen(f'{video_id} has no {name}')
3639 return
3640 self.to_screen(f'[info] Available {name} for {video_id}:')
3641 self.to_stdout(table)
3642
3643 def list_formats(self, info_dict):
3644 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3645
3646 def list_thumbnails(self, info_dict):
3647 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3648
3649 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3650 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3651
3652 def urlopen(self, req):
3653 """ Start an HTTP download """
3654 if isinstance(req, str):
3655 req = sanitized_Request(req)
3656 return self._opener.open(req, timeout=self._socket_timeout)
3657
3658 def print_debug_header(self):
3659 if not self.params.get('verbose'):
3660 return
3661
3662 # These imports can be slow. So import them only as needed
3663 from .extractor.extractors import _LAZY_LOADER
3664 from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
3665
3666 def get_encoding(stream):
3667 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3668 if not supports_terminal_sequences(stream):
3669 from .utils import WINDOWS_VT_MODE # Must be imported locally
3670 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3671 return ret
3672
3673 encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
3674 locale.getpreferredencoding(),
3675 sys.getfilesystemencoding(),
3676 self.get_encoding(),
3677 ', '.join(
3678 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
3679 if stream is not None and key != 'console')
3680 )
3681
3682 logger = self.params.get('logger')
3683 if logger:
3684 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3685 write_debug(encoding_str)
3686 else:
3687 write_string(f'[debug] {encoding_str}\n', encoding=None)
3688 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3689
3690 source = detect_variant()
3691 if VARIANT not in (None, 'pip'):
3692 source += '*'
3693 write_debug(join_nonempty(
3694 f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
3695 __version__,
3696 f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3697 '' if source == 'unknown' else f'({source})',
3698 delim=' '))
3699 if not _LAZY_LOADER:
3700 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3701 write_debug('Lazy loading extractors is forcibly disabled')
3702 else:
3703 write_debug('Lazy loading extractors is disabled')
3704 if plugin_extractors or plugin_postprocessors:
3705 write_debug('Plugins: %s' % [
3706 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3707 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3708 if self.params['compat_opts']:
3709 write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
3710
3711 if current_git_head():
3712 write_debug(f'Git HEAD: {current_git_head()}')
3713 write_debug(system_identifier())
3714
3715 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3716 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3717 if ffmpeg_features:
3718 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
3719
3720 exe_versions['rtmpdump'] = rtmpdump_version()
3721 exe_versions['phantomjs'] = PhantomJSwrapper._version()
3722 exe_str = ', '.join(
3723 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3724 ) or 'none'
3725 write_debug('exe versions: %s' % exe_str)
3726
3727 from .compat.compat_utils import get_package_info
3728 from .dependencies import available_dependencies
3729
3730 write_debug('Optional libraries: %s' % (', '.join(sorted({
3731 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
3732 })) or 'none'))
3733
3734 self._setup_opener()
3735 proxy_map = {}
3736 for handler in self._opener.handlers:
3737 if hasattr(handler, 'proxies'):
3738 proxy_map.update(handler.proxies)
3739 write_debug(f'Proxy map: {proxy_map}')
3740
3741 # Not implemented
3742 if False and self.params.get('call_home'):
3743 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
3744 write_debug('Public IP address: %s' % ipaddr)
3745 latest_version = self.urlopen(
3746 'https://yt-dl.org/latest/version').read().decode()
3747 if version_tuple(latest_version) > version_tuple(__version__):
3748 self.report_warning(
3749 'You are using an outdated version (newest version: %s)! '
3750 'See https://yt-dl.org/update if you need help updating.' %
3751 latest_version)
3752
3753 def _setup_opener(self):
3754 if hasattr(self, '_opener'):
3755 return
3756 timeout_val = self.params.get('socket_timeout')
3757 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3758
3759 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3760 opts_cookiefile = self.params.get('cookiefile')
3761 opts_proxy = self.params.get('proxy')
3762
3763 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3764
3765 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3766 if opts_proxy is not None:
3767 if opts_proxy == '':
3768 proxies = {}
3769 else:
3770 proxies = {'http': opts_proxy, 'https': opts_proxy}
3771 else:
3772 proxies = urllib.request.getproxies()
3773 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3774 if 'http' in proxies and 'https' not in proxies:
3775 proxies['https'] = proxies['http']
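# Sketch: --proxy '' gives {} (direct connection); --proxy
# 'socks5://127.0.0.1:1080' applies to both http and https; with no option
# the environment (HTTP_PROXY etc.) is consulted via getproxies()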
3776 proxy_handler = PerRequestProxyHandler(proxies)
3777
3778 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3779 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3780 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3781 redirect_handler = YoutubeDLRedirectHandler()
3782 data_handler = urllib.request.DataHandler()
3783
3784 # When passing our own FileHandler instance, build_opener won't add the
3785 # default FileHandler and allows us to disable the file protocol, which
3786 # can be used for malicious purposes (see
3787 # https://github.com/ytdl-org/youtube-dl/issues/8227)
3788 file_handler = urllib.request.FileHandler()
3789
3790 def file_open(*args, **kwargs):
3791 raise urllib.error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3792 file_handler.file_open = file_open
3793
3794 opener = urllib.request.build_opener(
3795 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3796
3797 # Delete the default user-agent header, which would otherwise apply in
3798 # cases where our custom HTTP handler doesn't come into play
3799 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3800 opener.addheaders = []
3801 self._opener = opener
3802
3803 def encode(self, s):
3804 if isinstance(s, bytes):
3805 return s # Already encoded
3806
3807 try:
3808 return s.encode(self.get_encoding())
3809 except UnicodeEncodeError as err:
3810 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3811 raise
3812
3813 def get_encoding(self):
3814 encoding = self.params.get('encoding')
3815 if encoding is None:
3816 encoding = preferredencoding()
3817 return encoding
3818
3819 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3820 ''' Write infojson and return True = written, 'exists' = already exists, False = skip, None = error '''
3821 if overwrite is None:
3822 overwrite = self.params.get('overwrites', True)
3823 if not self.params.get('writeinfojson'):
3824 return False
3825 elif not infofn:
3826 self.write_debug(f'Skipping writing {label} infojson')
3827 return False
3828 elif not self._ensure_dir_exists(infofn):
3829 return None
3830 elif not overwrite and os.path.exists(infofn):
3831 self.to_screen(f'[info] {label.title()} metadata is already present')
3832 return 'exists'
3833
3834 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3835 try:
3836 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3837 return True
3838 except OSError:
3839 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3840 return None
3841
3842 def _write_description(self, label, ie_result, descfn):
3843 ''' Write description and return True = written, False = skip, None = error '''
3844 if not self.params.get('writedescription'):
3845 return False
3846 elif not descfn:
3847 self.write_debug(f'Skipping writing {label} description')
3848 return False
3849 elif not self._ensure_dir_exists(descfn):
3850 return None
3851 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3852 self.to_screen(f'[info] {label.title()} description is already present')
3853 elif ie_result.get('description') is None:
3854 self.report_warning(f'There\'s no {label} description to write')
3855 return False
3856 else:
3857 try:
3858 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3859 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3860 descfile.write(ie_result['description'])
3861 except OSError:
3862 self.report_error(f'Cannot write {label} description file {descfn}')
3863 return None
3864 return True
3865
3866 def _write_subtitles(self, info_dict, filename):
3867 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error '''
3868 ret = []
3869 subtitles = info_dict.get('requested_subtitles')
3870 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3871 # subtitle download errors are already handled as warnings in the relevant IE,
3872 # so extraction silently continues when an IE doesn't support subtitles
3873 return ret
3874
3875 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3876 if not sub_filename_base:
3877 self.to_screen('[info] Skipping writing video subtitles')
3878 return ret
3879 for sub_lang, sub_info in subtitles.items():
3880 sub_format = sub_info['ext']
3881 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3882 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3883 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3884 if existing_sub:
3885 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3886 sub_info['filepath'] = existing_sub
3887 ret.append((existing_sub, sub_filename_final))
3888 continue
3889
3890 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3891 if sub_info.get('data') is not None:
3892 try:
3893 # Use newline='' to prevent conversion of newline characters
3894 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3895 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3896 subfile.write(sub_info['data'])
3897 sub_info['filepath'] = sub_filename
3898 ret.append((sub_filename, sub_filename_final))
3899 continue
3900 except OSError:
3901 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3902 return None
3903
3904 try:
3905 sub_copy = sub_info.copy()
3906 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3907 self.dl(sub_filename, sub_copy, subtitle=True)
3908 sub_info['filepath'] = sub_filename
3909 ret.append((sub_filename, sub_filename_final))
3910 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3911 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
3912 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
3913 if not self.params.get('ignoreerrors'):
3914 self.report_error(msg)
3915 raise DownloadError(msg)
3916 self.report_warning(msg)
3917 return ret
3918
3919 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3920 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3921 write_all = self.params.get('write_all_thumbnails', False)
3922 thumbnails, ret = [], []
3923 if write_all or self.params.get('writethumbnail', False):
3924 thumbnails = info_dict.get('thumbnails') or []
3925 multiple = write_all and len(thumbnails) > 1
3926
3927 if thumb_filename_base is None:
3928 thumb_filename_base = filename
3929 if thumbnails and not thumb_filename_base:
3930 self.write_debug(f'Skipping writing {label} thumbnail')
3931 return ret
3932
3933 for idx, t in list(enumerate(thumbnails))[::-1]:
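# Iterating in reverse tries the presumed-best thumbnail (last in the
# list) first; unless write_all is set, the loop stops after the first
# successful download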
3934 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3935 thumb_display_id = f'{label} thumbnail {t["id"]}'
3936 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3937 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3938
3939 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3940 if existing_thumb:
3941 self.to_screen('[info] %s is already present' % (
3942 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3943 t['filepath'] = existing_thumb
3944 ret.append((existing_thumb, thumb_filename_final))
3945 else:
3946 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3947 try:
3948 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
3949 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3950 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3951 shutil.copyfileobj(uf, thumbf)
3952 ret.append((thumb_filename, thumb_filename_final))
3953 t['filepath'] = thumb_filename
3954 except network_exceptions as err:
3955 thumbnails.pop(idx)
3956 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3957 if ret and not write_all:
3958 break
3959 return ret