import collections
import contextlib
import datetime
import errno
import fileinput
import functools
import io
import itertools
import json
import locale
import operator
import os
import random
import re
import shutil
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata
import urllib.request
from string import ascii_letters

from .cache import Cache
from .compat import compat_os_name, compat_shlex_quote
from .cookies import load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.common import UnsupportedURLIE
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .plugins import directories as plugin_directories
from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    FFmpegVideoConvertorPP,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping
from .update import REPOSITORY, current_git_head, detect_variant
from .utils import (
    DEFAULT_OUTTMPL,
    IDENTITY,
    LINK_TEMPLATES,
    MEDIA_EXTENSIONS,
    NO_DEFAULT,
    NUMBER_RE,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    FormatSorter,
    GeoRestrictedError,
    HEADRequest,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PerRequestProxyHandler,
    PlaylistEntries,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    UserNotLive,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
    age_restricted,
    args_to_str,
    bug_reports_message,
    date_from_str,
    deprecation_warning,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    escapeHTML,
    expand_path,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_compatible_ext,
    get_domain,
    int_or_none,
    iri_to_uri,
    is_path_like,
    join_nonempty,
    locked_file,
    make_archive_id,
    make_dir,
    make_HTTPS_handler,
    merge_headers,
    network_exceptions,
    number_of_digits,
    orderedSet,
    orderedSet_from_options,
    parse_filesize,
    preferredencoding,
    prepend_extension,
    register_socks_protocols,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    system_identifier,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_call,
    try_get,
    url_basename,
    variadic,
    version_tuple,
    windows_enable_vt_mode,
    write_json_file,
    write_string,
)
from .version import RELEASE_GIT_HEAD, VARIANT, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do),
    it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are video or any of the
                       items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. See "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of the same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home'
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be Windows-compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    allowed_extractors: List of regexes to match against extractor names that are allowed
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playlist_items:    Specific indices of playlist to download.
    playlistrandom:    Download playlist items in random order.
    lazy_playlist:     Process playlist entries as they are received.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Print everything to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    clean_infojson:    Remove private fields from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages, e.g. ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  A set, or the name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to for the entire queue
    cookiefile:        File name or text stream from where cookies should be read and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, the name of the keyring,
                       and the container name, e.g. ('chrome', ) or
                       ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    client_certificate: Path to client certificate file in PEM format. May include the private key
    client_certificate_key: Path to private key file for client certificate
    client_certificate_password: Password for client certificate private key, if encrypted.
                       If not provided and the key is encrypted, yt-dlp will ask interactively
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       (Only supported by some extractors)
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic:Print out sent and received HTTP traffic
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Whether to resolve and process url_results further
                       * False: Always process (default)
                       * True: Never process
                       * 'in_playlist': Do not process inside playlist/multi_video
                       * 'discard': Always process, but don't return the result
                         from inside playlist/multi_video
                       * 'discard_in_playlist': Same as "discard", but only for
                         playlists (not multi_video)
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key: The name of the postprocessor. See
                         yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                         the entries of utils.POSTPROCESS_WHEN
                         Assumed to be 'post_process' if not given
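                       A sketch of a typical value, mirroring the
                       audio-extraction example in the project README
                       (illustrative only):
                           [{'key': 'FFmpegExtractAudio',
                             'preferredcodec': 'mp3'}]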
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                         Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                         None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                         unknown
                       * fragment_index: The counter of the currently
                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
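                       A minimal hook could look like this (illustrative
                       sketch):
                           def hook(d):
                               if d['status'] == 'finished':
                                   print('Done downloading', d['filename'])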
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                         Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: "/" separated list of extensions to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                         about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval:Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called for every video with the signature
                       (info_dict, *, incomplete: bool) -> Optional[str]
                       For backward compatibility with youtube-dl, the signature
                       (info_dict) -> Optional[str] is also allowed.
                       - If it returns a message, the video is ignored.
                       - If it returns None, the video is downloaded.
                       - If it returns utils.NO_DEFAULT, the user is interactively
                         asked whether to download the video.
                       match_filter_func in utils.py is one example for this.
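                       For instance, a filter that skips very short videos
                       could look like this (illustrative sketch):
                           def longer_than_a_minute(info_dict, *, incomplete):
                               duration = info_dict.get('duration')
                               if duration and duration < 60:
                                   return 'The video is shorter than a minute'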
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'
    retry_sleep_functions: Dictionary of functions that take the number of attempts
                       as argument and return the time to sleep in seconds.
                       Allowed keys are 'http', 'fragment', 'file_access'
    download_ranges:   A callback function that gets called for every video with
                       the signature (info_dict, ydl) -> Iterable[Section].
                       Only the returned sections will be downloaded.
                       Each Section is a dict with the following keys:
                       * start_time: Start time of the section in seconds
                       * end_time: End time of the section in seconds
                       * title: Section title (Optional)
                       * index: Section number (Optional)
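                       E.g. a callback that keeps only the first minute of
                       each video (illustrative sketch):
                           def first_minute(info_dict, ydl):
                               yield {'start_time': 0, 'end_time': 60}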
    force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts
    noprogress:        Do not print the progress bar
    live_from_start:   Whether to download livestream videos from the start

    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       E.g. {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube

    The following options are deprecated and may be removed in the future:

    force_generic_extractor: Force downloader to use the generic extractor
                       - Use allowed_extractors = ['generic', 'default']
    playliststart:     - Use playlist_items
                       Playlist item to start at.
    playlistend:       - Use playlist_items
                       Playlist item to end at.
    playlistreverse:   - Use playlist_items
                       Download playlist items in reverse order.
    forceurl:          - Use forceprint
                       Force printing final URL.
    forcetitle:        - Use forceprint
                       Force printing title.
    forceid:           - Use forceprint
                       Force printing ID.
    forcethumbnail:    - Use forceprint
                       Force printing thumbnail URL.
    forcedescription:  - Use forceprint
                       Force printing description.
    forcefilename:     - Use forceprint
                       Force printing final filename.
    forceduration:     - Use forceprint
                       Force printing duration.
    allsubtitles:      - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    include_ads:       - Doesn't work
                       Download ads as well
    call_home:         - Not implemented
                       Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging.
    post_hooks:        - Register a custom postprocessor
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    prefer_ffmpeg:     - avconv support is deprecated
                       If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg.
    youtube_include_dash_manifest: - Use extractor_args
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: - Use extractor_args
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
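
    Example usage (a minimal sketch; see the "EMBEDDING YT-DLP" section of
    the README for the full guide):

        from yt_dlp import YoutubeDL

        with YoutubeDL({'outtmpl': '%(title)s.%(ext)s'}) as ydl:
            ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])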
536 """
537
538 _NUMERIC_FIELDS = {
539 'width', 'height', 'asr', 'audio_channels', 'fps',
540 'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx',
541 'timestamp', 'release_timestamp',
542 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
543 'average_rating', 'comment_count', 'age_limit',
544 'start_time', 'end_time',
545 'chapter_number', 'season_number', 'episode_number',
546 'track_number', 'disc_number', 'release_year',
547 }
548
549 _format_fields = {
550 # NB: Keep in sync with the docstring of extractor/common.py
551 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
552 'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels',
553 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns',
554 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
555 'preference', 'language', 'language_preference', 'quality', 'source_preference',
556 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
557 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
558 }
559 _format_selection_exts = {
560 'audio': set(MEDIA_EXTENSIONS.common_audio),
561 'video': set(MEDIA_EXTENSIONS.common_video + ('3gp', )),
562 'storyboards': set(MEDIA_EXTENSIONS.storyboards),
563 }
564
    def __init__(self, params=None, auto_init=True):
        """Create a YoutubeDL object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        windows_enable_vt_mode()
        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )
        self._allow_colors = Namespace(**{
            type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
            for type_, stream in self._out_files.items_ if type_ != 'console'
        })

        # The code is left like this to be reused for future deprecations
        MIN_SUPPORTED, MIN_RECOMMENDED = (3, 7), (3, 7)
        current_version = sys.version_info[:2]
        if current_version < MIN_RECOMMENDED:
            msg = ('Support for Python version %d.%d has been deprecated. '
                   'See https://github.com/yt-dlp/yt-dlp/issues/3764 for more details.'
                   '\n You will no longer receive updates on this version')
            if current_version < MIN_SUPPORTED:
                msg = 'Python version %d.%d is no longer supported'
            self.deprecation_warning(
                f'{msg}! Please update to Python %d.%d or above' % (*current_version, *MIN_RECOMMENDED))

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = shutil.get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        if auto_init and auto_init != 'no_verbose_header':
            self.print_debug_header()

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecated_feature(msg)

        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        if self.params.get('simulate') is None and any((
            self.params.get('list_thumbnails'),
            self.params.get('listformats'),
            self.params.get('listsubtitles'),
        )):
            self.params['simulate'] = 'list_only'

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if auto_init:
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self._parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        # Set http_headers defaults according to std_headers
        self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        self._setup_opener()
        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            archive = set()
            if fn is None:
                return archive
            elif not is_path_like(fn):
                return fn

            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
            return archive

        self.archive = preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key. It will try to get one from
        the _ies list; if there's no instance, it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()}
        all_ies['end'] = UnsupportedURLIE()
        try:
            ie_names = orderedSet_from_options(
                self.params.get('allowed_extractors', ['default']), {
                    'all': list(all_ies),
                    'default': [name for name, ie in all_ies.items() if ie._ENABLED],
                }, use_regex=True)
        except re.error as e:
            raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}')
        for name in ie_names:
            self.add_info_extractor(all_ies[name])
        self.write_debug(f'Loaded {len(ie_names)} extractors')

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        assert when in POSTPROCESS_WHEN, f'Invalid when={when}'
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. '
                                     'Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. '
                                     'Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen, only_once=only_once)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may raise an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message, *, stacklevel=0):
        deprecation_warning(
            message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False)

    def deprecated_feature(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'Deprecated Feature: {message}')
        self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefix the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message or Print message to stderr'''
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version')
        self._parse_outtmpl()
        return self.params['outtmpl']

    def _parse_outtmpl(self):
        sanitize = IDENTITY
        if self.params.get('restrictfilenames'):  # Remove spaces in the default template
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')

        outtmpl = self.params.setdefault('outtmpl', {})
        if not isinstance(outtmpl, dict):
            self.params['outtmpl'] = outtmpl = {'default': outtmpl}
        outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None})

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict), '"paths" parameter must be a dictionary'
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly that is not what we want since we need to keep
        # '%%' intact for template dict substitution step. Working around
        # with boundary-alike separator hack.
        sep = ''.join(random.choices(ascii_letters, k=32))
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)
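
    # Illustrative (not part of the original comments): escape_outtmpl turns
    # stray percent signs into literal ones while keeping real template keys
    # intact, e.g. '100% %(title)s' -> '100%% %(title)s'.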

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err
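
    # Illustrative: validate_outtmpl('%(title)s.%(ext)s') returns None, while an
    # incomplete template such as '%(title' returns the ValueError it triggered.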

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        info_dict.pop('__pending_error', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int, slice or "{field, ...}"
        FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
        FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
            'inner': FIELD_INNER_RE,
            'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
        }
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')
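        # Illustrative fields accepted by the above (sketch): in
        # '%(duration-3600>%M:%S)s' the key parses as fields='duration',
        # maths='-3600' and strf_format='%M:%S'; in '%(uploader|Unknown)s'
        # the default 'Unknown' is used when the field is absent.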

        def _traverse_infodict(fields):
            fields = [f for x in re.split(r'\.({.+?})\.?', fields)
                      for f in ([x] if x.startswith('{') else x.split('.'))]
            for i in (0, -1):
                if fields and not fields[i]:
                    fields.pop(i)

            for i, f in enumerate(fields):
                if not f.startswith('{'):
                    continue
                assert f.endswith('}'), f'No closing brace for {f} in {fields}'
                fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}

            return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485
            if sanitize and value == '':
                value = None
            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params['compat_opts']
                else NO_DEFAULT))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = f'0{field_size_compat_map[key]:d}d'

            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(
                    value, default=_dumpjson_default,
                    indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
            elif fmt[-1] == 'h':  # html
                value, fmt = escapeHTML(str(value)), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode() % str(value).encode()
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitizer(initial_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
        return self.escape_outtmpl(outtmpl) % info_dict
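
    # Illustrative: evaluate_outtmpl('%(title)s [%(id)s]', {'title': 'a', 'id': 'b'})
    # renders to 'a [b]' by preparing the template and %-formatting the result.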

    def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
        assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
        if outtmpl is None:
            outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default'])
        try:
            outtmpl = self._outtmpl_expandpath(outtmpl)
            filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
            if not filename:
                return None

            if tmpl_type in ('', 'temp'):
                final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
                if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
                    filename = replace_extension(filename, ext, final_ext)
            elif tmpl_type:
                force_ext = OUTTMPL_TYPES[tmpl_type]
                if force_ext:
                    filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                no_ext, *ext = filename.rsplit('.', 2)
                filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
        """Generate the output filename"""
        if outtmpl:
            assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
            dir_type = None
        filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
        if not filename and dir_type not in ('', 'temp'):
            return ''

        if warn:
            if not self.params.get('paths'):
                pass
            elif filename == '-':
                self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
        if filename == '-' or not filename:
            return filename

        return self.get_output_path(dir_type, filename)

    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """Returns None if the file should be downloaded"""
        _type = info_dict.get('_type', 'video')
        assert incomplete or _type == 'video', 'Only video result can be considered complete'

        video_title = info_dict.get('title', info_dict.get('id', 'entry'))

        def check_filter():
            if _type in ('playlist', 'multi_video'):
                return
            elif _type in ('url', 'url_transparent') and not try_call(
                    lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])):
                return

            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
                if ret is NO_DEFAULT:
                    while True:
                        filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
                        reply = input(self._format_screen(
                            f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
                        if reply in {'y', ''}:
                            return None
                        elif reply == 'n':
                            return f'Skipping {video_title}'
                elif ret is not None:
                    return ret
            return None

        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason

    @staticmethod
    def add_extra_info(info_dict, extra_info):
        '''Set the keys from extra_info in info dict if they are missing'''
        for key, value in extra_info.items():
1440 info_dict.setdefault(key, value)
1441
1442 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1443 process=True, force_generic_extractor=False):
1444 """
1445 Extract and return the information dictionary of the URL
1446
1447 Arguments:
1448 @param url URL to extract
1449
1450 Keyword arguments:
1451 @param download Whether to download videos
1452 @param process Whether to resolve all unresolved references (URLs, playlist items).
1453 Must be True for download to work
1454 @param ie_key Use only the extractor with this key
1455
1456 @param extra_info Dictionary containing the extra values to add to the info (For internal use only)
1457 @param force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic')
1458 """
1459
1460 if extra_info is None:
1461 extra_info = {}
1462
1463 if not ie_key and force_generic_extractor:
1464 ie_key = 'Generic'
1465
1466 if ie_key:
1467 ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {}
1468 else:
1469 ies = self._ies
1470
1471 for key, ie in ies.items():
1472 if not ie.suitable(url):
1473 continue
1474
1475 if not ie.working():
1476 self.report_warning('The program functionality for this site has been marked as broken, '
1477 'and will probably not work.')
1478
1479 temp_id = ie.get_temp_id(url)
1480 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
1481 self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
1482 if self.params.get('break_on_existing', False):
1483 raise ExistingVideoReached()
1484 break
1485 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1486 else:
1487 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
1488 self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}',
1489 tb=False if extractors_restricted else None)
1490
1491 def _handle_extraction_exceptions(func):
1492 @functools.wraps(func)
1493 def wrapper(self, *args, **kwargs):
1494 while True:
1495 try:
1496 return func(self, *args, **kwargs)
1497 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1498 raise
1499 except ReExtractInfo as e:
1500 if e.expected:
1501 self.to_screen(f'{e}; Re-extracting data')
1502 else:
1503 self.to_stderr('\r')
1504 self.report_warning(f'{e}; Re-extracting data')
1505 continue
1506 except GeoRestrictedError as e:
1507 msg = e.msg
1508 if e.countries:
1509 msg += '\nThis video is available in %s.' % ', '.join(
1510 map(ISO3166Utils.short2full, e.countries))
1511 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1512 self.report_error(msg)
1513 except ExtractorError as e: # An error we somewhat expected
1514 self.report_error(str(e), e.format_traceback())
1515 except Exception as e:
1516 if self.params.get('ignoreerrors'):
1517 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1518 else:
1519 raise
1520 break
1521 return wrapper
1522
1523 def _wait_for_video(self, ie_result={}):
1524 if (not self.params.get('wait_for_video')
1525 or ie_result.get('_type', 'video') != 'video'
1526 or ie_result.get('formats') or ie_result.get('url')):
1527 return
1528
1529 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1530 last_msg = ''
1531
1532 def progress(msg):
1533 nonlocal last_msg
1534 full_msg = f'{msg}\n'
1535 if not self.params.get('noprogress'):
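# pad with spaces so that a shorter message fully overwrites the
# previous one before the carriage return rewinds the cursor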
1536 full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r'
1537 elif last_msg:
1538 return
1539 self.to_screen(full_msg, skip_eol=True)
1540 last_msg = msg
1541
1542 min_wait, max_wait = self.params.get('wait_for_video')
1543 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1544 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1545 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1546 self.report_warning('Release time of video is not known')
1547 elif ie_result and (diff or 0) <= 0:
1548 self.report_warning('Video should already be available according to extracted info')
1549 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1550 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1551
1552 wait_till = time.time() + diff
1553 try:
1554 while True:
1555 diff = wait_till - time.time()
1556 if diff <= 0:
1557 progress('')
1558 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1559 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1560 time.sleep(1)
1561 except KeyboardInterrupt:
1562 progress('')
1563 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1564 except BaseException as e:
1565 if not isinstance(e, ReExtractInfo):
1566 self.to_screen('')
1567 raise
1568
1569 @_handle_extraction_exceptions
1570 def __extract_info(self, url, ie, download, extra_info, process):
1571 try:
1572 ie_result = ie.extract(url)
1573 except UserNotLive as e:
1574 if process:
1575 if self.params.get('wait_for_video'):
1576 self.report_warning(e)
1577 self._wait_for_video()
1578 raise
1579 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1580 self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}')
1581 return
1582 if isinstance(ie_result, list):
1583 # Backwards compatibility: old IE result format
1584 ie_result = {
1585 '_type': 'compat_list',
1586 'entries': ie_result,
1587 }
1588 if extra_info.get('original_url'):
1589 ie_result.setdefault('original_url', extra_info['original_url'])
1590 self.add_default_extra_info(ie_result, ie, url)
1591 if process:
1592 self._wait_for_video(ie_result)
1593 return self.process_ie_result(ie_result, download, extra_info)
1594 else:
1595 return ie_result
1596
1597 def add_default_extra_info(self, ie_result, ie, url):
1598 if url is not None:
1599 self.add_extra_info(ie_result, {
1600 'webpage_url': url,
1601 'original_url': url,
1602 })
1603 webpage_url = ie_result.get('webpage_url')
1604 if webpage_url:
1605 self.add_extra_info(ie_result, {
1606 'webpage_url_basename': url_basename(webpage_url),
1607 'webpage_url_domain': get_domain(webpage_url),
1608 })
1609 if ie is not None:
1610 self.add_extra_info(ie_result, {
1611 'extractor': ie.IE_NAME,
1612 'extractor_key': ie.ie_key(),
1613 })
1614
1615 def process_ie_result(self, ie_result, download=True, extra_info=None):
1616 """
1617 Take the result of the ie (may be modified) and resolve all unresolved
1618 references (URLs, playlist items).
1619
1620 It will also download the videos if 'download' is True.
1621 Returns the resolved ie_result.
1622 """
1623 if extra_info is None:
1624 extra_info = {}
1625 result_type = ie_result.get('_type', 'video')
1626
1627 if result_type in ('url', 'url_transparent'):
1628 ie_result['url'] = sanitize_url(
1629 ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https')
1630 if ie_result.get('original_url') and not extra_info.get('original_url'):
1631 extra_info = {'original_url': ie_result['original_url'], **extra_info}
1632
1633 extract_flat = self.params.get('extract_flat', False)
1634 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1635 or extract_flat is True):
1636 info_copy = ie_result.copy()
1637 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1638 if ie and not ie_result.get('id'):
1639 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1640 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1641 self.add_extra_info(info_copy, extra_info)
1642 info_copy, _ = self.pre_process(info_copy)
1643 self._fill_common_fields(info_copy, False)
1644 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1645 self._raise_pending_errors(info_copy)
1646 if self.params.get('force_write_download_archive', False):
1647 self.record_download_archive(info_copy)
1648 return ie_result
1649
1650 if result_type == 'video':
1651 self.add_extra_info(ie_result, extra_info)
1652 ie_result = self.process_video_result(ie_result, download=download)
1653 self._raise_pending_errors(ie_result)
1654 additional_urls = (ie_result or {}).get('additional_urls')
1655 if additional_urls:
1656 # TODO: Improve MetadataParserPP to allow setting a list
1657 if isinstance(additional_urls, str):
1658 additional_urls = [additional_urls]
1659 self.to_screen(
1660 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1661 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1662 ie_result['additional_entries'] = [
1663 self.extract_info(
1664 url, download, extra_info=extra_info,
1665 force_generic_extractor=self.params.get('force_generic_extractor'))
1666 for url in additional_urls
1667 ]
1668 return ie_result
1669 elif result_type == 'url':
1670 # We have to add extra_info to the results because it may be
1671 # contained in a playlist
1672 return self.extract_info(
1673 ie_result['url'], download,
1674 ie_key=ie_result.get('ie_key'),
1675 extra_info=extra_info)
1676 elif result_type == 'url_transparent':
1677 # Use the information from the embedding page
1678 info = self.extract_info(
1679 ie_result['url'], ie_key=ie_result.get('ie_key'),
1680 extra_info=extra_info, download=False, process=False)
1681
1682 # extract_info may return None when ignoreerrors is enabled and
1683 # extraction failed with an error, don't crash and return early
1684 # in this case
1685 if not info:
1686 return info
1687
1688 exempted_fields = {'_type', 'url', 'ie_key'}
1689 if not ie_result.get('section_end') and ie_result.get('section_start') is None:
1690 # For video clips, the id etc of the clip extractor should be used
1691 exempted_fields |= {'id', 'extractor', 'extractor_key'}
1692
1693 new_result = info.copy()
1694 new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields))
1695
1696 # Extracted info may not be a video result (i.e.
1697 # info.get('_type', 'video') != 'video') but rather a url or
1698 # url_transparent. In such cases, outer metadata (from ie_result)
1699 # should be propagated to the inner one (info). For this to happen,
1700 # the _type of info should be overridden with url_transparent. This
1701 # fixes the issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1702 if new_result.get('_type') == 'url':
1703 new_result['_type'] = 'url_transparent'
1704
1705 return self.process_ie_result(
1706 new_result, download=download, extra_info=extra_info)
1707 elif result_type in ('playlist', 'multi_video'):
1708 # Protect from infinite recursion due to recursively nested playlists
1709 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1710 webpage_url = ie_result.get('webpage_url') # Playlists may not have a webpage_url
1711 if webpage_url and webpage_url in self._playlist_urls:
1712 self.to_screen(
1713 '[download] Skipping already downloaded playlist: %s'
1714 % (ie_result.get('title') or ie_result.get('id')))
1715 return
1716
1717 self._playlist_level += 1
1718 self._playlist_urls.add(webpage_url)
1719 self._fill_common_fields(ie_result, False)
1720 self._sanitize_thumbnails(ie_result)
1721 try:
1722 return self.__process_playlist(ie_result, download)
1723 finally:
1724 self._playlist_level -= 1
1725 if not self._playlist_level:
1726 self._playlist_urls.clear()
1727 elif result_type == 'compat_list':
1728 self.report_warning(
1729 'Extractor %s returned a compat_list result. '
1730 'It needs to be updated.' % ie_result.get('extractor'))
1731
1732 def _fixup(r):
1733 self.add_extra_info(r, {
1734 'extractor': ie_result['extractor'],
1735 'webpage_url': ie_result['webpage_url'],
1736 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1737 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1738 'extractor_key': ie_result['extractor_key'],
1739 })
1740 return r
1741 ie_result['entries'] = [
1742 self.process_ie_result(_fixup(r), download, extra_info)
1743 for r in ie_result['entries']
1744 ]
1745 return ie_result
1746 else:
1747 raise Exception('Invalid result type: %s' % result_type)
1748
1749 def _ensure_dir_exists(self, path):
1750 return make_dir(path, self.report_error)
1751
1752 @staticmethod
1753 def _playlist_infodict(ie_result, strict=False, **kwargs):
1754 info = {
1755 'playlist_count': ie_result.get('playlist_count'),
1756 'playlist': ie_result.get('title') or ie_result.get('id'),
1757 'playlist_id': ie_result.get('id'),
1758 'playlist_title': ie_result.get('title'),
1759 'playlist_uploader': ie_result.get('uploader'),
1760 'playlist_uploader_id': ie_result.get('uploader_id'),
1761 **kwargs,
1762 }
1763 if strict:
1764 return info
1765 if ie_result.get('webpage_url'):
1766 info.update({
1767 'webpage_url': ie_result['webpage_url'],
1768 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1769 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1770 })
1771 return {
1772 **info,
1773 'playlist_index': 0,
1774 '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
1775 'extractor': ie_result['extractor'],
1776 'extractor_key': ie_result['extractor_key'],
1777 }
1778
1779 def __process_playlist(self, ie_result, download):
1780 """Process each entry in the playlist"""
1781 assert ie_result['_type'] in ('playlist', 'multi_video')
1782
1783 common_info = self._playlist_infodict(ie_result, strict=True)
1784 title = common_info.get('playlist') or '<Untitled>'
1785 if self._match_entry(common_info, incomplete=True) is not None:
1786 return
1787 self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}')
1788
1789 all_entries = PlaylistEntries(self, ie_result)
1790 entries = orderedSet(all_entries.get_requested_items(), lazy=True)
1791
1792 lazy = self.params.get('lazy_playlist')
1793 if lazy:
1794 resolved_entries, n_entries = [], 'N/A'
1795 ie_result['requested_entries'], ie_result['entries'] = None, None
1796 else:
1797 entries = resolved_entries = list(entries)
1798 n_entries = len(resolved_entries)
1799 ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
1800 if not ie_result.get('playlist_count'):
1801 # Better to do this after potentially exhausting entries
1802 ie_result['playlist_count'] = all_entries.get_full_count()
1803
1804 extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
1805 ie_copy = collections.ChainMap(ie_result, extra)
1806
1807 _infojson_written = False
1808 write_playlist_files = self.params.get('allow_playlist_files', True)
1809 if write_playlist_files and self.params.get('list_thumbnails'):
1810 self.list_thumbnails(ie_result)
1811 if write_playlist_files and not self.params.get('simulate'):
1812 _infojson_written = self._write_info_json(
1813 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1814 if _infojson_written is None:
1815 return
1816 if self._write_description('playlist', ie_result,
1817 self.prepare_filename(ie_copy, 'pl_description')) is None:
1818 return
1819 # TODO: This should be passed to ThumbnailsConvertor if necessary
1820 self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1821
1822 if lazy:
1823 if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
1824 self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
1825 elif self.params.get('playlistreverse'):
1826 entries.reverse()
1827 elif self.params.get('playlistrandom'):
1828 random.shuffle(entries)
1829
1830 self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items'
1831 f'{format_field(ie_result, "playlist_count", " of %s")}')
1832
1833 keep_resolved_entries = self.params.get('extract_flat') != 'discard'
1834 if self.params.get('extract_flat') == 'discard_in_playlist':
1835 keep_resolved_entries = ie_result['_type'] != 'playlist'
1836 if keep_resolved_entries:
1837 self.write_debug('The information of all playlist entries will be held in memory')
1838
1839 failures = 0
1840 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1841 for i, (playlist_index, entry) in enumerate(entries):
1842 if lazy:
1843 resolved_entries.append((playlist_index, entry))
1844 if not entry:
1845 continue
1846
1847 entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
1848 if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
1849 playlist_index = ie_result['requested_entries'][i]
1850
1851 entry_copy = collections.ChainMap(entry, {
1852 **common_info,
1853 'n_entries': int_or_none(n_entries),
1854 'playlist_index': playlist_index,
1855 'playlist_autonumber': i + 1,
1856 })
1857
1858 if self._match_entry(entry_copy, incomplete=True) is not None:
1859 # For compatibility with youtube-dl. See https://github.com/yt-dlp/yt-dlp/issues/4369
1860 resolved_entries[i] = (playlist_index, NO_DEFAULT)
1861 continue
1862
1863 self.to_screen('[download] Downloading item %s of %s' % (
1864 self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
1865
1866 entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({
1867 'playlist_index': playlist_index,
1868 'playlist_autonumber': i + 1,
1869 }, extra))
1870 if not entry_result:
1871 failures += 1
1872 if failures >= max_failures:
1873 self.report_error(
1874 f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
1875 break
1876 if keep_resolved_entries:
1877 resolved_entries[i] = (playlist_index, entry_result)
1878
1879 # Update with processed data
1880 ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT]
1881 ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT]
1882 if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))):
1883 # Do not set for full playlist
1884 ie_result.pop('requested_entries')
1885
1886 # Write the updated info to json
1887 if _infojson_written is True and self._write_info_json(
1888 'updated playlist', ie_result,
1889 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1890 return
1891
1892 ie_result = self.run_all_pps('playlist', ie_result)
1893 self.to_screen(f'[download] Finished downloading playlist: {title}')
1894 return ie_result
1895
1896 @_handle_extraction_exceptions
1897 def __process_iterable_entry(self, entry, download, extra_info):
1898 return self.process_ie_result(
1899 entry, download=download, extra_info=extra_info)
1900
1901 def _build_format_filter(self, filter_spec):
1902 " Returns a function to filter the formats according to the filter_spec "
1903
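# For orientation: filter_spec is the bracketed part of a selector such as
# -f 'best[height<=720]' or 'all[filesize<100M?]'; the trailing '?' makes
# the filter also accept formats where the compared field is missing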
1904 OPERATORS = {
1905 '<': operator.lt,
1906 '<=': operator.le,
1907 '>': operator.gt,
1908 '>=': operator.ge,
1909 '=': operator.eq,
1910 '!=': operator.ne,
1911 }
1912 operator_rex = re.compile(r'''(?x)\s*
1913 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1914 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1915 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1916 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1917 m = operator_rex.fullmatch(filter_spec)
1918 if m:
1919 try:
1920 comparison_value = int(m.group('value'))
1921 except ValueError:
1922 comparison_value = parse_filesize(m.group('value'))
1923 if comparison_value is None:
1924 comparison_value = parse_filesize(m.group('value') + 'B')
1925 if comparison_value is None:
1926 raise ValueError(
1927 'Invalid value %r in format specification %r' % (
1928 m.group('value'), filter_spec))
1929 op = OPERATORS[m.group('op')]
1930
1931 if not m:
1932 STR_OPERATORS = {
1933 '=': operator.eq,
1934 '^=': lambda attr, value: attr.startswith(value),
1935 '$=': lambda attr, value: attr.endswith(value),
1936 '*=': lambda attr, value: value in attr,
1937 '~=': lambda attr, value: value.search(attr) is not None
1938 }
1939 str_operator_rex = re.compile(r'''(?x)\s*
1940 (?P<key>[a-zA-Z0-9._-]+)\s*
1941 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1942 (?P<quote>["'])?
1943 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1944 (?(quote)(?P=quote))\s*
1945 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1946 m = str_operator_rex.fullmatch(filter_spec)
1947 if m:
1948 if m.group('op') == '~=':
1949 comparison_value = re.compile(m.group('value'))
1950 else:
1951 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1952 str_op = STR_OPERATORS[m.group('op')]
1953 if m.group('negation'):
1954 op = lambda attr, value: not str_op(attr, value)
1955 else:
1956 op = str_op
1957
1958 if not m:
1959 raise SyntaxError('Invalid filter specification %r' % filter_spec)
1960
1961 def _filter(f):
1962 actual_value = f.get(m.group('key'))
1963 if actual_value is None:
1964 return m.group('none_inclusive')
1965 return op(actual_value, comparison_value)
1966 return _filter
1967
1968 def _check_formats(self, formats):
1969 for f in formats:
1970 self.to_screen('[info] Testing format %s' % f['format_id'])
1971 path = self.get_output_path('temp')
1972 if not self._ensure_dir_exists(f'{path}/'):
1973 continue
1974 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1975 temp_file.close()
1976 try:
1977 success, _ = self.dl(temp_file.name, f, test=True)
1978 except (DownloadError, OSError, ValueError) + network_exceptions:
1979 success = False
1980 finally:
1981 if os.path.exists(temp_file.name):
1982 try:
1983 os.remove(temp_file.name)
1984 except OSError:
1985 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1986 if success:
1987 yield f
1988 else:
1989 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1990
1991 def _default_format_spec(self, info_dict, download=True):
1992
1993 def can_merge():
1994 merger = FFmpegMergerPP(self)
1995 return merger.available and merger.can_merge()
1996
1997 prefer_best = (
1998 not self.params.get('simulate')
1999 and download
2000 and (
2001 not can_merge()
2002 or info_dict.get('is_live') and not self.params.get('live_from_start')
2003 or self.params['outtmpl']['default'] == '-'))
2004 compat = (
2005 prefer_best
2006 or self.params.get('allow_multiple_audio_streams', False)
2007 or 'format-spec' in self.params['compat_opts'])
2008
2009 return (
2010 'best/bestvideo+bestaudio' if prefer_best
2011 else 'bestvideo*+bestaudio/best' if not compat
2012 else 'bestvideo+bestaudio/best')
2013
2014 def build_format_selector(self, format_spec):
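# For orientation: format_spec uses the documented -f syntax, e.g.
# 'bv*+ba/b' (merge best video-containing and best audio formats,
# falling back to the best pre-merged format) or 'best[height<=720]'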
2015 def syntax_error(note, start):
2016 message = (
2017 'Invalid format specification: '
2018 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
2019 return SyntaxError(message)
2020
2021 PICKFIRST = 'PICKFIRST'
2022 MERGE = 'MERGE'
2023 SINGLE = 'SINGLE'
2024 GROUP = 'GROUP'
2025 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
2026
2027 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
2028 'video': self.params.get('allow_multiple_video_streams', False)}
2029
2030 check_formats = self.params.get('check_formats') == 'selected'
2031
2032 def _parse_filter(tokens):
2033 filter_parts = []
2034 for type, string, start, _, _ in tokens:
2035 if type == tokenize.OP and string == ']':
2036 return ''.join(filter_parts)
2037 else:
2038 filter_parts.append(string)
2039
2040 def _remove_unused_ops(tokens):
2041 # Remove operators that we don't use and join them with the surrounding strings.
2042 # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
2043 ALLOWED_OPS = ('/', '+', ',', '(', ')')
2044 last_string, last_start, last_end, last_line = None, None, None, None
2045 for type, string, start, end, line in tokens:
2046 if type == tokenize.OP and string == '[':
2047 if last_string:
2048 yield tokenize.NAME, last_string, last_start, last_end, last_line
2049 last_string = None
2050 yield type, string, start, end, line
2051 # everything inside brackets will be handled by _parse_filter
2052 for type, string, start, end, line in tokens:
2053 yield type, string, start, end, line
2054 if type == tokenize.OP and string == ']':
2055 break
2056 elif type == tokenize.OP and string in ALLOWED_OPS:
2057 if last_string:
2058 yield tokenize.NAME, last_string, last_start, last_end, last_line
2059 last_string = None
2060 yield type, string, start, end, line
2061 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2062 if not last_string:
2063 last_string = string
2064 last_start = start
2065 last_end = end
2066 else:
2067 last_string += string
2068 if last_string:
2069 yield tokenize.NAME, last_string, last_start, last_end, last_line
2070
2071 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
2072 selectors = []
2073 current_selector = None
2074 for type, string, start, _, _ in tokens:
2075 # ENCODING is only defined in Python 3.x
2076 if type == getattr(tokenize, 'ENCODING', None):
2077 continue
2078 elif type in [tokenize.NAME, tokenize.NUMBER]:
2079 current_selector = FormatSelector(SINGLE, string, [])
2080 elif type == tokenize.OP:
2081 if string == ')':
2082 if not inside_group:
2083 # ')' will be handled by the parentheses group
2084 tokens.restore_last_token()
2085 break
2086 elif inside_merge and string in ['/', ',']:
2087 tokens.restore_last_token()
2088 break
2089 elif inside_choice and string == ',':
2090 tokens.restore_last_token()
2091 break
2092 elif string == ',':
2093 if not current_selector:
2094 raise syntax_error('"," must follow a format selector', start)
2095 selectors.append(current_selector)
2096 current_selector = None
2097 elif string == '/':
2098 if not current_selector:
2099 raise syntax_error('"/" must follow a format selector', start)
2100 first_choice = current_selector
2101 second_choice = _parse_format_selection(tokens, inside_choice=True)
2102 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2103 elif string == '[':
2104 if not current_selector:
2105 current_selector = FormatSelector(SINGLE, 'best', [])
2106 format_filter = _parse_filter(tokens)
2107 current_selector.filters.append(format_filter)
2108 elif string == '(':
2109 if current_selector:
2110 raise syntax_error('Unexpected "("', start)
2111 group = _parse_format_selection(tokens, inside_group=True)
2112 current_selector = FormatSelector(GROUP, group, [])
2113 elif string == '+':
2114 if not current_selector:
2115 raise syntax_error('Unexpected "+"', start)
2116 selector_1 = current_selector
2117 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2118 if not selector_2:
2119 raise syntax_error('Expected a selector', start)
2120 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2121 else:
2122 raise syntax_error(f'Operator not recognized: "{string}"', start)
2123 elif type == tokenize.ENDMARKER:
2124 break
2125 if current_selector:
2126 selectors.append(current_selector)
2127 return selectors
2128
2129 def _merge(formats_pair):
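# Sketch of the outcome (hypothetical ids): merging a video format '137'
# with an audio format '140' yields one dict with format_id '137+140',
# both inputs under 'requested_formats' and a compatible output ext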
2130 format_1, format_2 = formats_pair
2131
2132 formats_info = []
2133 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2134 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2135
2136 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2137 get_no_more = {'video': False, 'audio': False}
2138 for (i, fmt_info) in enumerate(formats_info):
2139 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2140 formats_info.pop(i)
2141 continue
2142 for aud_vid in ['audio', 'video']:
2143 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2144 if get_no_more[aud_vid]:
2145 formats_info.pop(i)
2146 break
2147 get_no_more[aud_vid] = True
2148
2149 if len(formats_info) == 1:
2150 return formats_info[0]
2151
2152 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2153 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2154
2155 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2156 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2157
2158 output_ext = get_compatible_ext(
2159 vcodecs=[f.get('vcodec') for f in video_fmts],
2160 acodecs=[f.get('acodec') for f in audio_fmts],
2161 vexts=[f['ext'] for f in video_fmts],
2162 aexts=[f['ext'] for f in audio_fmts],
2163 preferences=(try_call(lambda: self.params['merge_output_format'].split('/'))
2164 or self.params.get('prefer_free_formats') and ('webm', 'mkv')))
2165
2166 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2167
2168 new_dict = {
2169 'requested_formats': formats_info,
2170 'format': '+'.join(filtered('format')),
2171 'format_id': '+'.join(filtered('format_id')),
2172 'ext': output_ext,
2173 'protocol': '+'.join(map(determine_protocol, formats_info)),
2174 'language': '+'.join(orderedSet(filtered('language'))) or None,
2175 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2176 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2177 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2178 }
2179
2180 if the_only_video:
2181 new_dict.update({
2182 'width': the_only_video.get('width'),
2183 'height': the_only_video.get('height'),
2184 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2185 'fps': the_only_video.get('fps'),
2186 'dynamic_range': the_only_video.get('dynamic_range'),
2187 'vcodec': the_only_video.get('vcodec'),
2188 'vbr': the_only_video.get('vbr'),
2189 'stretched_ratio': the_only_video.get('stretched_ratio'),
2190 'aspect_ratio': the_only_video.get('aspect_ratio'),
2191 })
2192
2193 if the_only_audio:
2194 new_dict.update({
2195 'acodec': the_only_audio.get('acodec'),
2196 'abr': the_only_audio.get('abr'),
2197 'asr': the_only_audio.get('asr'),
2198 'audio_channels': the_only_audio.get('audio_channels')
2199 })
2200
2201 return new_dict
2202
2203 def _check_formats(formats):
2204 if not check_formats:
2205 yield from formats
2206 return
2207 yield from self._check_formats(formats)
2208
2209 def _build_selector_function(selector):
2210 if isinstance(selector, list): # ,
2211 fs = [_build_selector_function(s) for s in selector]
2212
2213 def selector_function(ctx):
2214 for f in fs:
2215 yield from f(ctx)
2216 return selector_function
2217
2218 elif selector.type == GROUP: # ()
2219 selector_function = _build_selector_function(selector.selector)
2220
2221 elif selector.type == PICKFIRST: # /
2222 fs = [_build_selector_function(s) for s in selector.selector]
2223
2224 def selector_function(ctx):
2225 for f in fs:
2226 picked_formats = list(f(ctx))
2227 if picked_formats:
2228 return picked_formats
2229 return []
2230
2231 elif selector.type == MERGE: # +
2232 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2233
2234 def selector_function(ctx):
2235 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2236 yield _merge(pair)
2237
2238 elif selector.type == SINGLE: # atom
2239 format_spec = selector.selector or 'best'
2240
2241 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2242 if format_spec == 'all':
2243 def selector_function(ctx):
2244 yield from _check_formats(ctx['formats'][::-1])
2245 elif format_spec == 'mergeall':
2246 def selector_function(ctx):
2247 formats = list(_check_formats(
2248 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2249 if not formats:
2250 return
2251 merged_format = formats[-1]
2252 for f in formats[-2::-1]:
2253 merged_format = _merge((merged_format, f))
2254 yield merged_format
2255
2256 else:
2257 format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
2258 mobj = re.match(
2259 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2260 format_spec)
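# e.g. 'b' (best pre-merged), 'bv' (best video-only), 'ba*' (best
# format containing audio) and 'w.2' (second worst) all match here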
2261 if mobj is not None:
2262 format_idx = int_or_none(mobj.group('n'), default=1)
2263 format_reverse = mobj.group('bw')[0] == 'b'
2264 format_type = (mobj.group('type') or [None])[0]
2265 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2266 format_modified = mobj.group('mod') is not None
2267
2268 format_fallback = not format_type and not format_modified # for b, w
2269 _filter_f = (
2270 (lambda f: f.get('%scodec' % format_type) != 'none')
2271 if format_type and format_modified # bv*, ba*, wv*, wa*
2272 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2273 if format_type # bv, ba, wv, wa
2274 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2275 if not format_modified # b, w
2276 else lambda f: True) # b*, w*
2277 filter_f = lambda f: _filter_f(f) and (
2278 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2279 else:
2280 if format_spec in self._format_selection_exts['audio']:
2281 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2282 elif format_spec in self._format_selection_exts['video']:
2283 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2284 separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2285 elif format_spec in self._format_selection_exts['storyboards']:
2286 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2287 else:
2288 filter_f = lambda f: f.get('format_id') == format_spec # id
2289
2290 def selector_function(ctx):
2291 formats = list(ctx['formats'])
2292 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2293 if not matches:
2294 if format_fallback and ctx['incomplete_formats']:
2295 # for extractors with incomplete formats (audio only (soundcloud)
2296 # or video only (imgur)), best/worst will fall back to the
2297 # best/worst {video,audio}-only format
2298 matches = formats
2299 elif separate_fallback and not ctx['has_merged_format']:
2300 # for compatibility with youtube-dl when there is no pre-merged format
2301 matches = list(filter(separate_fallback, formats))
2302 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2303 try:
2304 yield matches[format_idx - 1]
2305 except LazyList.IndexError:
2306 return
2307
2308 filters = [self._build_format_filter(f) for f in selector.filters]
2309
2310 def final_selector(ctx):
2311 ctx_copy = dict(ctx)
2312 for _filter in filters:
2313 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2314 return selector_function(ctx_copy)
2315 return final_selector
2316
2317 stream = io.BytesIO(format_spec.encode())
2318 try:
2319 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2320 except tokenize.TokenError:
2321 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2322
2323 class TokenIterator:
2324 def __init__(self, tokens):
2325 self.tokens = tokens
2326 self.counter = 0
2327
2328 def __iter__(self):
2329 return self
2330
2331 def __next__(self):
2332 if self.counter >= len(self.tokens):
2333 raise StopIteration()
2334 value = self.tokens[self.counter]
2335 self.counter += 1
2336 return value
2337
2338 next = __next__
2339
2340 def restore_last_token(self):
2341 self.counter -= 1
2342
2343 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2344 return _build_selector_function(parsed_selector)
2345
2346 def _calc_headers(self, info_dict):
2347 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
2348
2349 cookies = self._calc_cookies(info_dict['url'])
2350 if cookies:
2351 res['Cookie'] = cookies
2352
2353 if 'X-Forwarded-For' not in res:
2354 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2355 if x_forwarded_for_ip:
2356 res['X-Forwarded-For'] = x_forwarded_for_ip
2357
2358 return res
2359
2360 def _calc_cookies(self, url):
2361 pr = sanitized_Request(url)
2362 self.cookiejar.add_cookie_header(pr)
2363 return pr.get_header('Cookie')
2364
2365 def _sort_thumbnails(self, thumbnails):
2366 thumbnails.sort(key=lambda t: (
2367 t.get('preference') if t.get('preference') is not None else -1,
2368 t.get('width') if t.get('width') is not None else -1,
2369 t.get('height') if t.get('height') is not None else -1,
2370 t.get('id') if t.get('id') is not None else '',
2371 t.get('url')))
2372
2373 def _sanitize_thumbnails(self, info_dict):
2374 thumbnails = info_dict.get('thumbnails')
2375 if thumbnails is None:
2376 thumbnail = info_dict.get('thumbnail')
2377 if thumbnail:
2378 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2379 if not thumbnails:
2380 return
2381
2382 def check_thumbnails(thumbnails):
2383 for t in thumbnails:
2384 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2385 try:
2386 self.urlopen(HEADRequest(t['url']))
2387 except network_exceptions as err:
2388 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2389 continue
2390 yield t
2391
2392 self._sort_thumbnails(thumbnails)
2393 for i, t in enumerate(thumbnails):
2394 if t.get('id') is None:
2395 t['id'] = '%d' % i
2396 if t.get('width') and t.get('height'):
2397 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2398 t['url'] = sanitize_url(t['url'])
2399
2400 if self.params.get('check_formats') is True:
2401 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2402 else:
2403 info_dict['thumbnails'] = thumbnails
2404
2405 def _fill_common_fields(self, info_dict, final=True):
2406 # TODO: move sanitization here
2407 if final:
2408 title = info_dict.get('title', NO_DEFAULT)
2409 if title is NO_DEFAULT:
2410 raise ExtractorError('Missing "title" field in extractor result',
2411 video_id=info_dict['id'], ie=info_dict['extractor'])
2412 info_dict['fulltitle'] = title
2413 if not title:
2414 if title == '':
2415 self.write_debug('Extractor gave empty title. Creating a generic title')
2416 else:
2417 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2418 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2419
2420 if info_dict.get('duration') is not None:
2421 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2422
2423 for ts_key, date_key in (
2424 ('timestamp', 'upload_date'),
2425 ('release_timestamp', 'release_date'),
2426 ('modified_timestamp', 'modified_date'),
2427 ):
2428 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2429 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2430 # see http://bugs.python.org/issue1646728)
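# e.g. a 'timestamp' of 1610000000 yields an 'upload_date' of '20210107' (UTC)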
2431 with contextlib.suppress(ValueError, OverflowError, OSError):
2432 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2433 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2434
2435 live_keys = ('is_live', 'was_live')
2436 live_status = info_dict.get('live_status')
2437 if live_status is None:
2438 for key in live_keys:
2439 if info_dict.get(key) is False:
2440 continue
2441 if info_dict.get(key):
2442 live_status = key
2443 break
2444 if all(info_dict.get(key) is False for key in live_keys):
2445 live_status = 'not_live'
2446 if live_status:
2447 info_dict['live_status'] = live_status
2448 for key in live_keys:
2449 if info_dict.get(key) is None:
2450 info_dict[key] = (live_status == key)
2451 if live_status == 'post_live':
2452 info_dict['was_live'] = True
2453
2454 # Auto generate title fields corresponding to the *_number fields when missing
2455 # in order to always have clean titles. This is very common for TV series.
2456 for field in ('chapter', 'season', 'episode'):
2457 if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2458 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2459
2460 def _raise_pending_errors(self, info):
2461 err = info.pop('__pending_error', None)
2462 if err:
2463 self.report_error(err, tb=False)
2464
2465 def sort_formats(self, info_dict):
2466 formats = self._get_formats(info_dict)
2467 if not formats:
2468 return
2469 # Backward compatibility with InfoExtractor._sort_formats
2470 field_preference = formats[0].pop('__sort_fields', None)
2471 if field_preference:
2472 info_dict['_format_sort_fields'] = field_preference
2473
2474 formats.sort(key=FormatSorter(
2475 self, info_dict.get('_format_sort_fields', [])).calculate_preference)
2476
2477 def process_video_result(self, info_dict, download=True):
2478 assert info_dict.get('_type', 'video') == 'video'
2479 self._num_videos += 1
2480
2481 if 'id' not in info_dict:
2482 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2483 elif not info_dict.get('id'):
2484 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2485
2486 def report_force_conversion(field, field_not, conversion):
2487 self.report_warning(
2488 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2489 % (field, field_not, conversion))
2490
2491 def sanitize_string_field(info, string_field):
2492 field = info.get(string_field)
2493 if field is None or isinstance(field, str):
2494 return
2495 report_force_conversion(string_field, 'a string', 'string')
2496 info[string_field] = str(field)
2497
2498 def sanitize_numeric_fields(info):
2499 for numeric_field in self._NUMERIC_FIELDS:
2500 field = info.get(numeric_field)
2501 if field is None or isinstance(field, (int, float)):
2502 continue
2503 report_force_conversion(numeric_field, 'numeric', 'int')
2504 info[numeric_field] = int_or_none(field)
2505
2506 sanitize_string_field(info_dict, 'id')
2507 sanitize_numeric_fields(info_dict)
2508 if info_dict.get('section_end') and info_dict.get('section_start') is not None:
2509 info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3)
2510 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2511 self.report_warning('"duration" field is negative, there is an error in extractor')
2512
2513 chapters = info_dict.get('chapters') or []
2514 if chapters and chapters[0].get('start_time'):
2515 chapters.insert(0, {'start_time': 0})
2516
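# Missing boundaries are filled in from the neighbouring chapters below:
# e.g. for [{'start_time': 0}, {'start_time': 60}] the first chapter gets
# end_time 60 and the last one is closed by the dummy at the video duration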
2517 dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')}
2518 for idx, (prev, current, next_) in enumerate(zip(
2519 (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1):
2520 if current.get('start_time') is None:
2521 current['start_time'] = prev.get('end_time')
2522 if not current.get('end_time'):
2523 current['end_time'] = next_.get('start_time')
2524 if not current.get('title'):
2525 current['title'] = f'<Untitled Chapter {idx}>'
2526
2527 if 'playlist' not in info_dict:
2528 # It isn't part of a playlist
2529 info_dict['playlist'] = None
2530 info_dict['playlist_index'] = None
2531
2532 self._sanitize_thumbnails(info_dict)
2533
2534 thumbnail = info_dict.get('thumbnail')
2535 thumbnails = info_dict.get('thumbnails')
2536 if thumbnail:
2537 info_dict['thumbnail'] = sanitize_url(thumbnail)
2538 elif thumbnails:
2539 info_dict['thumbnail'] = thumbnails[-1]['url']
2540
2541 if info_dict.get('display_id') is None and 'id' in info_dict:
2542 info_dict['display_id'] = info_dict['id']
2543
2544 self._fill_common_fields(info_dict)
2545
2546 for cc_kind in ('subtitles', 'automatic_captions'):
2547 cc = info_dict.get(cc_kind)
2548 if cc:
2549 for _, subtitle in cc.items():
2550 for subtitle_format in subtitle:
2551 if subtitle_format.get('url'):
2552 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2553 if subtitle_format.get('ext') is None:
2554 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2555
2556 automatic_captions = info_dict.get('automatic_captions')
2557 subtitles = info_dict.get('subtitles')
2558
2559 info_dict['requested_subtitles'] = self.process_subtitles(
2560 info_dict['id'], subtitles, automatic_captions)
2561
2562 self.sort_formats(info_dict)
2563 formats = self._get_formats(info_dict)
2564
2565 # or None ensures --clean-infojson removes it
2566 info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
2567 if not self.params.get('allow_unplayable_formats'):
2568 formats = [f for f in formats if not f.get('has_drm')]
2569
2570 if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2571 self.report_warning(
2572 ('This video is DRM protected and only images' if info_dict['_has_drm']
2573 else 'Only images') + ' are available for download. Use --list-formats to see them')
2574
2575 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2576 if not get_from_start:
2577 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2578 if info_dict.get('is_live') and formats:
2579 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2580 if get_from_start and not formats:
2581 self.raise_no_formats(info_dict, msg=(
2582 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2583 'If you want to download from the current time, use --no-live-from-start'))
2584
2585 def is_wellformed(f):
2586 url = f.get('url')
2587 if not url:
2588 self.report_warning(
2589 '"url" field is missing or empty - skipping format, '
2590 'there is an error in extractor')
2591 return False
2592 if isinstance(url, bytes):
2593 sanitize_string_field(f, 'url')
2594 return True
2595
2596 # Filter out malformed formats for better extraction robustness
2597 formats = list(filter(is_wellformed, formats or []))
2598
2599 if not formats:
2600 self.raise_no_formats(info_dict)
2601
2602 formats_dict = {}
2603
2604 # We check that all the formats have the format and format_id fields
2605 for i, format in enumerate(formats):
2606 sanitize_string_field(format, 'format_id')
2607 sanitize_numeric_fields(format)
2608 format['url'] = sanitize_url(format['url'])
2609 if not format.get('format_id'):
2610 format['format_id'] = str(i)
2611 else:
2612 # Sanitize format_id from characters used in format selector expression
2613 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2614 format_id = format['format_id']
2615 if format_id not in formats_dict:
2616 formats_dict[format_id] = []
2617 formats_dict[format_id].append(format)
2618
2619 # Make sure all formats have unique format_id
2620 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2621 for format_id, ambiguous_formats in formats_dict.items():
2622 ambiguous_id = len(ambiguous_formats) > 1
2623 for i, format in enumerate(ambiguous_formats):
2624 if ambiguous_id:
2625 format['format_id'] = '%s-%d' % (format_id, i)
2626 if format.get('ext') is None:
2627 format['ext'] = determine_ext(format['url']).lower()
2628 # Ensure there is no conflict between id and ext in format selection
2629 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2630 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2631 format['format_id'] = 'f%s' % format['format_id']
2632
2633 for i, format in enumerate(formats):
2634 if format.get('format') is None:
2635 format['format'] = '{id} - {res}{note}'.format(
2636 id=format['format_id'],
2637 res=self.format_resolution(format),
2638 note=format_field(format, 'format_note', ' (%s)'),
2639 )
2640 if format.get('protocol') is None:
2641 format['protocol'] = determine_protocol(format)
2642 if format.get('resolution') is None:
2643 format['resolution'] = self.format_resolution(format, default=None)
2644 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2645 format['dynamic_range'] = 'SDR'
2646 if format.get('aspect_ratio') is None:
2647 format['aspect_ratio'] = try_call(lambda: round(format['width'] / format['height'], 2))
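# tbr is in KBit/s, so duration [s] * tbr * 1024 / 8 estimates the size in bytes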
2648 if (info_dict.get('duration') and format.get('tbr')
2649 and not format.get('filesize') and not format.get('filesize_approx')):
2650 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
2651
2652 # Add HTTP headers, so that external programs can use them from the
2653 # json output
2654 full_format_info = info_dict.copy()
2655 full_format_info.update(format)
2656 format['http_headers'] = self._calc_headers(full_format_info)
2657 # Remove private housekeeping stuff
2658 if '__x_forwarded_for_ip' in info_dict:
2659 del info_dict['__x_forwarded_for_ip']
2660
2661 if self.params.get('check_formats') is True:
2662 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2663
2664 if not formats or formats[0] is not info_dict:
2665 # only set the 'formats' field if the original info_dict lists formats;
2666 # otherwise we end up with a circular reference: the first (and only)
2667 # element of the 'formats' field in info_dict would be info_dict itself,
2668 # which can't be exported to json
2669 info_dict['formats'] = formats
2670
2671 info_dict, _ = self.pre_process(info_dict)
2672
2673 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2674 return info_dict
2675
2676 self.post_extract(info_dict)
2677 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2678
2679 # The pre-processors may have modified the formats
2680 formats = self._get_formats(info_dict)
2681
2682 list_only = self.params.get('simulate') == 'list_only'
2683 interactive_format_selection = not list_only and self.format_selector == '-'
2684 if self.params.get('list_thumbnails'):
2685 self.list_thumbnails(info_dict)
2686 if self.params.get('listsubtitles'):
2687 if 'automatic_captions' in info_dict:
2688 self.list_subtitles(
2689 info_dict['id'], automatic_captions, 'automatic captions')
2690 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2691 if self.params.get('listformats') or interactive_format_selection:
2692 self.list_formats(info_dict)
2693 if list_only:
2694 # Without this printing, -F --print-json will not work
2695 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2696 return info_dict
2697
2698 format_selector = self.format_selector
2699 if format_selector is None:
2700 req_format = self._default_format_spec(info_dict, download=download)
2701 self.write_debug('Default format spec: %s' % req_format)
2702 format_selector = self.build_format_selector(req_format)
2703
2704 while True:
2705 if interactive_format_selection:
2706 req_format = input(
2707 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2708 try:
2709 format_selector = self.build_format_selector(req_format)
2710 except SyntaxError as err:
2711 self.report_error(err, tb=False, is_error=False)
2712 continue
2713
2714 formats_to_download = list(format_selector({
2715 'formats': formats,
2716 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2717 'incomplete_formats': (
2718 # All formats are video-only or
2719 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2720 # all formats are audio-only
2721 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2722 }))
2723 if interactive_format_selection and not formats_to_download:
2724 self.report_error('Requested format is not available', tb=False, is_error=False)
2725 continue
2726 break
2727
2728 if not formats_to_download:
2729 if not self.params.get('ignore_no_formats_error'):
2730 raise ExtractorError(
2731 'Requested format is not available. Use --list-formats for a list of available formats',
2732 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2733 self.report_warning('Requested format is not available')
2734 # Process what we can, even without any available formats.
2735 formats_to_download = [{}]
2736
2737 requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
2738 best_format, downloaded_formats = formats_to_download[-1], []
2739 if download:
2740 if best_format and requested_ranges:
2741 def to_screen(*msg):
2742 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2743
2744 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2745 (f['format_id'] for f in formats_to_download))
2746 if requested_ranges != ({}, ):
2747 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2748 (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
2749 max_downloads_reached = False
2750
2751 for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
2752 new_info = self._copy_infodict(info_dict)
2753 new_info.update(fmt)
2754 offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
2755 end_time = offset + min(chapter.get('end_time', duration), duration)
2756 if chapter or offset:
2757 new_info.update({
2758 'section_start': offset + chapter.get('start_time', 0),
2759 # duration may not be accurate, so allow deviations of <1 sec
2760 'section_end': end_time if end_time <= offset + duration + 1 else None,
2761 'section_title': chapter.get('title'),
2762 'section_number': chapter.get('index'),
2763 })
2764 downloaded_formats.append(new_info)
2765 try:
2766 self.process_info(new_info)
2767 except MaxDownloadsReached:
2768 max_downloads_reached = True
2769 self._raise_pending_errors(new_info)
2770 # Remove copied info
2771 for key, val in tuple(new_info.items()):
2772 if info_dict.get(key) == val:
2773 new_info.pop(key)
2774 if max_downloads_reached:
2775 break
2776
2777 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
2778 assert write_archive.issubset({True, False, 'ignore'})
2779 if True in write_archive and False not in write_archive:
2780 self.record_download_archive(info_dict)
2781
2782 info_dict['requested_downloads'] = downloaded_formats
2783 info_dict = self.run_all_pps('after_video', info_dict)
2784 if max_downloads_reached:
2785 raise MaxDownloadsReached()
2786
2787 # We update the info dict with the selected best quality format (backwards compatibility)
2788 info_dict.update(best_format)
2789 return info_dict
2790
2791 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2792 """Select the requested subtitles and their format"""
2793 available_subs, normal_sub_langs = {}, []
2794 if normal_subtitles and self.params.get('writesubtitles'):
2795 available_subs.update(normal_subtitles)
2796 normal_sub_langs = tuple(normal_subtitles.keys())
2797 if automatic_captions and self.params.get('writeautomaticsub'):
2798 for lang, cap_info in automatic_captions.items():
2799 if lang not in available_subs:
2800 available_subs[lang] = cap_info
2801
2802 if not available_subs or (
2803 not self.params.get('writesubtitles')
2804 and not self.params.get('writeautomaticsub')):
2805 return None
2806
2807 all_sub_langs = tuple(available_subs.keys())
2808 if self.params.get('allsubtitles', False):
2809 requested_langs = all_sub_langs
2810 elif self.params.get('subtitleslangs', False):
2811 try:
2812 requested_langs = orderedSet_from_options(
2813 self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True)
2814 except re.error as e:
2815 raise ValueError(f'Wrong regex for subtitleslangs: {e.pattern}')
2816 elif normal_sub_langs:
2817 requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
2818 else:
2819 requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
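# Illustrative example: --sub-langs "en.*,ja" selects every available
# language matching the regex en.* (e.g. en, en-US) plus ja; 'all' expands
# to every language, and a '-' prefix excludes (e.g. "all,-live_chat").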
2820 if requested_langs:
2821 self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}')
2822
2823 formats_query = self.params.get('subtitlesformat', 'best')
2824 formats_preference = formats_query.split('/') if formats_query else []
2825 subs = {}
2826 for lang in requested_langs:
2827 formats = available_subs.get(lang)
2828 if formats is None:
2829 self.report_warning(f'{lang} subtitles not available for {video_id}')
2830 continue
2831 for ext in formats_preference:
2832 if ext == 'best':
2833 f = formats[-1]
2834 break
2835 matches = list(filter(lambda f: f['ext'] == ext, formats))
2836 if matches:
2837 f = matches[-1]
2838 break
2839 else:
2840 f = formats[-1]
2841 self.report_warning(
2842 'No subtitle format found matching "%s" for language %s, '
2843 'using %s' % (formats_query, lang, f['ext']))
2844 subs[lang] = f
2845 return subs
2846
2847 def _forceprint(self, key, info_dict):
2848 if info_dict is None:
2849 return
2850 info_copy = info_dict.copy()
2851 info_copy['formats_table'] = self.render_formats_table(info_dict)
2852 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2853 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2854 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2855
2856 def format_tmpl(tmpl):
2857 mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl)
2858 if not mobj:
2859 return tmpl
2860
2861 fmt = '%({})s'
2862 if tmpl.startswith('{'):
2863 tmpl = f'.{tmpl}'
2864 if tmpl.endswith('='):
2865 tmpl, fmt = tmpl[:-1], '{0} = %({0})#j'
2866 return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(',')))
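# Illustrative examples of what format_tmpl produces:
#   'title,id' -> '%(title)s\n%(id)s' (one field per line)
#   'id='      -> 'id = %(id)#j' (prints the field as labelled JSON)
# Anything not matching the field pattern, e.g. '%(title)s [%(id)s]',
# is treated as a full output template and passed through unchanged.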
2867
2868 for tmpl in self.params['forceprint'].get(key, []):
2869 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2870
2871 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2872 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
2873 tmpl = format_tmpl(tmpl)
2874 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
2875 if self._ensure_dir_exists(filename):
2876 with open(filename, 'a', encoding='utf-8') as f:
2877 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
2878
2879 def __forced_printings(self, info_dict, filename, incomplete):
2880 def print_mandatory(field, actual_field=None):
2881 if actual_field is None:
2882 actual_field = field
2883 if (self.params.get('force%s' % field, False)
2884 and (not incomplete or info_dict.get(actual_field) is not None)):
2885 self.to_stdout(info_dict[actual_field])
2886
2887 def print_optional(field):
2888 if (self.params.get('force%s' % field, False)
2889 and info_dict.get(field) is not None):
2890 self.to_stdout(info_dict[field])
2891
2892 info_dict = info_dict.copy()
2893 if filename is not None:
2894 info_dict['filename'] = filename
2895 if info_dict.get('requested_formats') is not None:
2896 # For RTMP URLs, also include the playpath
2897 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2898 elif info_dict.get('url'):
2899 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2900
2901 if (self.params.get('forcejson')
2902 or self.params['forceprint'].get('video')
2903 or self.params['print_to_file'].get('video')):
2904 self.post_extract(info_dict)
2905 self._forceprint('video', info_dict)
2906
2907 print_mandatory('title')
2908 print_mandatory('id')
2909 print_mandatory('url', 'urls')
2910 print_optional('thumbnail')
2911 print_optional('description')
2912 print_optional('filename')
2913 if self.params.get('forceduration') and info_dict.get('duration') is not None:
2914 self.to_stdout(formatSeconds(info_dict['duration']))
2915 print_mandatory('format')
2916
2917 if self.params.get('forcejson'):
2918 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2919
2920 def dl(self, name, info, subtitle=False, test=False):
2921 if not info.get('url'):
2922 self.raise_no_formats(info, True)
2923
2924 if test:
2925 verbose = self.params.get('verbose')
2926 params = {
2927 'test': True,
2928 'quiet': self.params.get('quiet') or not verbose,
2929 'verbose': verbose,
2930 'noprogress': not verbose,
2931 'nopart': True,
2932 'skip_unavailable_fragments': False,
2933 'keep_fragments': False,
2934 'overwrites': True,
2935 '_no_ytdl_file': True,
2936 }
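# These test-mode params make the downloader fetch only a small portion
# of the file, quietly and without leaving .part/.ytdl files behind;
# this is what e.g. --check-formats uses to probe format availability.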
2937 else:
2938 params = self.params
2939 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2940 if not test:
2941 for ph in self._progress_hooks:
2942 fd.add_progress_hook(ph)
2943 urls = '", "'.join(
2944 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2945 for f in info.get('requested_formats', []) or [info])
2946 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
2947
2948 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
2949 # But it may contain objects that are not deep-copyable
2950 new_info = self._copy_infodict(info)
2951 if new_info.get('http_headers') is None:
2952 new_info['http_headers'] = self._calc_headers(new_info)
2953 return fd.download(name, new_info, subtitle)
2954
2955 def existing_file(self, filepaths, *, default_overwrite=True):
2956 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2957 if existing_files and not self.params.get('overwrites', default_overwrite):
2958 return existing_files[0]
2959
2960 for file in existing_files:
2961 self.report_file_delete(file)
2962 os.remove(file)
2963 return None
2964
2965 def process_info(self, info_dict):
2966 """Process a single resolved IE result. (Modifies it in-place)"""
2967
2968 assert info_dict.get('_type', 'video') == 'video'
2969 original_infodict = info_dict
2970
2971 if 'format' not in info_dict and 'ext' in info_dict:
2972 info_dict['format'] = info_dict['ext']
2973
2974 if self._match_entry(info_dict) is not None:
2975 info_dict['__write_download_archive'] = 'ignore'
2976 return
2977
2978 # Does nothing under normal operation - for backward compatibility of process_info
2979 self.post_extract(info_dict)
2980
2981 def replace_info_dict(new_info):
2982 nonlocal info_dict
2983 if new_info == info_dict:
2984 return
2985 info_dict.clear()
2986 info_dict.update(new_info)
2987
2988 new_info, _ = self.pre_process(info_dict, 'video')
2989 replace_info_dict(new_info)
2990 self._num_downloads += 1
2991
2992 # info_dict['_filename'] needs to be set for backward compatibility
2993 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2994 temp_filename = self.prepare_filename(info_dict, 'temp')
2995 files_to_move = {}
2996
2997 # Forced printings
2998 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2999
3000 def check_max_downloads():
3001 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
3002 raise MaxDownloadsReached()
3003
3004 if self.params.get('simulate'):
3005 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3006 check_max_downloads()
3007 return
3008
3009 if full_filename is None:
3010 return
3011 if not self._ensure_dir_exists(encodeFilename(full_filename)):
3012 return
3013 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
3014 return
3015
3016 if self._write_description('video', info_dict,
3017 self.prepare_filename(info_dict, 'description')) is None:
3018 return
3019
3020 sub_files = self._write_subtitles(info_dict, temp_filename)
3021 if sub_files is None:
3022 return
3023 files_to_move.update(dict(sub_files))
3024
3025 thumb_files = self._write_thumbnails(
3026 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
3027 if thumb_files is None:
3028 return
3029 files_to_move.update(dict(thumb_files))
3030
3031 infofn = self.prepare_filename(info_dict, 'infojson')
3032 _infojson_written = self._write_info_json('video', info_dict, infofn)
3033 if _infojson_written:
3034 info_dict['infojson_filename'] = infofn
3035 # For backward compatibility, even though it was a private field
3036 info_dict['__infojson_filename'] = infofn
3037 elif _infojson_written is None:
3038 return
3039
3040 # Note: Annotations are deprecated
3041 annofn = None
3042 if self.params.get('writeannotations', False):
3043 annofn = self.prepare_filename(info_dict, 'annotation')
3044 if annofn:
3045 if not self._ensure_dir_exists(encodeFilename(annofn)):
3046 return
3047 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
3048 self.to_screen('[info] Video annotations are already present')
3049 elif not info_dict.get('annotations'):
3050 self.report_warning('There are no annotations to write.')
3051 else:
3052 try:
3053 self.to_screen('[info] Writing video annotations to: ' + annofn)
3054 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
3055 annofile.write(info_dict['annotations'])
3056 except (KeyError, TypeError):
3057 self.report_warning('There are no annotations to write.')
3058 except OSError:
3059 self.report_error('Cannot write annotations file: ' + annofn)
3060 return
3061
3062 # Write internet shortcut files
3063 def _write_link_file(link_type):
3064 url = try_get(info_dict['webpage_url'], iri_to_uri)
3065 if not url:
3066 self.report_warning(
3067 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
3068 return True
3069 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
3070 if not self._ensure_dir_exists(encodeFilename(linkfn)):
3071 return False
3072 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
3073 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3074 return True
3075 try:
3076 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3077 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3078 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
3079 template_vars = {'url': url}
3080 if link_type == 'desktop':
3081 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3082 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
3083 except OSError:
3084 self.report_error(f'Cannot write internet shortcut {linkfn}')
3085 return False
3086 return True
3087
3088 write_links = {
3089 'url': self.params.get('writeurllink'),
3090 'webloc': self.params.get('writewebloclink'),
3091 'desktop': self.params.get('writedesktoplink'),
3092 }
3093 if self.params.get('writelink'):
3094 link_type = ('webloc' if sys.platform == 'darwin'
3095 else 'desktop' if sys.platform.startswith('linux')
3096 else 'url')
3097 write_links[link_type] = True
3098
3099 if any(should_write and not _write_link_file(link_type)
3100 for link_type, should_write in write_links.items()):
3101 return
3102
3103 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3104 replace_info_dict(new_info)
3105
3106 if self.params.get('skip_download'):
3107 info_dict['filepath'] = temp_filename
3108 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3109 info_dict['__files_to_move'] = files_to_move
3110 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
3111 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3112 else:
3113 # Download
3114 info_dict.setdefault('__postprocessors', [])
3115 try:
3116
3117 def existing_video_file(*filepaths):
3118 ext = info_dict.get('ext')
3119 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3120 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3121 default_overwrite=False)
3122 if file:
3123 info_dict['ext'] = os.path.splitext(file)[1][1:]
3124 return file
3125
3126 fd, success = None, True
3127 if info_dict.get('protocol') or info_dict.get('url'):
3128 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3129 if fd is not FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and (
3130 info_dict.get('section_start') or info_dict.get('section_end')):
3131 msg = ('This format cannot be partially downloaded' if FFmpegFD.available()
3132 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3133 self.report_error(f'{msg}. Aborting')
3134 return
3135
3136 if info_dict.get('requested_formats') is not None:
3137 requested_formats = info_dict['requested_formats']
3138 old_ext = info_dict['ext']
3139 if self.params.get('merge_output_format') is None:
3140 if (info_dict['ext'] == 'webm'
3141 and info_dict.get('thumbnails')
3142 # check with type instead of pp_key, __name__, or isinstance
3143 # since we don't want any custom PPs to trigger this
3144 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
3145 info_dict['ext'] = 'mkv'
3146 self.report_warning(
3147 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3148 new_ext = info_dict['ext']
3149
3150 def correct_ext(filename, ext=new_ext):
3151 if filename == '-':
3152 return filename
3153 filename_real_ext = os.path.splitext(filename)[1][1:]
3154 filename_wo_ext = (
3155 os.path.splitext(filename)[0]
3156 if filename_real_ext in (old_ext, new_ext)
3157 else filename)
3158 return f'{filename_wo_ext}.{ext}'
3159
3160 # Ensure filename always has a correct extension for successful merge
3161 full_filename = correct_ext(full_filename)
3162 temp_filename = correct_ext(temp_filename)
3163 dl_filename = existing_video_file(full_filename, temp_filename)
3164 info_dict['__real_download'] = False
3165
3166 merger = FFmpegMergerPP(self)
3167 downloaded = []
3168 if dl_filename is not None:
3169 self.report_file_already_downloaded(dl_filename)
3170 elif fd:
3171 for f in requested_formats if fd != FFmpegFD else []:
3172 f['filepath'] = fname = prepend_extension(
3173 correct_ext(temp_filename, info_dict['ext']),
3174 'f%s' % f['format_id'], info_dict['ext'])
3175 downloaded.append(fname)
3176 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3177 success, real_download = self.dl(temp_filename, info_dict)
3178 info_dict['__real_download'] = real_download
3179 else:
3180 if self.params.get('allow_unplayable_formats'):
3181 self.report_warning(
3182 'You have requested merging of multiple formats '
3183 'while also allowing unplayable formats to be downloaded. '
3184 'The formats won\'t be merged to prevent data corruption.')
3185 elif not merger.available:
3186 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3187 if not self.params.get('ignoreerrors'):
3188 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3189 return
3190 self.report_warning(f'{msg}. The formats won\'t be merged')
3191
3192 if temp_filename == '-':
3193 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3194 else 'but the formats are incompatible for simultaneous download' if merger.available
3195 else 'but ffmpeg is not installed')
3196 self.report_warning(
3197 f'You have requested downloading multiple formats to stdout {reason}. '
3198 'The formats will be streamed one after the other')
3199 fname = temp_filename
3200 for f in requested_formats:
3201 new_info = dict(info_dict)
3202 del new_info['requested_formats']
3203 new_info.update(f)
3204 if temp_filename != '-':
3205 fname = prepend_extension(
3206 correct_ext(temp_filename, new_info['ext']),
3207 'f%s' % f['format_id'], new_info['ext'])
3208 if not self._ensure_dir_exists(fname):
3209 return
3210 f['filepath'] = fname
3211 downloaded.append(fname)
3212 partial_success, real_download = self.dl(fname, new_info)
3213 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3214 success = success and partial_success
3215
3216 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3217 info_dict['__postprocessors'].append(merger)
3218 info_dict['__files_to_merge'] = downloaded
3219 # Even if nothing was actually downloaded, the merge itself only happens now
3220 info_dict['__real_download'] = True
3221 else:
3222 for file in downloaded:
3223 files_to_move[file] = None
3224 else:
3225 # Just a single file
3226 dl_filename = existing_video_file(full_filename, temp_filename)
3227 if dl_filename is None or dl_filename == temp_filename:
3228 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3229 # So we should try to resume the download
3230 success, real_download = self.dl(temp_filename, info_dict)
3231 info_dict['__real_download'] = real_download
3232 else:
3233 self.report_file_already_downloaded(dl_filename)
3234
3235 dl_filename = dl_filename or temp_filename
3236 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3237
3238 except network_exceptions as err:
3239 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3240 return
3241 except OSError as err:
3242 raise UnavailableVideoError(err)
3243 except (ContentTooShortError, ) as err:
3244 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3245 return
3246
3247 self._raise_pending_errors(info_dict)
3248 if success and full_filename != '-':
3249
3250 def fixup():
3251 do_fixup = True
3252 fixup_policy = self.params.get('fixup')
3253 vid = info_dict['id']
3254
3255 if fixup_policy in ('ignore', 'never'):
3256 return
3257 elif fixup_policy == 'warn':
3258 do_fixup = 'warn'
3259 elif fixup_policy != 'force':
3260 assert fixup_policy in ('detect_or_warn', None)
3261 if not info_dict.get('__real_download'):
3262 do_fixup = False
3263
3264 def ffmpeg_fixup(cndn, msg, cls):
3265 if not (do_fixup and cndn):
3266 return
3267 elif do_fixup == 'warn':
3268 self.report_warning(f'{vid}: {msg}')
3269 return
3270 pp = cls(self)
3271 if pp.available:
3272 info_dict['__postprocessors'].append(pp)
3273 else:
3274 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3275
3276 stretched_ratio = info_dict.get('stretched_ratio')
3277 ffmpeg_fixup(stretched_ratio not in (1, None),
3278 f'Non-uniform pixel ratio {stretched_ratio}',
3279 FFmpegFixupStretchedPP)
3280
3281 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3282 downloader = downloader.FD_NAME if downloader else None
3283
3284 ext = info_dict.get('ext')
3285 postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any((
3286 isinstance(pp, FFmpegVideoConvertorPP)
3287 and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None)
3288 ) for pp in self._pps['post_process'])
3289
3290 if not postprocessed_by_ffmpeg:
3291 ffmpeg_fixup(ext == 'm4a' and info_dict.get('container') == 'm4a_dash',
3292 'writing DASH m4a. Only some players support this container',
3293 FFmpegFixupM4aPP)
3294 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3295 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3296 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3297 FFmpegFixupM3u8PP)
3298 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3299 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3300
3301 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3302 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3303
3304 fixup()
3305 try:
3306 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3307 except PostProcessingError as err:
3308 self.report_error('Postprocessing: %s' % str(err))
3309 return
3310 try:
3311 for ph in self._post_hooks:
3312 ph(info_dict['filepath'])
3313 except Exception as err:
3314 self.report_error('post hooks: %s' % str(err))
3315 return
3316 info_dict['__write_download_archive'] = True
3317
3318 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
3319 if self.params.get('force_write_download_archive'):
3320 info_dict['__write_download_archive'] = True
3321 check_max_downloads()
3322
3323 def __download_wrapper(self, func):
3324 @functools.wraps(func)
3325 def wrapper(*args, **kwargs):
3326 try:
3327 res = func(*args, **kwargs)
3328 except UnavailableVideoError as e:
3329 self.report_error(e)
3330 except DownloadCancelled as e:
3331 self.to_screen(f'[info] {e}')
3332 if not self.params.get('break_per_url'):
3333 raise
3334 self._num_downloads = 0
3335 else:
3336 if self.params.get('dump_single_json', False):
3337 self.post_extract(res)
3338 self.to_stdout(json.dumps(self.sanitize_info(res)))
3339 return wrapper
3340
3341 def download(self, url_list):
3342 """Download a given list of URLs."""
3343 url_list = variadic(url_list) # Passing a single URL is a common mistake
3344 outtmpl = self.params['outtmpl']['default']
3345 if (len(url_list) > 1
3346 and outtmpl != '-'
3347 and '%' not in outtmpl
3348 and self.params.get('max_downloads') != 1):
3349 raise SameFileError(outtmpl)
3350
3351 for url in url_list:
3352 self.__download_wrapper(self.extract_info)(
3353 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3354
3355 return self._download_retcode
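# Typical API usage (illustrative):
#   with YoutubeDL({'outtmpl': '%(title)s.%(ext)s'}) as ydl:
#       ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])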
3356
3357 def download_with_info_file(self, info_filename):
3358 with contextlib.closing(fileinput.FileInput(
3359 [info_filename], mode='r',
3360 openhook=fileinput.hook_encoded('utf-8'))) as f:
3361 # FileInput doesn't have a read method, so we can't call json.load
3362 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3363 try:
3364 self.__download_wrapper(self.process_ie_result)(info, download=True)
3365 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3366 if not isinstance(e, EntryNotInPlaylist):
3367 self.to_stderr('\r')
3368 webpage_url = info.get('webpage_url')
3369 if webpage_url is not None:
3370 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3371 return self.download([webpage_url])
3372 else:
3373 raise
3374 return self._download_retcode
3375
3376 @staticmethod
3377 def sanitize_info(info_dict, remove_private_keys=False):
3378 ''' Sanitize the infodict for converting to json '''
3379 if info_dict is None:
3380 return info_dict
3381 info_dict.setdefault('epoch', int(time.time()))
3382 info_dict.setdefault('_type', 'video')
3383 info_dict.setdefault('_version', {
3384 'version': __version__,
3385 'current_git_head': current_git_head(),
3386 'release_git_head': RELEASE_GIT_HEAD,
3387 'repository': REPOSITORY,
3388 })
3389
3390 if remove_private_keys:
3391 reject = lambda k, v: v is None or k.startswith('__') or k in {
3392 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3393 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
3394 }
3395 else:
3396 reject = lambda k, v: False
3397
3398 def filter_fn(obj):
3399 if isinstance(obj, dict):
3400 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3401 elif isinstance(obj, (list, tuple, set, LazyList)):
3402 return list(map(filter_fn, obj))
3403 elif obj is None or isinstance(obj, (str, int, float, bool)):
3404 return obj
3405 else:
3406 return repr(obj)
3407
3408 return filter_fn(info_dict)
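# Illustrative example (hypothetical values):
#   sanitize_info({'id': 'abc', '_filename': 'x.mp4', 'height': None},
#                 remove_private_keys=True)
# drops '_filename' and the None-valued 'height', adds the 'epoch', '_type'
# and '_version' defaults, and repr()s any non-JSON-serializable values.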
3409
3410 @staticmethod
3411 def filter_requested_info(info_dict, actually_filter=True):
3412 ''' Alias of sanitize_info for backward compatibility '''
3413 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3414
3415 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3416 for filename in set(filter(None, files_to_delete)):
3417 if msg:
3418 self.to_screen(msg % filename)
3419 try:
3420 os.remove(filename)
3421 except OSError:
3422 self.report_warning(f'Unable to delete file {filename}')
3423 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3424 del info['__files_to_move'][filename]
3425
3426 @staticmethod
3427 def post_extract(info_dict):
3428 def actual_post_extract(info_dict):
3429 if info_dict.get('_type') in ('playlist', 'multi_video'):
3430 for video_dict in info_dict.get('entries', {}):
3431 actual_post_extract(video_dict or {})
3432 return
3433
3434 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3435 info_dict.update(post_extractor())
3436
3437 actual_post_extract(info_dict or {})
3438
3439 def run_pp(self, pp, infodict):
3440 files_to_delete = []
3441 if '__files_to_move' not in infodict:
3442 infodict['__files_to_move'] = {}
3443 try:
3444 files_to_delete, infodict = pp.run(infodict)
3445 except PostProcessingError as e:
3446 # Must be True and not 'only_download'
3447 if self.params.get('ignoreerrors') is True:
3448 self.report_error(e)
3449 return infodict
3450 raise
3451
3452 if not files_to_delete:
3453 return infodict
3454 if self.params.get('keepvideo', False):
3455 for f in files_to_delete:
3456 infodict['__files_to_move'].setdefault(f, '')
3457 else:
3458 self._delete_downloaded_files(
3459 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3460 return infodict
3461
3462 def run_all_pps(self, key, info, *, additional_pps=None):
3463 if key != 'video':
3464 self._forceprint(key, info)
3465 for pp in (additional_pps or []) + self._pps[key]:
3466 info = self.run_pp(pp, info)
3467 return info
3468
3469 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3470 info = dict(ie_info)
3471 info['__files_to_move'] = files_to_move or {}
3472 try:
3473 info = self.run_all_pps(key, info)
3474 except PostProcessingError as err:
3475 msg = f'Preprocessing: {err}'
3476 info.setdefault('__pending_error', msg)
3477 self.report_error(msg, is_error=False)
3478 return info, info.pop('__files_to_move', None)
3479
3480 def post_process(self, filename, info, files_to_move=None):
3481 """Run all the postprocessors on the given file."""
3482 info['filepath'] = filename
3483 info['__files_to_move'] = files_to_move or {}
3484 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3485 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3486 del info['__files_to_move']
3487 return self.run_all_pps('after_move', info)
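# Illustrative lifecycle for a single video: pre_process() runs the 'video'
# and 'before_dl' PPs; then, after the download, post_process() runs the
# 'post_process' PPs (plus any queued in '__postprocessors'), moves the
# files via MoveFilesAfterDownloadPP, and finally runs the 'after_move' PPs.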
3488
3489 def _make_archive_id(self, info_dict):
3490 video_id = info_dict.get('id')
3491 if not video_id:
3492 return
3493 # Future-proof against any change in case
3494 # and to keep backwards compatibility with prior versions
3495 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3496 if extractor is None:
3497 url = str_or_none(info_dict.get('url'))
3498 if not url:
3499 return
3500 # Try to find matching extractor for the URL and take its ie_key
3501 for ie_key, ie in self._ies.items():
3502 if ie.suitable(url):
3503 extractor = ie_key
3504 break
3505 else:
3506 return
3507 return make_archive_id(extractor, video_id)
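# Illustrative example: a video with extractor_key 'Youtube' and id
# 'BaW_jenozKc' yields the archive entry 'youtube BaW_jenozKc'
# (make_archive_id lowercases the extractor key).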
3508
3509 def in_download_archive(self, info_dict):
3510 if not self.archive:
3511 return False
3512
3513 vid_ids = [self._make_archive_id(info_dict)]
3514 vid_ids.extend(info_dict.get('_old_archive_ids') or [])
3515 return any(id_ in self.archive for id_ in vid_ids)
3516
3517 def record_download_archive(self, info_dict):
3518 fn = self.params.get('download_archive')
3519 if fn is None:
3520 return
3521 vid_id = self._make_archive_id(info_dict)
3522 assert vid_id
3523
3524 self.write_debug(f'Adding to archive: {vid_id}')
3525 if is_path_like(fn):
3526 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3527 archive_file.write(vid_id + '\n')
3528 self.archive.add(vid_id)
3529
3530 @staticmethod
3531 def format_resolution(format, default='unknown'):
3532 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3533 return 'audio only'
3534 if format.get('resolution') is not None:
3535 return format['resolution']
3536 if format.get('width') and format.get('height'):
3537 return '%dx%d' % (format['width'], format['height'])
3538 elif format.get('height'):
3539 return '%sp' % format['height']
3540 elif format.get('width'):
3541 return '%dx?' % format['width']
3542 return default
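# Illustrative examples:
#   {'width': 1920, 'height': 1080}      -> '1920x1080'
#   {'height': 720}                      -> '720p'
#   {'vcodec': 'none', 'acodec': 'mp4a'} -> 'audio only'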
3543
3544 def _list_format_headers(self, *headers):
3545 if self.params.get('listformats_table', True) is not False:
3546 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3547 return headers
3548
3549 def _format_note(self, fdict):
3550 res = ''
3551 if fdict.get('ext') in ['f4f', 'f4m']:
3552 res += '(unsupported)'
3553 if fdict.get('language'):
3554 if res:
3555 res += ' '
3556 res += '[%s]' % fdict['language']
3557 if fdict.get('format_note') is not None:
3558 if res:
3559 res += ' '
3560 res += fdict['format_note']
3561 if fdict.get('tbr') is not None:
3562 if res:
3563 res += ', '
3564 res += '%4dk' % fdict['tbr']
3565 if fdict.get('container') is not None:
3566 if res:
3567 res += ', '
3568 res += '%s container' % fdict['container']
3569 if (fdict.get('vcodec') is not None
3570 and fdict.get('vcodec') != 'none'):
3571 if res:
3572 res += ', '
3573 res += fdict['vcodec']
3574 if fdict.get('vbr') is not None:
3575 res += '@'
3576 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3577 res += 'video@'
3578 if fdict.get('vbr') is not None:
3579 res += '%4dk' % fdict['vbr']
3580 if fdict.get('fps') is not None:
3581 if res:
3582 res += ', '
3583 res += '%sfps' % fdict['fps']
3584 if fdict.get('acodec') is not None:
3585 if res:
3586 res += ', '
3587 if fdict['acodec'] == 'none':
3588 res += 'video only'
3589 else:
3590 res += '%-5s' % fdict['acodec']
3591 elif fdict.get('abr') is not None:
3592 if res:
3593 res += ', '
3594 res += 'audio'
3595 if fdict.get('abr') is not None:
3596 res += '@%3dk' % fdict['abr']
3597 if fdict.get('asr') is not None:
3598 res += ' (%5dHz)' % fdict['asr']
3599 if fdict.get('filesize') is not None:
3600 if res:
3601 res += ', '
3602 res += format_bytes(fdict['filesize'])
3603 elif fdict.get('filesize_approx') is not None:
3604 if res:
3605 res += ', '
3606 res += '~' + format_bytes(fdict['filesize_approx'])
3607 return res
3608
3609 def _get_formats(self, info_dict):
3610 if info_dict.get('formats') is None:
3611 if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
3612 return [info_dict]
3613 return []
3614 return info_dict['formats']
3615
3616 def render_formats_table(self, info_dict):
3617 formats = self._get_formats(info_dict)
3618 if not formats:
3619 return
3620 if self.params.get('listformats_table', True) is False:
3621 table = [
3622 [
3623 format_field(f, 'format_id'),
3624 format_field(f, 'ext'),
3625 self.format_resolution(f),
3626 self._format_note(f)
3627 ] for f in formats if (f.get('preference') or 0) >= -1000]
3628 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3629
3630 def simplified_codec(f, field):
3631 assert field in ('acodec', 'vcodec')
3632 codec = f.get(field, 'unknown')
3633 if not codec:
3634 return 'unknown'
3635 elif codec != 'none':
3636 return '.'.join(codec.split('.')[:4])
3637
3638 if field == 'vcodec' and f.get('acodec') == 'none':
3639 return 'images'
3640 elif field == 'acodec' and f.get('vcodec') == 'none':
3641 return ''
3642 return self._format_out('audio only' if field == 'vcodec' else 'video only',
3643 self.Styles.SUPPRESS)
3644
3645 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3646 table = [
3647 [
3648 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3649 format_field(f, 'ext'),
3650 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3651 format_field(f, 'fps', '\t%d', func=round),
3652 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3653 format_field(f, 'audio_channels', '\t%s'),
3654 delim,
3655 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3656 format_field(f, 'tbr', '\t%dk', func=round),
3657 shorten_protocol_name(f.get('protocol', '')),
3658 delim,
3659 simplified_codec(f, 'vcodec'),
3660 format_field(f, 'vbr', '\t%dk', func=round),
3661 simplified_codec(f, 'acodec'),
3662 format_field(f, 'abr', '\t%dk', func=round),
3663 format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
3664 join_nonempty(
3665 self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3666 format_field(f, 'language', '[%s]'),
3667 join_nonempty(format_field(f, 'format_note'),
3668 format_field(f, 'container', ignore=(None, f.get('ext'))),
3669 delim=', '),
3670 delim=' '),
3671 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3672 header_line = self._list_format_headers(
3673 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3674 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3675
3676 return render_table(
3677 header_line, table, hide_empty=True,
3678 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3679
3680 def render_thumbnails_table(self, info_dict):
3681 thumbnails = list(info_dict.get('thumbnails') or [])
3682 if not thumbnails:
3683 return None
3684 return render_table(
3685 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3686 [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
3687
3688 def render_subtitles_table(self, video_id, subtitles):
3689 def _row(lang, formats):
3690 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3691 if len(set(names)) == 1:
3692 names = [] if names[0] == 'unknown' else names[:1]
3693 return [lang, ', '.join(names), ', '.join(exts)]
3694
3695 if not subtitles:
3696 return None
3697 return render_table(
3698 self._list_format_headers('Language', 'Name', 'Formats'),
3699 [_row(lang, formats) for lang, formats in subtitles.items()],
3700 hide_empty=True)
3701
3702 def __list_table(self, video_id, name, func, *args):
3703 table = func(*args)
3704 if not table:
3705 self.to_screen(f'{video_id} has no {name}')
3706 return
3707 self.to_screen(f'[info] Available {name} for {video_id}:')
3708 self.to_stdout(table)
3709
3710 def list_formats(self, info_dict):
3711 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3712
3713 def list_thumbnails(self, info_dict):
3714 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3715
3716 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3717 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3718
3719 def urlopen(self, req):
3720 """ Start an HTTP download """
3721 if isinstance(req, str):
3722 req = sanitized_Request(req)
3723 return self._opener.open(req, timeout=self._socket_timeout)
3724
3725 def print_debug_header(self):
3726 if not self.params.get('verbose'):
3727 return
3728
3729 from . import _IN_CLI # Must be delayed import
3730
3731 # These imports can be slow. So import them only as needed
3732 from .extractor.extractors import _LAZY_LOADER
3733 from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
3734
3735 def get_encoding(stream):
3736 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3737 if not supports_terminal_sequences(stream):
3738 from .utils import WINDOWS_VT_MODE # Must be imported locally
3739 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3740 return ret
3741
3742 encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
3743 locale.getpreferredencoding(),
3744 sys.getfilesystemencoding(),
3745 self.get_encoding(),
3746 ', '.join(
3747 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
3748 if stream is not None and key != 'console')
3749 )
3750
3751 logger = self.params.get('logger')
3752 if logger:
3753 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3754 write_debug(encoding_str)
3755 else:
3756 write_string(f'[debug] {encoding_str}\n', encoding=None)
3757 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3758
3759 source = detect_variant()
3760 if VARIANT not in (None, 'pip'):
3761 source += '*'
3762 write_debug(join_nonempty(
3763 f'{"yt-dlp" if REPOSITORY == "yt-dlp/yt-dlp" else REPOSITORY} version',
3764 __version__,
3765 f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3766 '' if source == 'unknown' else f'({source})',
3767 '' if _IN_CLI else 'API',
3768 delim=' '))
3769
3770 if not _IN_CLI:
3771 write_debug(f'params: {self.params}')
3772
3773 if not _LAZY_LOADER:
3774 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3775 write_debug('Lazy loading extractors is forcibly disabled')
3776 else:
3777 write_debug('Lazy loading extractors is disabled')
3778 if self.params['compat_opts']:
3779 write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
3780
3781 if current_git_head():
3782 write_debug(f'Git HEAD: {current_git_head()}')
3783 write_debug(system_identifier())
3784
3785 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3786 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3787 if ffmpeg_features:
3788 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
3789
3790 exe_versions['rtmpdump'] = rtmpdump_version()
3791 exe_versions['phantomjs'] = PhantomJSwrapper._version()
3792 exe_str = ', '.join(
3793 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3794 ) or 'none'
3795 write_debug('exe versions: %s' % exe_str)
3796
3797 from .compat.compat_utils import get_package_info
3798 from .dependencies import available_dependencies
3799
3800 write_debug('Optional libraries: %s' % (', '.join(sorted({
3801 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
3802 })) or 'none'))
3803
3804 self._setup_opener()
3805 proxy_map = {}
3806 for handler in self._opener.handlers:
3807 if hasattr(handler, 'proxies'):
3808 proxy_map.update(handler.proxies)
3809 write_debug(f'Proxy map: {proxy_map}')
3810
3811 for plugin_type, plugins in {'Extractor': plugin_extractors, 'Post-Processor': plugin_postprocessors}.items():
3812 if not plugins:
3813 continue
3814 write_debug(f'{plugin_type} Plugins: %s' % (', '.join(sorted(('%s%s' % (
3815 klass.__name__, '' if klass.__name__ == name else f' as {name}')
3816 for name, klass in plugins.items())))))
3817 plugin_dirs = plugin_directories()
3818 if plugin_dirs:
3819 write_debug(f'Plugin directories: {plugin_dirs}')
3820
3821 # Not implemented
3822 if False and self.params.get('call_home'):
3823 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
3824 write_debug('Public IP address: %s' % ipaddr)
3825 latest_version = self.urlopen(
3826 'https://yt-dl.org/latest/version').read().decode()
3827 if version_tuple(latest_version) > version_tuple(__version__):
3828 self.report_warning(
3829 'You are using an outdated version (newest version: %s)! '
3830 'See https://yt-dl.org/update if you need help updating.' %
3831 latest_version)
3832
3833 def _setup_opener(self):
3834 if hasattr(self, '_opener'):
3835 return
3836 timeout_val = self.params.get('socket_timeout')
3837 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3838
3839 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3840 opts_cookiefile = self.params.get('cookiefile')
3841 opts_proxy = self.params.get('proxy')
3842
3843 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3844
3845 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3846 if opts_proxy is not None:
3847 if opts_proxy == '':
3848 proxies = {}
3849 else:
3850 proxies = {'http': opts_proxy, 'https': opts_proxy}
3851 else:
3852 proxies = urllib.request.getproxies()
3853 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3854 if 'http' in proxies and 'https' not in proxies:
3855 proxies['https'] = proxies['http']
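# Illustrative behaviour: --proxy "" disables proxying entirely, while
# without --proxy the environment (HTTP_PROXY/HTTPS_PROXY etc.) is
# consulted via urllib.request.getproxies().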
3856 proxy_handler = PerRequestProxyHandler(proxies)
3857
3858 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3859 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3860 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3861 redirect_handler = YoutubeDLRedirectHandler()
3862 data_handler = urllib.request.DataHandler()
3863
3864 # When passing our own FileHandler instance, build_opener won't add the
3865 # default FileHandler and allows us to disable the file protocol, which
3866 # can be used for malicious purposes (see
3867 # https://github.com/ytdl-org/youtube-dl/issues/8227)
3868 file_handler = urllib.request.FileHandler()
3869
3870 def file_open(*args, **kwargs):
3871 raise urllib.error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3872 file_handler.file_open = file_open
3873
3874 opener = urllib.request.build_opener(
3875 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3876
3877 # Delete the default user-agent header, which would otherwise apply in
3878 # cases where our custom HTTP handler doesn't come into play
3879 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3880 opener.addheaders = []
3881 self._opener = opener
3882
3883 def encode(self, s):
3884 if isinstance(s, bytes):
3885 return s # Already encoded
3886
3887 try:
3888 return s.encode(self.get_encoding())
3889 except UnicodeEncodeError as err:
3890 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3891 raise
3892
3893 def get_encoding(self):
3894 encoding = self.params.get('encoding')
3895 if encoding is None:
3896 encoding = preferredencoding()
3897 return encoding
3898
3899 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3900 ''' Write infojson and return True = written, 'exists' = already exists, False = skipped, None = error '''
3901 if overwrite is None:
3902 overwrite = self.params.get('overwrites', True)
3903 if not self.params.get('writeinfojson'):
3904 return False
3905 elif not infofn:
3906 self.write_debug(f'Skipping writing {label} infojson')
3907 return False
3908 elif not self._ensure_dir_exists(infofn):
3909 return None
3910 elif not overwrite and os.path.exists(infofn):
3911 self.to_screen(f'[info] {label.title()} metadata is already present')
3912 return 'exists'
3913
3914 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3915 try:
3916 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3917 return True
3918 except OSError:
3919 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3920 return None
3921
3922 def _write_description(self, label, ie_result, descfn):
3923 ''' Write description and return True = written, False = skipped, None = error '''
3924 if not self.params.get('writedescription'):
3925 return False
3926 elif not descfn:
3927 self.write_debug(f'Skipping writing {label} description')
3928 return False
3929 elif not self._ensure_dir_exists(descfn):
3930 return None
3931 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3932 self.to_screen(f'[info] {label.title()} description is already present')
3933 elif ie_result.get('description') is None:
3934 self.to_screen(f'[info] There\'s no {label} description to write')
3935 return False
3936 else:
3937 try:
3938 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3939 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3940 descfile.write(ie_result['description'])
3941 except OSError:
3942 self.report_error(f'Cannot write {label} description file {descfn}')
3943 return None
3944 return True
3945
3946 def _write_subtitles(self, info_dict, filename):
3947 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3948 ret = []
3949 subtitles = info_dict.get('requested_subtitles')
3950 if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3951 # Subtitle download errors are already handled by the relevant IE,
3952 # so this silently continues when used with an IE that doesn't support subtitles
3953 return ret
3954 elif not subtitles:
3955 self.to_screen('[info] There are no subtitles for the requested languages')
3956 return ret
3957 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3958 if not sub_filename_base:
3959 self.to_screen('[info] Skipping writing video subtitles')
3960 return ret
3961
3962 for sub_lang, sub_info in subtitles.items():
3963 sub_format = sub_info['ext']
3964 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3965 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3966 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3967 if existing_sub:
3968 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3969 sub_info['filepath'] = existing_sub
3970 ret.append((existing_sub, sub_filename_final))
3971 continue
3972
3973 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3974 if sub_info.get('data') is not None:
3975 try:
3976 # Use newline='' to prevent conversion of newline characters
3977 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3978 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3979 subfile.write(sub_info['data'])
3980 sub_info['filepath'] = sub_filename
3981 ret.append((sub_filename, sub_filename_final))
3982 continue
3983 except OSError:
3984 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3985 return None
3986
3987 try:
3988 sub_copy = sub_info.copy()
3989 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3990 self.dl(sub_filename, sub_copy, subtitle=True)
3991 sub_info['filepath'] = sub_filename
3992 ret.append((sub_filename, sub_filename_final))
3993 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3994 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
3995 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
3996 if not self.params.get('ignoreerrors'):
3997 self.report_error(msg)
3998 raise DownloadError(msg)
3999 self.report_warning(msg)
4000 return ret
4001
4002 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
4003 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
4004 write_all = self.params.get('write_all_thumbnails', False)
4005 thumbnails, ret = [], []
4006 if write_all or self.params.get('writethumbnail', False):
4007 thumbnails = info_dict.get('thumbnails') or []
4008 if not thumbnails:
4009 self.to_screen(f'[info] There are no {label} thumbnails to download')
4010 return ret
4011 multiple = write_all and len(thumbnails) > 1
4012
4013 if thumb_filename_base is None:
4014 thumb_filename_base = filename
4015 if thumbnails and not thumb_filename_base:
4016 self.write_debug(f'Skipping writing {label} thumbnail')
4017 return ret
4018
4019 for idx, t in list(enumerate(thumbnails))[::-1]:
4020 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
4021 thumb_display_id = f'{label} thumbnail {t["id"]}'
4022 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
4023 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
4024
4025 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
4026 if existing_thumb:
4027 self.to_screen('[info] %s is already present' % (
4028 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
4029 t['filepath'] = existing_thumb
4030 ret.append((existing_thumb, thumb_filename_final))
4031 else:
4032 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
4033 try:
4034 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
4035 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
4036 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
4037 shutil.copyfileobj(uf, thumbf)
4038 ret.append((thumb_filename, thumb_filename_final))
4039 t['filepath'] = thumb_filename
4040 except network_exceptions as err:
4041 thumbnails.pop(idx)
4042 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
4043 if ret and not write_all:
4044 break
4045 return ret