#!/usr/bin/env python3
import collections
import contextlib
import datetime
import errno
import fileinput
import functools
import io
import itertools
import json
import locale
import operator
import os
import platform
import random
import re
import shutil
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata
import urllib.request
from string import ascii_letters

from .cache import Cache
from .compat import (
    HAS_LEGACY as compat_has_legacy,
    compat_get_terminal_size,
    compat_os_name,
    compat_shlex_quote,
    compat_str,
    compat_urllib_error,
    compat_urllib_request,
)
from .cookies import load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import _LAZY_LOADER
from .extractor import _PLUGIN_CLASSES as plugin_extractors
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .update import detect_variant
from .utils import (
    DEFAULT_OUTTMPL,
    LINK_TEMPLATES,
    NO_DEFAULT,
    NUMBER_RE,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    GeoRestrictedError,
    HEADRequest,
    InAdvancePagedList,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PerRequestProxyHandler,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
    age_restricted,
    args_to_str,
    date_from_str,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    expand_path,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_domain,
    int_or_none,
    iri_to_uri,
    join_nonempty,
    locked_file,
    make_dir,
    make_HTTPS_handler,
    merge_headers,
    network_exceptions,
    number_of_digits,
    orderedSet,
    parse_filesize,
    platform_name,
    preferredencoding,
    prepend_extension,
    register_socks_protocols,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_get,
    url_basename,
    variadic,
    version_tuple,
    windows_enable_vt_mode,
    write_json_file,
    write_string,
)
from .version import RELEASE_GIT_HEAD, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
156 """YoutubeDL class.
157
158 YoutubeDL objects are the ones responsible of downloading the
159 actual video file and writing it to disk if the user has requested
160 it, among some other tasks. In most cases there should be one per
161 program. As, given a video URL, the downloader doesn't know how to
162 extract all the needed information, task that InfoExtractors do, it
163 has to pass the URL to one of them.
164
165 For this, YoutubeDL objects have a method that allows
166 InfoExtractors to be registered in a given order. When it is passed
167 a URL, the YoutubeDL object handles it to the first InfoExtractor it
168 finds that reports being able to handle it. The InfoExtractor extracts
169 all the information about the video or videos the URL refers to, and
170 YoutubeDL process the extracted information, possibly using a File
171 Downloader to download the video.
172
173 YoutubeDL objects accept a lot of parameters. In order not to saturate
174 the object constructor with arguments, it receives a dictionary of
175 options instead. These options are available through the params
176 attribute for the InfoExtractors to use. The YoutubeDL also
177 registers itself as the downloader in charge for the InfoExtractors
178 that are added to it, so this is a "mutual registration".
179
180 Available options:
181
182 username: Username for authentication purposes.
183 password: Password for authentication purposes.
184 videopassword: Password for accessing a video.
185 ap_mso: Adobe Pass multiple-system operator identifier.
186 ap_username: Multiple-system operator account username.
187 ap_password: Multiple-system operator account password.
188 usenetrc: Use netrc for authentication instead.
189 verbose: Print additional info to stdout.
190 quiet: Do not print messages to stdout.
191 no_warnings: Do not print out anything for warnings.
192 forceprint: A dict with keys WHEN mapped to a list of templates to
193 print to stdout. The allowed keys are video or any of the
194 items in utils.POSTPROCESS_WHEN.
195 For compatibility, a single list is also accepted
196 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
197 a list of tuples with (template, filename)
198 forcejson: Force printing info_dict as JSON.
199 dump_single_json: Force printing the info_dict of the whole playlist
200 (or video) as a single JSON line.
201 force_write_download_archive: Force writing download archive regardless
202 of 'skip_download' or 'simulate'.
203 simulate: Do not download the video files. If unset (or None),
204 simulate only if listsubtitles, listformats or list_thumbnails is used
205 format: Video format code. see "FORMAT SELECTION" for more details.
206 You can also pass a function. The function takes 'ctx' as
207 argument and returns the formats to download.
208 See "build_format_selector" for an implementation
209 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
210 ignore_no_formats_error: Ignore "No video formats" error. Usefull for
211 extracting metadata even if the video is not actually
212 available for download (experimental)
213 format_sort: A list of fields by which to sort the video formats.
214 See "Sorting Formats" for more details.
215 format_sort_force: Force the given format_sort. see "Sorting Formats"
216 for more details.
217 prefer_free_formats: Whether to prefer video formats with free containers
218 over non-free ones of same quality.
219 allow_multiple_video_streams: Allow multiple video streams to be merged
220 into a single file
221 allow_multiple_audio_streams: Allow multiple audio streams to be merged
222 into a single file
223 check_formats Whether to test if the formats are downloadable.
224 Can be True (check all), False (check none),
225 'selected' (check selected formats),
226 or None (check only if requested by extractor)
227 paths: Dictionary of output paths. The allowed keys are 'home'
228 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
229 outtmpl: Dictionary of templates for output names. Allowed keys
230 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
231 For compatibility with youtube-dl, a single string can also be used
232 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
233 restrictfilenames: Do not allow "&" and spaces in file names
234 trim_file_name: Limit length of filename (extension excluded)
235 windowsfilenames: Force the filenames to be windows compatible
236 ignoreerrors: Do not stop on download/postprocessing errors.
237 Can be 'only_download' to ignore only download errors.
238 Default is 'only_download' for CLI, but False for API
239 skip_playlist_after_errors: Number of allowed failures until the rest of
240 the playlist is skipped
241 force_generic_extractor: Force downloader to use the generic extractor
242 overwrites: Overwrite all video and metadata files if True,
243 overwrite only non-video files if None
244 and don't overwrite any file if False
245 For compatibility with youtube-dl,
246 "nooverwrites" may also be used instead
247 playliststart: Playlist item to start at.
248 playlistend: Playlist item to end at.
249 playlist_items: Specific indices of playlist to download.
250 playlistreverse: Download playlist items in reverse order.
251 playlistrandom: Download playlist items in random order.
252 matchtitle: Download only matching titles.
253 rejecttitle: Reject downloads for matching titles.
254 logger: Log messages to a logging.Logger instance.
255 logtostderr: Log messages to stderr instead of stdout.
256 consoletitle: Display progress in console window's titlebar.
257 writedescription: Write the video description to a .description file
258 writeinfojson: Write the video description to a .info.json file
259 clean_infojson: Remove private fields from the infojson
260 getcomments: Extract video comments. This will not be written to disk
261 unless writeinfojson is also given
262 writeannotations: Write the video annotations to a .annotations.xml file
263 writethumbnail: Write the thumbnail image to a file
264 allow_playlist_files: Whether to write playlists' description, infojson etc
265 also to disk when using the 'write*' options
266 write_all_thumbnails: Write all thumbnail formats to files
267 writelink: Write an internet shortcut file, depending on the
268 current platform (.url/.webloc/.desktop)
269 writeurllink: Write a Windows internet shortcut file (.url)
270 writewebloclink: Write a macOS internet shortcut file (.webloc)
271 writedesktoplink: Write a Linux internet shortcut file (.desktop)
272 writesubtitles: Write the video subtitles to a file
273 writeautomaticsub: Write the automatically generated subtitles to a file
274 listsubtitles: Lists all available subtitles for the video
275 subtitlesformat: The format code for subtitles
276 subtitleslangs: List of languages of the subtitles to download (can be regex).
277 The list may contain "all" to refer to all the available
278 subtitles. The language can be prefixed with a "-" to
279 exclude it from the requested languages. Eg: ['all', '-live_chat']
280 keepvideo: Keep the video file after post-processing
281 daterange: A DateRange object, download only if the upload_date is in the range.
282 skip_download: Skip the actual download of the video file
283 cachedir: Location of the cache files in the filesystem.
284 False to disable filesystem cache.
285 noplaylist: Download single video instead of a playlist if in doubt.
286 age_limit: An integer representing the user's age in years.
287 Unsuitable videos for the given age are skipped.
288 min_views: An integer representing the minimum view count the video
289 must have in order to not be skipped.
290 Videos without view count information are always
291 downloaded. None for no limit.
292 max_views: An integer representing the maximum view count.
293 Videos that are more popular than that are not
294 downloaded.
295 Videos without view count information are always
296 downloaded. None for no limit.
297 download_archive: File name of a file where all downloads are recorded.
298 Videos already present in the file are not downloaded
299 again.
300 break_on_existing: Stop the download process after attempting to download a
301 file that is in the archive.
302 break_on_reject: Stop the download process when encountering a video that
303 has been filtered out.
304 break_per_url: Whether break_on_reject and break_on_existing
305 should act on each input URL as opposed to for the entire queue
306 cookiefile: File name or text stream from where cookies should be read and dumped to
307 cookiesfrombrowser: A tuple containing the name of the browser, the profile
308 name/pathfrom where cookies are loaded, and the name of the
309 keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
310 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
311 support RFC 5746 secure renegotiation
312 nocheckcertificate: Do not verify SSL certificates
313 client_certificate: Path to client certificate file in PEM format. May include the private key
314 client_certificate_key: Path to private key file for client certificate
315 client_certificate_password: Password for client certificate private key, if encrypted.
316 If not provided and the key is encrypted, yt-dlp will ask interactively
317 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
318 At the moment, this is only supported by YouTube.
319 http_headers: A dictionary of custom headers to be used for all requests
320 proxy: URL of the proxy server to use
321 geo_verification_proxy: URL of the proxy to use for IP address verification
322 on geo-restricted sites.
323 socket_timeout: Time to wait for unresponsive hosts, in seconds
324 bidi_workaround: Work around buggy terminals without bidirectional text
325 support, using fridibi
326 debug_printtraffic:Print out sent and received HTTP traffic
327 default_search: Prepend this string if an input url is not valid.
328 'auto' for elaborate guessing
329 encoding: Use this encoding instead of the system-specified.
330 extract_flat: Do not resolve URLs, return the immediate result.
331 Pass in 'in_playlist' to only show this behavior for
332 playlist items.
333 wait_for_video: If given, wait for scheduled streams to become available.
334 The value should be a tuple containing the range
335 (min_secs, max_secs) to wait between retries
336 postprocessors: A list of dictionaries, each with an entry
337 * key: The name of the postprocessor. See
338 yt_dlp/postprocessor/__init__.py for a list.
339 * when: When to run the postprocessor. Allowed values are
340 the entries of utils.POSTPROCESS_WHEN
341 Assumed to be 'post_process' if not given
342 progress_hooks: A list of functions that get called on download
343 progress, with a dictionary with the entries
344 * status: One of "downloading", "error", or "finished".
345 Check this first and ignore unknown values.
346 * info_dict: The extracted info_dict
347
348 If status is one of "downloading", or "finished", the
349 following properties may also be present:
350 * filename: The final filename (always present)
351 * tmpfilename: The filename we're currently writing to
352 * downloaded_bytes: Bytes on disk
353 * total_bytes: Size of the whole file, None if unknown
354 * total_bytes_estimate: Guess of the eventual file size,
355 None if unavailable.
356 * elapsed: The number of seconds since download started.
357 * eta: The estimated time in seconds, None if unknown
358 * speed: The download speed in bytes/second, None if
359 unknown
360 * fragment_index: The counter of the currently
361 downloaded video fragment.
362 * fragment_count: The number of fragments (= individual
363 files that will be merged)
364
365 Progress hooks are guaranteed to be called at least once
366 (with status "finished") if the download is successful.
367 postprocessor_hooks: A list of functions that get called on postprocessing
368 progress, with a dictionary with the entries
369 * status: One of "started", "processing", or "finished".
370 Check this first and ignore unknown values.
371 * postprocessor: Name of the postprocessor
372 * info_dict: The extracted info_dict
373
374 Progress hooks are guaranteed to be called at least twice
375 (with status "started" and "finished") if the processing is successful.
376 merge_output_format: Extension to use when merging formats.
377 final_ext: Expected final extension; used to detect when the file was
378 already downloaded and converted
379 fixup: Automatically correct known faults of the file.
380 One of:
381 - "never": do nothing
382 - "warn": only emit a warning
383 - "detect_or_warn": check whether we can do anything
384 about it, warn otherwise (default)
385 source_address: Client-side IP address to bind to.
386 sleep_interval_requests: Number of seconds to sleep between requests
387 during extraction
388 sleep_interval: Number of seconds to sleep before each download when
389 used alone or a lower bound of a range for randomized
390 sleep before each download (minimum possible number
391 of seconds to sleep) when used along with
392 max_sleep_interval.
393 max_sleep_interval:Upper bound of a range for randomized sleep before each
394 download (maximum possible number of seconds to sleep).
395 Must only be used along with sleep_interval.
396 Actual sleep time will be a random float from range
397 [sleep_interval; max_sleep_interval].
398 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
399 listformats: Print an overview of available video formats and exit.
400 list_thumbnails: Print a table of all thumbnails and exit.
401 match_filter: A function that gets called for every video with the signature
402 (info_dict, *, incomplete: bool) -> Optional[str]
403 For backward compatibility with youtube-dl, the signature
404 (info_dict) -> Optional[str] is also allowed.
405 - If it returns a message, the video is ignored.
406 - If it returns None, the video is downloaded.
407 - If it returns utils.NO_DEFAULT, the user is interactively
408 asked whether to download the video.
409 match_filter_func in utils.py is one example for this.
410 no_color: Do not emit color codes in output.
411 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
412 HTTP header
413 geo_bypass_country:
414 Two-letter ISO 3166-2 country code that will be used for
415 explicit geographic restriction bypassing via faking
416 X-Forwarded-For HTTP header
417 geo_bypass_ip_block:
418 IP range in CIDR notation that will be used similarly to
419 geo_bypass_country
420 external_downloader: A dictionary of protocol keys and the executable of the
421 external downloader to use for it. The allowed protocols
422 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
423 Set the value to 'native' to use the native downloader
424 compat_opts: Compatibility options. See "Differences in default behavior".
425 The following options do not work when used through the API:
426 filename, abort-on-error, multistreams, no-live-chat, format-sort
427 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
428 Refer __init__.py for their implementation
429 progress_template: Dictionary of templates for progress outputs.
430 Allowed keys are 'download', 'postprocess',
431 'download-title' (console title) and 'postprocess-title'.
432 The template is mapped on a dictionary with keys 'progress' and 'info'
433 retry_sleep_functions: Dictionary of functions that takes the number of attempts
434 as argument and returns the time to sleep in seconds.
435 Allowed keys are 'http', 'fragment', 'file_access'
436 download_ranges: A function that gets called for every video with the signature
437 (info_dict, *, ydl) -> Iterable[Section].
438 Only the returned sections will be downloaded. Each Section contains:
439 * start_time: Start time of the section in seconds
440 * end_time: End time of the section in seconds
441 * title: Section title (Optional)
442 * index: Section number (Optional)
443
444 The following parameters are not used by YoutubeDL itself, they are used by
445 the downloader (see yt_dlp/downloader/common.py):
446 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
447 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
448 continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
449 external_downloader_args, concurrent_fragment_downloads.
450
451 The following options are used by the post processors:
452 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
453 to the binary or its containing directory.
454 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
455 and a list of additional command-line arguments for the
456 postprocessor/executable. The dict can also have "PP+EXE" keys
457 which are used when the given exe is used by the given PP.
458 Use 'default' as the name for arguments to passed to all PP
459 For compatibility with youtube-dl, a single list of args
460 can also be used
461
462 The following options are used by the extractors:
463 extractor_retries: Number of times to retry for known errors
464 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
465 hls_split_discontinuity: Split HLS playlists to different formats at
466 discontinuities such as ad breaks (default: False)
467 extractor_args: A dictionary of arguments to be passed to the extractors.
468 See "EXTRACTOR ARGUMENTS" for details.
469 Eg: {'youtube': {'skip': ['dash', 'hls']}}
470 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
471
472 The following options are deprecated and may be removed in the future:
473
474 forceurl: - Use forceprint
475 Force printing final URL.
476 forcetitle: - Use forceprint
477 Force printing title.
478 forceid: - Use forceprint
479 Force printing ID.
480 forcethumbnail: - Use forceprint
481 Force printing thumbnail URL.
482 forcedescription: - Use forceprint
483 Force printing description.
484 forcefilename: - Use forceprint
485 Force printing final filename.
486 forceduration: - Use forceprint
487 Force printing duration.
488 allsubtitles: - Use subtitleslangs = ['all']
489 Downloads all the subtitles of the video
490 (requires writesubtitles or writeautomaticsub)
491 include_ads: - Doesn't work
492 Download ads as well
493 call_home: - Not implemented
494 Boolean, true iff we are allowed to contact the
495 yt-dlp servers for debugging.
496 post_hooks: - Register a custom postprocessor
497 A list of functions that get called as the final step
498 for each video file, after all postprocessors have been
499 called. The filename will be passed as the only argument.
500 hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}.
501 Use the native HLS downloader instead of ffmpeg/avconv
502 if True, otherwise use ffmpeg/avconv if False, otherwise
503 use downloader suggested by extractor if None.
504 prefer_ffmpeg: - avconv support is deprecated
505 If False, use avconv instead of ffmpeg if both are available,
506 otherwise prefer ffmpeg.
507 youtube_include_dash_manifest: - Use extractor_args
508 If True (default), DASH manifests and related
509 data will be downloaded and processed by extractor.
510 You can reduce network I/O by disabling it if you don't
511 care about DASH. (only for youtube)
512 youtube_include_hls_manifest: - Use extractor_args
513 If True (default), HLS manifests and related
514 data will be downloaded and processed by extractor.
515 You can reduce network I/O by disabling it if you don't
516 care about HLS. (only for youtube)
517 """

    _NUMERIC_FIELDS = {
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
    }
    _format_selection_exts = {
        'audio': {'m4a', 'mp3', 'ogg', 'aac'},
        'video': {'mp4', 'flv', 'webm', '3gp'},
        'storyboards': {'mhtml'},
    }

    def __init__(self, params=None, auto_init=True):
        """Create a YoutubeDL object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        windows_enable_vt_mode()
        stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout
        self._out_files = Namespace(
            out=stdout,
            error=sys.stderr,
            screen=sys.stderr if self.params.get('quiet') else stdout,
            console=None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        )
        self._allow_colors = Namespace(**{
            type_: not self.params.get('no_color') and supports_terminal_sequences(stream)
            for type_, stream in self._out_files.items_ if type_ != 'console'
        })

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                '         If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecation_warning(msg)

        self.params['compat_opts'] = set(self.params.get('compat_opts', ()))
        if not compat_has_legacy:
            self.params['compat_opts'].add('no-compat-legacy')
        if 'list-formats' in self.params['compat_opts']:
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                width_args = [] if width is None else ['-w', str(width)]
                sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error}
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if auto_init:
            if auto_init != 'no_verbose_header':
                self.print_debug_header()
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        # Set http_headers defaults according to std_headers
        self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        self._setup_opener()
        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            if fn is None:
                return False
            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    # Each line is an archive id, generally '<extractor> <video id>'
                    for line in archive_file:
                        self.archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))
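    # For illustration (hypothetical video ID): `yt-dlp -abcDEF123_` would be
    # parsed as a bundle of option flags rather than a dash-prefixed video ID,
    # so the warning above suggests `yt-dlp -- -abcDEF123_` instead.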

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def _get_info_extractor_class(self, ie_key):
        ie = self._ies.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)
            self.add_info_extractor(ie)
        return ie

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key. It will try to get one from
        the _ies list; if there is no instance, it will create a new one and add
        it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode())
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode()
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
        if skip_eol is not False:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. Use "YoutubeDL.to_screen" instead')
        self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)

    def to_screen(self, message, skip_eol=False, quiet=None):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files.screen)

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files.console:
            return
        self._write_string(code, self._out_files.console)

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors else text if fallback is None else fallback

    def _format_out(self, *args, **kwargs):
        return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs)

    def _format_screen(self, *args, **kwargs):
        return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'DeprecationWarning: {message}')
        else:
            self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message or Print message to stderr'''
        if not self.params.get('verbose', False):
            return
        message = f'[debug] {message}'
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        outtmpl_dict = self.params.get('outtmpl', {})
        if not isinstance(outtmpl_dict, dict):
            outtmpl_dict = {'default': outtmpl_dict}
        # Remove spaces in the default template
        if self.params.get('restrictfilenames'):
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
        else:
            sanitize = lambda x: x
        outtmpl_dict.update({
            k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
            if outtmpl_dict.get(k) is None})
        for _, val in outtmpl_dict.items():
            if isinstance(val, bytes):
                self.report_warning('Parameter outtmpl is bytes, but should be a unicode string')
        return outtmpl_dict

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict)
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly; that is not what we want, since we need to keep
        # '%%' intact for the template dict substitution step. Work around
        # this with a boundary-like separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')
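    # A sketch of the intended behavior (illustrative, assuming HOME=/home/u):
    #   _outtmpl_expandpath('~/%(title)s.%(ext)s') -> '/home/u/%(title)s.%(ext)s'
    # while literal '%%' and '$$' in the template survive the expansion intact.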

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)
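    # Illustrative example: a stray '%' that does not start a '%(key)...'
    # conversion is doubled so that the later '%'-substitution leaves it alone,
    # e.g. escape_outtmpl('100% of %(title)s') -> '100%% of %(title)s'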

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err
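    # Illustrative usage: returns None when the template can be substituted
    # against a dummy info dict, or the resulting ValueError when it cannot:
    #   assert YoutubeDL.validate_outtmpl('%(title)s - %(id)s.%(ext)s') is None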

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        info_dict.pop('__pending_error', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
            (?P<negate>-)?
            (?P<fields>{FIELD_RE})
            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$''')
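        # Illustrative field expressions this pattern is meant to accept (the
        # authoritative syntax is the "OUTPUT TEMPLATE" section of the README):
        #   'title'               plain field traversal
        #   'formats.0.ext'       dotted traversal with indices
        #   'epoch-3600'          maths on numeric fields
        #   'duration>%H-%M-%S'   strftime-style formatting
        #   'title,id'            alternate field used when 'title' is missing
        #   'is_live&LIVE'        replacement emitted when the field is set
        #   'uploader|unknown'    default used when the field is missing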

        def _traverse_infodict(k):
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params['compat_opts']
                else NO_DEFAULT))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = f'0{field_size_compat_map[key]:d}d'

            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode() % str(value).encode()
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string,
                    # so we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitizer(initial_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
        return self.escape_outtmpl(outtmpl) % info_dict
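    # Illustrative round trip (made-up values):
    #   ydl.evaluate_outtmpl('%(title)s-%(id)s.%(ext)s',
    #                        {'title': 'Demo', 'id': 'x1', 'ext': 'mp4'})
    # would yield 'Demo-x1.mp4'.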

    def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
        assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
        if outtmpl is None:
            outtmpl = self.outtmpl_dict.get(tmpl_type or 'default', self.outtmpl_dict['default'])
        try:
            outtmpl = self._outtmpl_expandpath(outtmpl)
            filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
            if not filename:
                return None

            if tmpl_type in ('', 'temp'):
                final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
                if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
                    filename = replace_extension(filename, ext, final_ext)
            elif tmpl_type:
                force_ext = OUTTMPL_TYPES[tmpl_type]
                if force_ext:
                    filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                no_ext, *ext = filename.rsplit('.', 2)
                filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
        """Generate the output filename"""
        if outtmpl:
            assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
            dir_type = None
        filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
        if not filename and dir_type not in ('', 'temp'):
            return ''

        if warn:
            if not self.params.get('paths'):
                pass
            elif filename == '-':
                self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
        if filename == '-' or not filename:
            return filename

        return self.get_output_path(dir_type, filename)
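    # Illustrative example (made-up values): with outtmpl '%(title)s.%(ext)s'
    # and paths {'home': '/downloads'}, an info dict {'title': 'Demo', 'ext': 'mp4'}
    # would resolve to '/downloads/Demo.mp4'.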

    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """ Returns None if the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))

        def check_filter():
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
                if ret is NO_DEFAULT:
                    while True:
                        filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
                        reply = input(self._format_screen(
                            f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
                        if reply in {'y', ''}:
                            return None
                        elif reply == 'n':
                            return f'Skipping {video_title}'
                elif ret is not None:
                    return ret
            return None

        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason
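    # A sketch of a user-supplied match_filter (not part of this file),
    # matching the documented (info_dict, *, incomplete) signature:
    #
    #   def match_filter(info_dict, *, incomplete):
    #       if (info_dict.get('duration') or 0) > 600:
    #           return 'Longer than 10 minutes'  # skipped, with this message
    #       return None  # downloaded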
1362
1363 @staticmethod
1364 def add_extra_info(info_dict, extra_info):
1365 '''Set the keys from extra_info in info dict if they are missing'''
1366 for key, value in extra_info.items():
1367 info_dict.setdefault(key, value)
1368
1369 def extract_info(self, url, download=True, ie_key=None, extra_info=None,
1370 process=True, force_generic_extractor=False):
1371 """
1372 Extract and return the information dictionary of the URL.
1373
1374 Arguments:
1375 url -- URL to extract
1376
1377 Keyword arguments:
1378 download -- whether to download videos during extraction
1379 ie_key -- extractor key hint
1380 extra_info -- dictionary containing the extra values to add to each result
1381 process -- whether to resolve all unresolved references (URLs, playlist items),
1382 must be True for download to work.
1383 force_generic_extractor -- force using the generic extractor
1384 """
1385
1386 if extra_info is None:
1387 extra_info = {}
1388
1389 if not ie_key and force_generic_extractor:
1390 ie_key = 'Generic'
1391
1392 if ie_key:
1393 ies = {ie_key: self._get_info_extractor_class(ie_key)}
1394 else:
1395 ies = self._ies
1396
1397 for ie_key, ie in ies.items():
1398 if not ie.suitable(url):
1399 continue
1400
1401 if not ie.working():
1402 self.report_warning('The program functionality for this site has been marked as broken, '
1403 'and will probably not work.')
1404
1405 temp_id = ie.get_temp_id(url)
1406 if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
1407 self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
1408 if self.params.get('break_on_existing', False):
1409 raise ExistingVideoReached()
1410 break
1411 return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
1412 else:
1413 self.report_error('no suitable InfoExtractor for URL %s' % url)
1414
1415 def __handle_extraction_exceptions(func):
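# Runs the wrapped extraction call in a retry loop: ReExtractInfo restarts it,
# cancellations and LazyList/PagedList index errors propagate unchanged, and
# other known failures are routed through report_error.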
1416 @functools.wraps(func)
1417 def wrapper(self, *args, **kwargs):
1418 while True:
1419 try:
1420 return func(self, *args, **kwargs)
1421 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1422 raise
1423 except ReExtractInfo as e:
1424 if e.expected:
1425 self.to_screen(f'{e}; Re-extracting data')
1426 else:
1427 self.to_stderr('\r')
1428 self.report_warning(f'{e}; Re-extracting data')
1429 continue
1430 except GeoRestrictedError as e:
1431 msg = e.msg
1432 if e.countries:
1433 msg += '\nThis video is available in %s.' % ', '.join(
1434 map(ISO3166Utils.short2full, e.countries))
1435 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1436 self.report_error(msg)
1437 except ExtractorError as e: # An error we somewhat expected
1438 self.report_error(str(e), e.format_traceback())
1439 except Exception as e:
1440 if self.params.get('ignoreerrors'):
1441 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1442 else:
1443 raise
1444 break
1445 return wrapper
1446
1447 def _wait_for_video(self, ie_result):
1448 if (not self.params.get('wait_for_video')
1449 or ie_result.get('_type', 'video') != 'video'
1450 or ie_result.get('formats') or ie_result.get('url')):
1451 return
1452
1453 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1454 last_msg = ''
1455
1456 def progress(msg):
1457 nonlocal last_msg
1458 self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1459 last_msg = msg
1460
1461 min_wait, max_wait = self.params.get('wait_for_video')
1462 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1463 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1464 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1465 self.report_warning('Release time of video is not known')
1466 elif (diff or 0) <= 0:
1467 self.report_warning('Video should already be available according to extracted info')
1468 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1469 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1470
1471 wait_till = time.time() + diff
1472 try:
1473 while True:
1474 diff = wait_till - time.time()
1475 if diff <= 0:
1476 progress('')
1477 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1478 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1479 time.sleep(1)
1480 except KeyboardInterrupt:
1481 progress('')
1482 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1483 except BaseException as e:
1484 if not isinstance(e, ReExtractInfo):
1485 self.to_screen('')
1486 raise
1487
1488 @__handle_extraction_exceptions
1489 def __extract_info(self, url, ie, download, extra_info, process):
1490 ie_result = ie.extract(url)
1491 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1492 return
1493 if isinstance(ie_result, list):
1494 # Backwards compatibility: old IE result format
1495 ie_result = {
1496 '_type': 'compat_list',
1497 'entries': ie_result,
1498 }
1499 if extra_info.get('original_url'):
1500 ie_result.setdefault('original_url', extra_info['original_url'])
1501 self.add_default_extra_info(ie_result, ie, url)
1502 if process:
1503 self._wait_for_video(ie_result)
1504 return self.process_ie_result(ie_result, download, extra_info)
1505 else:
1506 return ie_result
1507
1508 def add_default_extra_info(self, ie_result, ie, url):
1509 if url is not None:
1510 self.add_extra_info(ie_result, {
1511 'webpage_url': url,
1512 'original_url': url,
1513 })
1514 webpage_url = ie_result.get('webpage_url')
1515 if webpage_url:
1516 self.add_extra_info(ie_result, {
1517 'webpage_url_basename': url_basename(webpage_url),
1518 'webpage_url_domain': get_domain(webpage_url),
1519 })
1520 if ie is not None:
1521 self.add_extra_info(ie_result, {
1522 'extractor': ie.IE_NAME,
1523 'extractor_key': ie.ie_key(),
1524 })
1525
1526 def process_ie_result(self, ie_result, download=True, extra_info=None):
1527 """
1528 Take the result of the ie (may be modified) and resolve all unresolved
1529 references (URLs, playlist items).
1530
1531 It will also download the videos if 'download'.
1532 Returns the resolved ie_result.
1533 """
1534 if extra_info is None:
1535 extra_info = {}
1536 result_type = ie_result.get('_type', 'video')
1537
1538 if result_type in ('url', 'url_transparent'):
1539 ie_result['url'] = sanitize_url(ie_result['url'])
1540 if ie_result.get('original_url'):
1541 extra_info.setdefault('original_url', ie_result['original_url'])
1542
1543 extract_flat = self.params.get('extract_flat', False)
1544 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1545 or extract_flat is True):
1546 info_copy = ie_result.copy()
1547 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1548 if ie and not ie_result.get('id'):
1549 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1550 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1551 self.add_extra_info(info_copy, extra_info)
1552 info_copy, _ = self.pre_process(info_copy)
1553 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1554 self._raise_pending_errors(info_copy)
1555 if self.params.get('force_write_download_archive', False):
1556 self.record_download_archive(info_copy)
1557 return ie_result
1558
1559 if result_type == 'video':
1560 self.add_extra_info(ie_result, extra_info)
1561 ie_result = self.process_video_result(ie_result, download=download)
1562 self._raise_pending_errors(ie_result)
1563 additional_urls = (ie_result or {}).get('additional_urls')
1564 if additional_urls:
1565 # TODO: Improve MetadataParserPP to allow setting a list
1566 if isinstance(additional_urls, compat_str):
1567 additional_urls = [additional_urls]
1568 self.to_screen(
1569 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1570 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1571 ie_result['additional_entries'] = [
1572 self.extract_info(
1573 url, download, extra_info=extra_info,
1574 force_generic_extractor=self.params.get('force_generic_extractor'))
1575 for url in additional_urls
1576 ]
1577 return ie_result
1578 elif result_type == 'url':
1579 # We have to add extra_info to the results because it may be
1580 # contained in a playlist
1581 return self.extract_info(
1582 ie_result['url'], download,
1583 ie_key=ie_result.get('ie_key'),
1584 extra_info=extra_info)
1585 elif result_type == 'url_transparent':
1586 # Use the information from the embedding page
1587 info = self.extract_info(
1588 ie_result['url'], ie_key=ie_result.get('ie_key'),
1589 extra_info=extra_info, download=False, process=False)
1590
1591 # extract_info may return None when ignoreerrors is enabled and
1592 # extraction failed with an error, don't crash and return early
1593 # in this case
1594 if not info:
1595 return info
1596
1597 new_result = info.copy()
1598 new_result.update(filter_dict(ie_result, lambda k, v: (
1599 v is not None and k not in {'_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'})))
1600
1601 # Extracted info may not be a video result (i.e.
1602 # info.get('_type', 'video') != 'video') but rather a URL or
1603 # url_transparent result. In such cases outer metadata (from ie_result)
1604 # should be propagated to inner one (info). For this to happen
1605 # _type of info should be overridden with url_transparent. This
1606 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1607 if new_result.get('_type') == 'url':
1608 new_result['_type'] = 'url_transparent'
1609
1610 return self.process_ie_result(
1611 new_result, download=download, extra_info=extra_info)
1612 elif result_type in ('playlist', 'multi_video'):
1613 # Protect from infinite recursion due to recursively nested playlists
1614 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1615 webpage_url = ie_result['webpage_url']
1616 if webpage_url in self._playlist_urls:
1617 self.to_screen(
1618 '[download] Skipping already downloaded playlist: %s'
1619 % (ie_result.get('title') or ie_result.get('id')))
1620 return
1621
1622 self._playlist_level += 1
1623 self._playlist_urls.add(webpage_url)
1624 self._fill_common_fields(ie_result, False)
1625 self._sanitize_thumbnails(ie_result)
1626 try:
1627 return self.__process_playlist(ie_result, download)
1628 finally:
1629 self._playlist_level -= 1
1630 if not self._playlist_level:
1631 self._playlist_urls.clear()
1632 elif result_type == 'compat_list':
1633 self.report_warning(
1634 'Extractor %s returned a compat_list result. '
1635 'It needs to be updated.' % ie_result.get('extractor'))
1636
1637 def _fixup(r):
1638 self.add_extra_info(r, {
1639 'extractor': ie_result['extractor'],
1640 'webpage_url': ie_result['webpage_url'],
1641 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1642 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1643 'extractor_key': ie_result['extractor_key'],
1644 })
1645 return r
1646 ie_result['entries'] = [
1647 self.process_ie_result(_fixup(r), download, extra_info)
1648 for r in ie_result['entries']
1649 ]
1650 return ie_result
1651 else:
1652 raise Exception('Invalid result type: %s' % result_type)
1653
1654 def _ensure_dir_exists(self, path):
1655 return make_dir(path, self.report_error)
1656
1657 @staticmethod
1658 def _playlist_infodict(ie_result, **kwargs):
1659 return {
1660 **ie_result,
1661 'playlist': ie_result.get('title') or ie_result.get('id'),
1662 'playlist_id': ie_result.get('id'),
1663 'playlist_title': ie_result.get('title'),
1664 'playlist_uploader': ie_result.get('uploader'),
1665 'playlist_uploader_id': ie_result.get('uploader_id'),
1666 'playlist_index': 0,
1667 **kwargs,
1668 }
1669
1670 def __process_playlist(self, ie_result, download):
1671 # We process each entry in the playlist
1672 playlist = ie_result.get('title') or ie_result.get('id')
1673 self.to_screen('[download] Downloading playlist: %s' % playlist)
1674
1675 if 'entries' not in ie_result:
1676 raise EntryNotInPlaylist('There are no entries')
1677
1678 MissingEntry = object()
1679 incomplete_entries = bool(ie_result.get('requested_entries'))
1680 if incomplete_entries:
1681 def fill_missing_entries(entries, indices):
1682 ret = [MissingEntry] * max(indices)
1683 for i, entry in zip(indices, entries):
1684 ret[i - 1] = entry
1685 return ret
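# e.g. entries=[e1, e2], indices=[1, 3] -> [e1, MissingEntry, e2]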
1686 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1687
1688 playlist_results = []
1689
1690 playliststart = self.params.get('playliststart', 1)
1691 playlistend = self.params.get('playlistend')
1692 # For backwards compatibility, interpret -1 as whole list
1693 if playlistend == -1:
1694 playlistend = None
1695
1696 playlistitems_str = self.params.get('playlist_items')
1697 playlistitems = None
1698 if playlistitems_str is not None:
1699 def iter_playlistitems(format):
1700 for string_segment in format.split(','):
1701 if '-' in string_segment:
1702 start, end = string_segment.split('-')
1703 for item in range(int(start), int(end) + 1):
1704 yield int(item)
1705 else:
1706 yield int(string_segment)
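# e.g. iter_playlistitems('1-3,7') yields 1, 2, 3, 7 (duplicates, e.g. from
# '1-3,2', are removed by orderedSet below)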
1707 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
1708
1709 ie_entries = ie_result['entries']
1710 if isinstance(ie_entries, list):
1711 playlist_count = len(ie_entries)
1712 msg = f'Collected {playlist_count} videos; downloading %d of them'
1713 ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count
1714
1715 def get_entry(i):
1716 return ie_entries[i - 1]
1717 else:
1718 msg = 'Downloading %d videos'
1719 if not isinstance(ie_entries, (PagedList, LazyList)):
1720 ie_entries = LazyList(ie_entries)
1721 elif isinstance(ie_entries, InAdvancePagedList):
1722 if ie_entries._pagesize == 1:
1723 playlist_count = ie_entries._pagecount
1724
1725 def get_entry(i):
1726 return YoutubeDL.__handle_extraction_exceptions(
1727 lambda self, i: ie_entries[i - 1]
1728 )(self, i)
1729
1730 entries, broken = [], False
1731 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1732 for i in items:
1733 if i == 0:
1734 continue
1735 if playlistitems is None and playlistend is not None and playlistend < i:
1736 break
1737 entry = None
1738 try:
1739 entry = get_entry(i)
1740 if entry is MissingEntry:
1741 raise EntryNotInPlaylist()
1742 except (IndexError, EntryNotInPlaylist):
1743 if incomplete_entries:
1744 raise EntryNotInPlaylist(f'Entry {i} cannot be found')
1745 elif not playlistitems:
1746 break
1747 entries.append(entry)
1748 try:
1749 if entry is not None:
1750 # TODO: Add auto-generated fields
1751 self._match_entry(entry, incomplete=True, silent=True)
1752 except (ExistingVideoReached, RejectedVideoReached):
1753 broken = True
1754 break
1755 ie_result['entries'] = entries
1756
1757 # Save playlist_index before re-ordering
1758 entries = [
1759 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1760 for i, entry in enumerate(entries, 1)
1761 if entry is not None]
1762 n_entries = len(entries)
1763
1764 if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
1765 ie_result['playlist_count'] = n_entries
1766
1767 if not playlistitems and (playliststart != 1 or playlistend):
1768 playlistitems = list(range(playliststart, playliststart + n_entries))
1769 ie_result['requested_entries'] = playlistitems
1770
1771 _infojson_written = False
1772 write_playlist_files = self.params.get('allow_playlist_files', True)
1773 if write_playlist_files and self.params.get('list_thumbnails'):
1774 self.list_thumbnails(ie_result)
1775 if write_playlist_files and not self.params.get('simulate'):
1776 ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
1777 _infojson_written = self._write_info_json(
1778 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1779 if _infojson_written is None:
1780 return
1781 if self._write_description('playlist', ie_result,
1782 self.prepare_filename(ie_copy, 'pl_description')) is None:
1783 return
1784 # TODO: This should be passed to ThumbnailsConvertor if necessary
1785 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1786
1787 if self.params.get('playlistreverse', False):
1788 entries = entries[::-1]
1789 if self.params.get('playlistrandom', False):
1790 random.shuffle(entries)
1791
1792 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1793
1794 self.to_screen(f'[{ie_result["extractor"]}] playlist {playlist}: {msg % n_entries}')
1795 failures = 0
1796 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1797 for i, entry_tuple in enumerate(entries, 1):
1798 playlist_index, entry = entry_tuple
1799 if 'playlist-index' in self.params['compat_opts']:
1800 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1801 self.to_screen('[download] Downloading video %s of %s' % (
1802 self._format_screen(i, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
1803 # This __x_forwarded_for_ip thing is a bit ugly but requires
1804 # minimal changes
1805 if x_forwarded_for:
1806 entry['__x_forwarded_for_ip'] = x_forwarded_for
1807 extra = {
1808 'n_entries': n_entries,
1809 '__last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1810 'playlist_count': ie_result.get('playlist_count'),
1811 'playlist_index': playlist_index,
1812 'playlist_autonumber': i,
1813 'playlist': playlist,
1814 'playlist_id': ie_result.get('id'),
1815 'playlist_title': ie_result.get('title'),
1816 'playlist_uploader': ie_result.get('uploader'),
1817 'playlist_uploader_id': ie_result.get('uploader_id'),
1818 'extractor': ie_result['extractor'],
1819 'webpage_url': ie_result['webpage_url'],
1820 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1821 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1822 'extractor_key': ie_result['extractor_key'],
1823 }
1824
1825 if self._match_entry(entry, incomplete=True) is not None:
1826 continue
1827
1828 entry_result = self.__process_iterable_entry(entry, download, extra)
1829 if not entry_result:
1830 failures += 1
1831 if failures >= max_failures:
1832 self.report_error(
1833 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1834 break
1835 playlist_results.append(entry_result)
1836 ie_result['entries'] = playlist_results
1837
1838 # Write the updated info to json
1839 if _infojson_written is True and self._write_info_json(
1840 'updated playlist', ie_result,
1841 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1842 return
1843
1844 ie_result = self.run_all_pps('playlist', ie_result)
1845 self.to_screen(f'[download] Finished downloading playlist: {playlist}')
1846 return ie_result
1847
1848 @__handle_extraction_exceptions
1849 def __process_iterable_entry(self, entry, download, extra_info):
1850 return self.process_ie_result(
1851 entry, download=download, extra_info=extra_info)
1852
1853 def _build_format_filter(self, filter_spec):
1854 " Returns a function to filter the formats according to the filter_spec "
1855
1856 OPERATORS = {
1857 '<': operator.lt,
1858 '<=': operator.le,
1859 '>': operator.gt,
1860 '>=': operator.ge,
1861 '=': operator.eq,
1862 '!=': operator.ne,
1863 }
1864 operator_rex = re.compile(r'''(?x)\s*
1865 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1866 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1867 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1868 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1869 m = operator_rex.fullmatch(filter_spec)
1870 if m:
1871 try:
1872 comparison_value = int(m.group('value'))
1873 except ValueError:
1874 comparison_value = parse_filesize(m.group('value'))
1875 if comparison_value is None:
1876 comparison_value = parse_filesize(m.group('value') + 'B')
1877 if comparison_value is None:
1878 raise ValueError(
1879 'Invalid value %r in format specification %r' % (
1880 m.group('value'), filter_spec))
1881 op = OPERATORS[m.group('op')]
1882
1883 if not m:
1884 STR_OPERATORS = {
1885 '=': operator.eq,
1886 '^=': lambda attr, value: attr.startswith(value),
1887 '$=': lambda attr, value: attr.endswith(value),
1888 '*=': lambda attr, value: value in attr,
1889 '~=': lambda attr, value: value.search(attr) is not None
1890 }
1891 str_operator_rex = re.compile(r'''(?x)\s*
1892 (?P<key>[a-zA-Z0-9._-]+)\s*
1893 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1894 (?P<quote>["'])?
1895 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1896 (?(quote)(?P=quote))\s*
1897 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1898 m = str_operator_rex.fullmatch(filter_spec)
1899 if m:
1900 if m.group('op') == '~=':
1901 comparison_value = re.compile(m.group('value'))
1902 else:
1903 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1904 str_op = STR_OPERATORS[m.group('op')]
1905 if m.group('negation'):
1906 op = lambda attr, value: not str_op(attr, value)
1907 else:
1908 op = str_op
1909
1910 if not m:
1911 raise SyntaxError('Invalid filter specification %r' % filter_spec)
1912
1913 def _filter(f):
1914 actual_value = f.get(m.group('key'))
1915 if actual_value is None:
1916 return m.group('none_inclusive')
1917 return op(actual_value, comparison_value)
1918 return _filter
1919
1920 def _check_formats(self, formats):
1921 for f in formats:
1922 self.to_screen('[info] Testing format %s' % f['format_id'])
1923 path = self.get_output_path('temp')
1924 if not self._ensure_dir_exists(f'{path}/'):
1925 continue
1926 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1927 temp_file.close()
1928 try:
1929 success, _ = self.dl(temp_file.name, f, test=True)
1930 except (DownloadError, OSError, ValueError) + network_exceptions:
1931 success = False
1932 finally:
1933 if os.path.exists(temp_file.name):
1934 try:
1935 os.remove(temp_file.name)
1936 except OSError:
1937 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1938 if success:
1939 yield f
1940 else:
1941 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1942
1943 def _default_format_spec(self, info_dict, download=True):
1944
1945 def can_merge():
1946 merger = FFmpegMergerPP(self)
1947 return merger.available and merger.can_merge()
1948
1949 prefer_best = (
1950 not self.params.get('simulate')
1951 and download
1952 and (
1953 not can_merge()
1954 or info_dict.get('is_live') and not self.params.get('live_from_start')
1955 or self.outtmpl_dict['default'] == '-'))
1956 compat = (
1957 prefer_best
1958 or self.params.get('allow_multiple_audio_streams', False)
1959 or 'format-spec' in self.params['compat_opts'])
1960
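# e.g. downloading to a file with a working ffmpeg merger and no compat
# options yields 'bestvideo*+bestaudio/best'; with no merger available,
# it falls back to 'best/bestvideo+bestaudio'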
1961 return (
1962 'best/bestvideo+bestaudio' if prefer_best
1963 else 'bestvideo*+bestaudio/best' if not compat
1964 else 'bestvideo+bestaudio/best')
1965
1966 def build_format_selector(self, format_spec):
1967 def syntax_error(note, start):
1968 message = (
1969 'Invalid format specification: '
1970 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
1971 return SyntaxError(message)
1972
1973 PICKFIRST = 'PICKFIRST'
1974 MERGE = 'MERGE'
1975 SINGLE = 'SINGLE'
1976 GROUP = 'GROUP'
1977 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
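# Illustrative format_spec strings this parser handles:
#   'bestvideo+bestaudio/best' - merge (+) with a fallback alternative (/)
#   'bv*[height<=1080]+ba'     - a selector constrained by a bracketed filter
#   '(mp4,webm)[height<480]'   - a group whose filter applies to every member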
1978
1979 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1980 'video': self.params.get('allow_multiple_video_streams', False)}
1981
1982 check_formats = self.params.get('check_formats') == 'selected'
1983
1984 def _parse_filter(tokens):
1985 filter_parts = []
1986 for type, string, start, _, _ in tokens:
1987 if type == tokenize.OP and string == ']':
1988 return ''.join(filter_parts)
1989 else:
1990 filter_parts.append(string)
1991
1992 def _remove_unused_ops(tokens):
1993 # Remove operators that we don't use and join them with the surrounding strings
1994 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1995 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1996 last_string, last_start, last_end, last_line = None, None, None, None
1997 for type, string, start, end, line in tokens:
1998 if type == tokenize.OP and string == '[':
1999 if last_string:
2000 yield tokenize.NAME, last_string, last_start, last_end, last_line
2001 last_string = None
2002 yield type, string, start, end, line
2003 # everything inside brackets will be handled by _parse_filter
2004 for type, string, start, end, line in tokens:
2005 yield type, string, start, end, line
2006 if type == tokenize.OP and string == ']':
2007 break
2008 elif type == tokenize.OP and string in ALLOWED_OPS:
2009 if last_string:
2010 yield tokenize.NAME, last_string, last_start, last_end, last_line
2011 last_string = None
2012 yield type, string, start, end, line
2013 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
2014 if not last_string:
2015 last_string = string
2016 last_start = start
2017 last_end = end
2018 else:
2019 last_string += string
2020 if last_string:
2021 yield tokenize.NAME, last_string, last_start, last_end, last_line
2022
2023 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
2024 selectors = []
2025 current_selector = None
2026 for type, string, start, _, _ in tokens:
2027 # skip the ENCODING token that tokenize emits first (the getattr is a Python 2 leftover)
2028 if type == getattr(tokenize, 'ENCODING', None):
2029 continue
2030 elif type in [tokenize.NAME, tokenize.NUMBER]:
2031 current_selector = FormatSelector(SINGLE, string, [])
2032 elif type == tokenize.OP:
2033 if string == ')':
2034 if not inside_group:
2035 # ')' will be handled by the parentheses group
2036 tokens.restore_last_token()
2037 break
2038 elif inside_merge and string in ['/', ',']:
2039 tokens.restore_last_token()
2040 break
2041 elif inside_choice and string == ',':
2042 tokens.restore_last_token()
2043 break
2044 elif string == ',':
2045 if not current_selector:
2046 raise syntax_error('"," must follow a format selector', start)
2047 selectors.append(current_selector)
2048 current_selector = None
2049 elif string == '/':
2050 if not current_selector:
2051 raise syntax_error('"/" must follow a format selector', start)
2052 first_choice = current_selector
2053 second_choice = _parse_format_selection(tokens, inside_choice=True)
2054 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2055 elif string == '[':
2056 if not current_selector:
2057 current_selector = FormatSelector(SINGLE, 'best', [])
2058 format_filter = _parse_filter(tokens)
2059 current_selector.filters.append(format_filter)
2060 elif string == '(':
2061 if current_selector:
2062 raise syntax_error('Unexpected "("', start)
2063 group = _parse_format_selection(tokens, inside_group=True)
2064 current_selector = FormatSelector(GROUP, group, [])
2065 elif string == '+':
2066 if not current_selector:
2067 raise syntax_error('Unexpected "+"', start)
2068 selector_1 = current_selector
2069 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2070 if not selector_2:
2071 raise syntax_error('Expected a selector', start)
2072 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2073 else:
2074 raise syntax_error(f'Operator not recognized: "{string}"', start)
2075 elif type == tokenize.ENDMARKER:
2076 break
2077 if current_selector:
2078 selectors.append(current_selector)
2079 return selectors
2080
2081 def _merge(formats_pair):
2082 format_1, format_2 = formats_pair
2083
2084 formats_info = []
2085 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2086 formats_info.extend(format_2.get('requested_formats', (format_2,)))
2087
2088 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2089 get_no_more = {'video': False, 'audio': False}
2090 for (i, fmt_info) in enumerate(formats_info):
2091 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2092 formats_info.pop(i)
2093 continue
2094 for aud_vid in ['audio', 'video']:
2095 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2096 if get_no_more[aud_vid]:
2097 formats_info.pop(i)
2098 break
2099 get_no_more[aud_vid] = True
2100
2101 if len(formats_info) == 1:
2102 return formats_info[0]
2103
2104 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2105 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2106
2107 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2108 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2109
2110 output_ext = self.params.get('merge_output_format')
2111 if not output_ext:
2112 if the_only_video:
2113 output_ext = the_only_video['ext']
2114 elif the_only_audio and not video_fmts:
2115 output_ext = the_only_audio['ext']
2116 else:
2117 output_ext = 'mkv'
2118
2119 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2120
2121 new_dict = {
2122 'requested_formats': formats_info,
2123 'format': '+'.join(filtered('format')),
2124 'format_id': '+'.join(filtered('format_id')),
2125 'ext': output_ext,
2126 'protocol': '+'.join(map(determine_protocol, formats_info)),
2127 'language': '+'.join(orderedSet(filtered('language'))) or None,
2128 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2129 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2130 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2131 }
2132
2133 if the_only_video:
2134 new_dict.update({
2135 'width': the_only_video.get('width'),
2136 'height': the_only_video.get('height'),
2137 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2138 'fps': the_only_video.get('fps'),
2139 'dynamic_range': the_only_video.get('dynamic_range'),
2140 'vcodec': the_only_video.get('vcodec'),
2141 'vbr': the_only_video.get('vbr'),
2142 'stretched_ratio': the_only_video.get('stretched_ratio'),
2143 })
2144
2145 if the_only_audio:
2146 new_dict.update({
2147 'acodec': the_only_audio.get('acodec'),
2148 'abr': the_only_audio.get('abr'),
2149 'asr': the_only_audio.get('asr'),
2150 })
2151
2152 return new_dict
2153
2154 def _check_formats(formats):
2155 if not check_formats:
2156 yield from formats
2157 return
2158 yield from self._check_formats(formats)
2159
2160 def _build_selector_function(selector):
2161 if isinstance(selector, list): # ,
2162 fs = [_build_selector_function(s) for s in selector]
2163
2164 def selector_function(ctx):
2165 for f in fs:
2166 yield from f(ctx)
2167 return selector_function
2168
2169 elif selector.type == GROUP: # ()
2170 selector_function = _build_selector_function(selector.selector)
2171
2172 elif selector.type == PICKFIRST: # /
2173 fs = [_build_selector_function(s) for s in selector.selector]
2174
2175 def selector_function(ctx):
2176 for f in fs:
2177 picked_formats = list(f(ctx))
2178 if picked_formats:
2179 return picked_formats
2180 return []
2181
2182 elif selector.type == MERGE: # +
2183 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2184
2185 def selector_function(ctx):
2186 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2187 yield _merge(pair)
2188
2189 elif selector.type == SINGLE: # atom
2190 format_spec = selector.selector or 'best'
2191
2192 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2193 if format_spec == 'all':
2194 def selector_function(ctx):
2195 yield from _check_formats(ctx['formats'][::-1])
2196 elif format_spec == 'mergeall':
2197 def selector_function(ctx):
2198 formats = list(_check_formats(
2199 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2200 if not formats:
2201 return
2202 merged_format = formats[-1]
2203 for f in formats[-2::-1]:
2204 merged_format = _merge((merged_format, f))
2205 yield merged_format
2206
2207 else:
2208 format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
2209 mobj = re.match(
2210 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2211 format_spec)
2212 if mobj is not None:
2213 format_idx = int_or_none(mobj.group('n'), default=1)
2214 format_reverse = mobj.group('bw')[0] == 'b'
2215 format_type = (mobj.group('type') or [None])[0]
2216 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2217 format_modified = mobj.group('mod') is not None
2218
2219 format_fallback = not format_type and not format_modified # for b, w
2220 _filter_f = (
2221 (lambda f: f.get('%scodec' % format_type) != 'none')
2222 if format_type and format_modified # bv*, ba*, wv*, wa*
2223 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2224 if format_type # bv, ba, wv, wa
2225 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2226 if not format_modified # b, w
2227 else lambda f: True) # b*, w*
2228 filter_f = lambda f: _filter_f(f) and (
2229 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
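# e.g. 'b'/'best' -> best pre-merged format, 'bv' -> best video-only format,
# 'ba.2' -> second-best audio-only format, 'wv*' -> worst format with a video stream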
2230 else:
2231 if format_spec in self._format_selection_exts['audio']:
2232 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2233 elif format_spec in self._format_selection_exts['video']:
2234 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2235 separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2236 elif format_spec in self._format_selection_exts['storyboards']:
2237 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2238 else:
2239 filter_f = lambda f: f.get('format_id') == format_spec # id
2240
2241 def selector_function(ctx):
2242 formats = list(ctx['formats'])
2243 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2244 if not matches:
2245 if format_fallback and ctx['incomplete_formats']:
2246 # for extractors with incomplete formats (audio only (soundcloud)
2247 # or video only (imgur)) best/worst will fallback to
2248 # best/worst {video,audio}-only format
2249 matches = formats
2250 elif separate_fallback and not ctx['has_merged_format']:
2251 # for compatibility with youtube-dl when there is no pre-merged format
2252 matches = list(filter(separate_fallback, formats))
2253 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2254 try:
2255 yield matches[format_idx - 1]
2256 except LazyList.IndexError:
2257 return
2258
2259 filters = [self._build_format_filter(f) for f in selector.filters]
2260
2261 def final_selector(ctx):
2262 ctx_copy = dict(ctx)
2263 for _filter in filters:
2264 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2265 return selector_function(ctx_copy)
2266 return final_selector
2267
2268 stream = io.BytesIO(format_spec.encode())
2269 try:
2270 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2271 except tokenize.TokenError:
2272 raise syntax_error('Missing closing/opening brackets or parentheses', (0, len(format_spec)))
2273
2274 class TokenIterator:
2275 def __init__(self, tokens):
2276 self.tokens = tokens
2277 self.counter = 0
2278
2279 def __iter__(self):
2280 return self
2281
2282 def __next__(self):
2283 if self.counter >= len(self.tokens):
2284 raise StopIteration()
2285 value = self.tokens[self.counter]
2286 self.counter += 1
2287 return value
2288
2289 next = __next__
2290
2291 def restore_last_token(self):
2292 self.counter -= 1
2293
2294 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2295 return _build_selector_function(parsed_selector)
2296
2297 def _calc_headers(self, info_dict):
2298 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
2299
2300 cookies = self._calc_cookies(info_dict['url'])
2301 if cookies:
2302 res['Cookie'] = cookies
2303
2304 if 'X-Forwarded-For' not in res:
2305 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2306 if x_forwarded_for_ip:
2307 res['X-Forwarded-For'] = x_forwarded_for_ip
2308
2309 return res
2310
2311 def _calc_cookies(self, url):
2312 pr = sanitized_Request(url)
2313 self.cookiejar.add_cookie_header(pr)
2314 return pr.get_header('Cookie')
2315
2316 def _sort_thumbnails(self, thumbnails):
2317 thumbnails.sort(key=lambda t: (
2318 t.get('preference') if t.get('preference') is not None else -1,
2319 t.get('width') if t.get('width') is not None else -1,
2320 t.get('height') if t.get('height') is not None else -1,
2321 t.get('id') if t.get('id') is not None else '',
2322 t.get('url')))
2323
2324 def _sanitize_thumbnails(self, info_dict):
2325 thumbnails = info_dict.get('thumbnails')
2326 if thumbnails is None:
2327 thumbnail = info_dict.get('thumbnail')
2328 if thumbnail:
2329 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2330 if not thumbnails:
2331 return
2332
2333 def check_thumbnails(thumbnails):
2334 for t in thumbnails:
2335 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2336 try:
2337 self.urlopen(HEADRequest(t['url']))
2338 except network_exceptions as err:
2339 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2340 continue
2341 yield t
2342
2343 self._sort_thumbnails(thumbnails)
2344 for i, t in enumerate(thumbnails):
2345 if t.get('id') is None:
2346 t['id'] = '%d' % i
2347 if t.get('width') and t.get('height'):
2348 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2349 t['url'] = sanitize_url(t['url'])
2350
2351 if self.params.get('check_formats') is True:
2352 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2353 else:
2354 info_dict['thumbnails'] = thumbnails
2355
2356 def _fill_common_fields(self, info_dict, is_video=True):
2357 # TODO: move sanitization here
2358 if is_video:
2359 # playlists are allowed to lack "title"
2360 title = info_dict.get('title', NO_DEFAULT)
2361 if title is NO_DEFAULT:
2362 raise ExtractorError('Missing "title" field in extractor result',
2363 video_id=info_dict['id'], ie=info_dict['extractor'])
2364 info_dict['fulltitle'] = title
2365 if not title:
2366 if title == '':
2367 self.write_debug('Extractor gave empty title. Creating a generic title')
2368 else:
2369 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2370 info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
2371
2372 if info_dict.get('duration') is not None:
2373 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2374
2375 for ts_key, date_key in (
2376 ('timestamp', 'upload_date'),
2377 ('release_timestamp', 'release_date'),
2378 ('modified_timestamp', 'modified_date'),
2379 ):
2380 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2381 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2382 # see http://bugs.python.org/issue1646728)
2383 with contextlib.suppress(ValueError, OverflowError, OSError):
2384 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2385 info_dict[date_key] = upload_date.strftime('%Y%m%d')
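# e.g. {'timestamp': 1577836800} fills in upload_date = '20200101'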
2386
2387 live_keys = ('is_live', 'was_live')
2388 live_status = info_dict.get('live_status')
2389 if live_status is None:
2390 for key in live_keys:
2391 if info_dict.get(key) is False:
2392 continue
2393 if info_dict.get(key):
2394 live_status = key
2395 break
2396 if all(info_dict.get(key) is False for key in live_keys):
2397 live_status = 'not_live'
2398 if live_status:
2399 info_dict['live_status'] = live_status
2400 for key in live_keys:
2401 if info_dict.get(key) is None:
2402 info_dict[key] = (live_status == key)
2403
2404 # Auto generate title fields corresponding to the *_number fields when missing
2405 # in order to always have clean titles. This is very common for TV series.
2406 for field in ('chapter', 'season', 'episode'):
2407 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2408 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2409
2410 def _raise_pending_errors(self, info):
2411 err = info.pop('__pending_error', None)
2412 if err:
2413 self.report_error(err, tb=False)
2414
2415 def process_video_result(self, info_dict, download=True):
2416 assert info_dict.get('_type', 'video') == 'video'
2417 self._num_videos += 1
2418
2419 if 'id' not in info_dict:
2420 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2421 elif not info_dict.get('id'):
2422 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2423
2424 def report_force_conversion(field, field_not, conversion):
2425 self.report_warning(
2426 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2427 % (field, field_not, conversion))
2428
2429 def sanitize_string_field(info, string_field):
2430 field = info.get(string_field)
2431 if field is None or isinstance(field, compat_str):
2432 return
2433 report_force_conversion(string_field, 'a string', 'string')
2434 info[string_field] = compat_str(field)
2435
2436 def sanitize_numeric_fields(info):
2437 for numeric_field in self._NUMERIC_FIELDS:
2438 field = info.get(numeric_field)
2439 if field is None or isinstance(field, (int, float)):
2440 continue
2441 report_force_conversion(numeric_field, 'numeric', 'int')
2442 info[numeric_field] = int_or_none(field)
2443
2444 sanitize_string_field(info_dict, 'id')
2445 sanitize_numeric_fields(info_dict)
2446 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2447 self.report_warning('"duration" field is negative, there is an error in extractor')
2448
2449 if 'playlist' not in info_dict:
2450 # It isn't part of a playlist
2451 info_dict['playlist'] = None
2452 info_dict['playlist_index'] = None
2453
2454 self._sanitize_thumbnails(info_dict)
2455
2456 thumbnail = info_dict.get('thumbnail')
2457 thumbnails = info_dict.get('thumbnails')
2458 if thumbnail:
2459 info_dict['thumbnail'] = sanitize_url(thumbnail)
2460 elif thumbnails:
2461 info_dict['thumbnail'] = thumbnails[-1]['url']
2462
2463 if info_dict.get('display_id') is None and 'id' in info_dict:
2464 info_dict['display_id'] = info_dict['id']
2465
2466 self._fill_common_fields(info_dict)
2467
2468 for cc_kind in ('subtitles', 'automatic_captions'):
2469 cc = info_dict.get(cc_kind)
2470 if cc:
2471 for _, subtitle in cc.items():
2472 for subtitle_format in subtitle:
2473 if subtitle_format.get('url'):
2474 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2475 if subtitle_format.get('ext') is None:
2476 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2477
2478 automatic_captions = info_dict.get('automatic_captions')
2479 subtitles = info_dict.get('subtitles')
2480
2481 info_dict['requested_subtitles'] = self.process_subtitles(
2482 info_dict['id'], subtitles, automatic_captions)
2483
2484 if info_dict.get('formats') is None:
2485 # There's only one format available
2486 formats = [info_dict]
2487 else:
2488 formats = info_dict['formats']
2489
2490 # or None ensures --clean-infojson removes it
2491 info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
2492 if not self.params.get('allow_unplayable_formats'):
2493 formats = [f for f in formats if not f.get('has_drm')]
2494 if info_dict['_has_drm'] and all(
2495 f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2496 self.report_warning(
2497 'This video is DRM protected and only images are available for download. '
2498 'Use --list-formats to see them')
2499
2500 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2501 if not get_from_start:
2502 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2503 if info_dict.get('is_live') and formats:
2504 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2505 if get_from_start and not formats:
2506 self.raise_no_formats(info_dict, msg=(
2507 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2508 'If you want to download from the current time, use --no-live-from-start'))
2509
2510 if not formats:
2511 self.raise_no_formats(info_dict)
2512
2513 def is_wellformed(f):
2514 url = f.get('url')
2515 if not url:
2516 self.report_warning(
2517 '"url" field is missing or empty - skipping format, '
2518 'there is an error in extractor')
2519 return False
2520 if isinstance(url, bytes):
2521 sanitize_string_field(f, 'url')
2522 return True
2523
2524 # Filter out malformed formats for better extraction robustness
2525 formats = list(filter(is_wellformed, formats))
2526
2527 formats_dict = {}
2528
2529 # We check that all the formats have the format and format_id fields
2530 for i, format in enumerate(formats):
2531 sanitize_string_field(format, 'format_id')
2532 sanitize_numeric_fields(format)
2533 format['url'] = sanitize_url(format['url'])
2534 if not format.get('format_id'):
2535 format['format_id'] = compat_str(i)
2536 else:
2537 # Sanitize format_id from characters used in format selector expression
2538 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2539 format_id = format['format_id']
2540 if format_id not in formats_dict:
2541 formats_dict[format_id] = []
2542 formats_dict[format_id].append(format)
2543
2544 # Make sure all formats have unique format_id
2545 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2546 for format_id, ambiguous_formats in formats_dict.items():
2547 ambiguous_id = len(ambiguous_formats) > 1
2548 for i, format in enumerate(ambiguous_formats):
2549 if ambiguous_id:
2550 format['format_id'] = '%s-%d' % (format_id, i)
2551 if format.get('ext') is None:
2552 format['ext'] = determine_ext(format['url']).lower()
2553 # Ensure there is no conflict between id and ext in format selection
2554 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2555 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2556 format['format_id'] = 'f%s' % format['format_id']
2557
2558 for i, format in enumerate(formats):
2559 if format.get('format') is None:
2560 format['format'] = '{id} - {res}{note}'.format(
2561 id=format['format_id'],
2562 res=self.format_resolution(format),
2563 note=format_field(format, 'format_note', ' (%s)'),
2564 )
2565 if format.get('protocol') is None:
2566 format['protocol'] = determine_protocol(format)
2567 if format.get('resolution') is None:
2568 format['resolution'] = self.format_resolution(format, default=None)
2569 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2570 format['dynamic_range'] = 'SDR'
2571 if (info_dict.get('duration') and format.get('tbr')
2572 and not format.get('filesize') and not format.get('filesize_approx')):
2573 format['filesize_approx'] = int(info_dict['duration'] * format['tbr'] * (1024 / 8))
2574
2575 # Add HTTP headers, so that external programs can use them from the
2576 # json output
2577 full_format_info = info_dict.copy()
2578 full_format_info.update(format)
2579 format['http_headers'] = self._calc_headers(full_format_info)
2580 # Remove private housekeeping stuff
2581 if '__x_forwarded_for_ip' in info_dict:
2582 del info_dict['__x_forwarded_for_ip']
2583
2584 if self.params.get('check_formats') is True:
2585 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2586
2587 if not formats or formats[0] is not info_dict:
2588 # only set the 'formats' field if the original info_dict lists them;
2589 # otherwise we end up with a circular reference: the first (and only)
2590 # element in the 'formats' field in info_dict is info_dict itself,
2591 # which can't be exported to json
2592 info_dict['formats'] = formats
2593
2594 info_dict, _ = self.pre_process(info_dict)
2595
2596 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2597 return info_dict
2598
2599 self.post_extract(info_dict)
2600 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2601
2602 # The pre-processors may have modified the formats
2603 formats = info_dict.get('formats', [info_dict])
2604
2605 list_only = self.params.get('simulate') is None and (
2606 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2607 interactive_format_selection = not list_only and self.format_selector == '-'
2608 if self.params.get('list_thumbnails'):
2609 self.list_thumbnails(info_dict)
2610 if self.params.get('listsubtitles'):
2611 if 'automatic_captions' in info_dict:
2612 self.list_subtitles(
2613 info_dict['id'], automatic_captions, 'automatic captions')
2614 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2615 if self.params.get('listformats') or interactive_format_selection:
2616 self.list_formats(info_dict)
2617 if list_only:
2618 # Without this printing, -F --print-json will not work
2619 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2620 return info_dict
2621
2622 format_selector = self.format_selector
2623 if format_selector is None:
2624 req_format = self._default_format_spec(info_dict, download=download)
2625 self.write_debug('Default format spec: %s' % req_format)
2626 format_selector = self.build_format_selector(req_format)
2627
2628 while True:
2629 if interactive_format_selection:
2630 req_format = input(
2631 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2632 try:
2633 format_selector = self.build_format_selector(req_format)
2634 except SyntaxError as err:
2635 self.report_error(err, tb=False, is_error=False)
2636 continue
2637
2638 formats_to_download = list(format_selector({
2639 'formats': formats,
2640 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2641 'incomplete_formats': (
2642 # All formats are video-only or
2643 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2644 # all formats are audio-only
2645 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2646 }))
2647 if interactive_format_selection and not formats_to_download:
2648 self.report_error('Requested format is not available', tb=False, is_error=False)
2649 continue
2650 break
2651
2652 if not formats_to_download:
2653 if not self.params.get('ignore_no_formats_error'):
2654 raise ExtractorError(
2655 'Requested format is not available. Use --list-formats for a list of available formats',
2656 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2657 self.report_warning('Requested format is not available')
2658 # Process what we can, even without any available formats.
2659 formats_to_download = [{}]
2660
2661 requested_ranges = self.params.get('download_ranges')
2662 if requested_ranges:
2663 requested_ranges = tuple(requested_ranges(info_dict, self))
2664
2665 best_format, downloaded_formats = formats_to_download[-1], []
2666 if download:
2667 if best_format:
2668 def to_screen(*msg):
2669 self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
2670
2671 to_screen(f'Downloading {len(formats_to_download)} format(s):',
2672 (f['format_id'] for f in formats_to_download))
2673 if requested_ranges:
2674 to_screen(f'Downloading {len(requested_ranges)} time ranges:',
2675 (f'{int(c["start_time"])}-{int(c["end_time"])}' for c in requested_ranges))
2676 max_downloads_reached = False
2677
2678 for fmt, chapter in itertools.product(formats_to_download, requested_ranges or [{}]):
2679 new_info = self._copy_infodict(info_dict)
2680 new_info.update(fmt)
2681 if chapter:
2682 new_info.update({
2683 'section_start': chapter.get('start_time'),
2684 'section_end': chapter.get('end_time', 0),
2685 'section_title': chapter.get('title'),
2686 'section_number': chapter.get('index'),
2687 })
2688 downloaded_formats.append(new_info)
2689 try:
2690 self.process_info(new_info)
2691 except MaxDownloadsReached:
2692 max_downloads_reached = True
2693 self._raise_pending_errors(new_info)
2694 # Remove copied info
2695 for key, val in tuple(new_info.items()):
2696 if info_dict.get(key) == val:
2697 new_info.pop(key)
2698 if max_downloads_reached:
2699 break
2700
2701 write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats}
2702 assert write_archive.issubset({True, False, 'ignore'})
2703 if True in write_archive and False not in write_archive:
2704 self.record_download_archive(info_dict)
2705
2706 info_dict['requested_downloads'] = downloaded_formats
2707 info_dict = self.run_all_pps('after_video', info_dict)
2708 if max_downloads_reached:
2709 raise MaxDownloadsReached()
2710
2711 # We update the info dict with the selected best quality format (backwards compatibility)
2712 info_dict.update(best_format)
2713 return info_dict
2714
2715 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2716 """Select the requested subtitles and their format"""
2717 available_subs, normal_sub_langs = {}, []
2718 if normal_subtitles and self.params.get('writesubtitles'):
2719 available_subs.update(normal_subtitles)
2720 normal_sub_langs = tuple(normal_subtitles.keys())
2721 if automatic_captions and self.params.get('writeautomaticsub'):
2722 for lang, cap_info in automatic_captions.items():
2723 if lang not in available_subs:
2724 available_subs[lang] = cap_info
2725
2726 if (not self.params.get('writesubtitles')
2727 and not self.params.get('writeautomaticsub')
2728 or not available_subs):
2729 return None
2730
2731 all_sub_langs = tuple(available_subs.keys())
2732 if self.params.get('allsubtitles', False):
2733 requested_langs = all_sub_langs
2734 elif self.params.get('subtitleslangs', False):
2735 # A list is used so that the order of languages will be the same as
2736 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
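# e.g. ['en.*', '-en-GB', 'ja'] keeps every English variant except en-GB and
# then adds Japanese (patterns are applied in order, anchored at both ends)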
2737 requested_langs = []
2738 for lang_re in self.params.get('subtitleslangs'):
2739 discard = lang_re[0] == '-'
2740 if discard:
2741 lang_re = lang_re[1:]
2742 if lang_re == 'all':
2743 if discard:
2744 requested_langs = []
2745 else:
2746 requested_langs.extend(all_sub_langs)
2747 continue
2748 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2749 if discard:
2750 for lang in current_langs:
2751 while lang in requested_langs:
2752 requested_langs.remove(lang)
2753 else:
2754 requested_langs.extend(current_langs)
2755 requested_langs = orderedSet(requested_langs)
2756 elif normal_sub_langs:
2757 requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
2758 else:
2759 requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
2760 if requested_langs:
2761 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2762
2763 formats_query = self.params.get('subtitlesformat', 'best')
2764 formats_preference = formats_query.split('/') if formats_query else []
2765 subs = {}
2766 for lang in requested_langs:
2767 formats = available_subs.get(lang)
2768 if formats is None:
2769 self.report_warning(f'{lang} subtitles not available for {video_id}')
2770 continue
2771 for ext in formats_preference:
2772 if ext == 'best':
2773 f = formats[-1]
2774 break
2775 matches = list(filter(lambda f: f['ext'] == ext, formats))
2776 if matches:
2777 f = matches[-1]
2778 break
2779 else:
2780 f = formats[-1]
2781 self.report_warning(
2782 'No subtitle format found matching "%s" for language %s, '
2783 'using %s' % (formats_query, lang, f['ext']))
2784 subs[lang] = f
2785 return subs
2786
2787 def _forceprint(self, key, info_dict):
2788 if info_dict is None:
2789 return
2790 info_copy = info_dict.copy()
2791 info_copy['formats_table'] = self.render_formats_table(info_dict)
2792 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2793 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2794 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2795
2796 def format_tmpl(tmpl):
2797 mobj = re.match(r'\w+(=?)$', tmpl)
2798 if mobj and mobj.group(1):
2799 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2800 elif mobj:
2801 return f'%({tmpl})s'
2802 return tmpl
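# e.g. format_tmpl('title') -> '%(title)s' and format_tmpl('id=') -> 'id = %(id)r';
# anything already shaped like an output template passes through unchanged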
2803
2804 for tmpl in self.params['forceprint'].get(key, []):
2805 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2806
2807 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2808 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
2809 tmpl = format_tmpl(tmpl)
2810 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
2811 if self._ensure_dir_exists(filename):
2812 with open(filename, 'a', encoding='utf-8') as f:
2813 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
2814
2815 def __forced_printings(self, info_dict, filename, incomplete):
2816 def print_mandatory(field, actual_field=None):
2817 if actual_field is None:
2818 actual_field = field
2819 if (self.params.get('force%s' % field, False)
2820 and (not incomplete or info_dict.get(actual_field) is not None)):
2821 self.to_stdout(info_dict[actual_field])
2822
2823 def print_optional(field):
2824 if (self.params.get('force%s' % field, False)
2825 and info_dict.get(field) is not None):
2826 self.to_stdout(info_dict[field])
2827
2828 info_dict = info_dict.copy()
2829 if filename is not None:
2830 info_dict['filename'] = filename
2831 if info_dict.get('requested_formats') is not None:
2832 # For RTMP URLs, also include the playpath
2833 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2834 elif info_dict.get('url'):
2835 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2836
2837 if (self.params.get('forcejson')
2838 or self.params['forceprint'].get('video')
2839 or self.params['print_to_file'].get('video')):
2840 self.post_extract(info_dict)
2841 self._forceprint('video', info_dict)
2842
2843 print_mandatory('title')
2844 print_mandatory('id')
2845 print_mandatory('url', 'urls')
2846 print_optional('thumbnail')
2847 print_optional('description')
2848 print_optional('filename')
2849 if self.params.get('forceduration') and info_dict.get('duration') is not None:
2850 self.to_stdout(formatSeconds(info_dict['duration']))
2851 print_mandatory('format')
2852
2853 if self.params.get('forcejson'):
2854 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
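# Note (illustrative): print_mandatory()/print_optional() above back the legacy
# --get-* options, e.g. 'forcetitle' prints info_dict['title'] and 'forceurl'
# prints the newline-joined 'urls' field built above.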
2855
2856 def dl(self, name, info, subtitle=False, test=False):
2857 if not info.get('url'):
2858 self.raise_no_formats(info, True)
2859
2860 if test:
2861 verbose = self.params.get('verbose')
2862 params = {
2863 'test': True,
2864 'quiet': self.params.get('quiet') or not verbose,
2865 'verbose': verbose,
2866 'noprogress': not verbose,
2867 'nopart': True,
2868 'skip_unavailable_fragments': False,
2869 'keep_fragments': False,
2870 'overwrites': True,
2871 '_no_ytdl_file': True,
2872 }
2873 else:
2874 params = self.params
2875 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2876 if not test:
2877 for ph in self._progress_hooks:
2878 fd.add_progress_hook(ph)
2879 urls = '", "'.join(
2880 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2881 for f in info.get('requested_formats', []) or [info])
2882 self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"')
2883
2884 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
2885 # But it may contain objects that are not deep-copyable
2886 new_info = self._copy_infodict(info)
2887 if new_info.get('http_headers') is None:
2888 new_info['http_headers'] = self._calc_headers(new_info)
2889 return fd.download(name, new_info, subtitle)
2890
2891 def existing_file(self, filepaths, *, default_overwrite=True):
2892 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2893 if existing_files and not self.params.get('overwrites', default_overwrite):
2894 return existing_files[0]
2895
2896 for file in existing_files:
2897 self.report_file_delete(file)
2898 os.remove(file)
2899 return None
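# Usage sketch (hypothetical filenames): with overwrites disabled, the first
# existing candidate is reused; with overwrites enabled, all existing candidates
# are deleted and None is returned so the caller re-downloads:
#   self.existing_file(('video.mkv', 'video.temp.mkv'), default_overwrite=False)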
2900
2901 def process_info(self, info_dict):
2902 """Process a single resolved IE result. (Modifies it in-place)"""
2903
2904 assert info_dict.get('_type', 'video') == 'video'
2905 original_infodict = info_dict
2906
2907 if 'format' not in info_dict and 'ext' in info_dict:
2908 info_dict['format'] = info_dict['ext']
2909
2910 # This is mostly just for backward compatibility of process_info
2911 # As a side-effect, this allows for format-specific filters
2912 if self._match_entry(info_dict) is not None:
2913 info_dict['__write_download_archive'] = 'ignore'
2914 return
2915
2916 # Does nothing under normal operation - for backward compatibility of process_info
2917 self.post_extract(info_dict)
2918 self._num_downloads += 1
2919
2920 # info_dict['_filename'] needs to be set for backward compatibility
2921 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2922 temp_filename = self.prepare_filename(info_dict, 'temp')
2923 files_to_move = {}
2924
2925 # Forced printings
2926 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2927
2928 def check_max_downloads():
2929 if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'):
2930 raise MaxDownloadsReached()
2931
2932 if self.params.get('simulate'):
2933 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2934 check_max_downloads()
2935 return
2936
2937 if full_filename is None:
2938 return
2939 if not self._ensure_dir_exists(encodeFilename(full_filename)):
2940 return
2941 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2942 return
2943
2944 if self._write_description('video', info_dict,
2945 self.prepare_filename(info_dict, 'description')) is None:
2946 return
2947
2948 sub_files = self._write_subtitles(info_dict, temp_filename)
2949 if sub_files is None:
2950 return
2951 files_to_move.update(dict(sub_files))
2952
2953 thumb_files = self._write_thumbnails(
2954 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2955 if thumb_files is None:
2956 return
2957 files_to_move.update(dict(thumb_files))
2958
2959 infofn = self.prepare_filename(info_dict, 'infojson')
2960 _infojson_written = self._write_info_json('video', info_dict, infofn)
2961 if _infojson_written:
2962 info_dict['infojson_filename'] = infofn
2963 # For backward compatibility, even though it was a private field
2964 info_dict['__infojson_filename'] = infofn
2965 elif _infojson_written is None:
2966 return
2967
2968 # Note: Annotations are deprecated
2969 annofn = None
2970 if self.params.get('writeannotations', False):
2971 annofn = self.prepare_filename(info_dict, 'annotation')
2972 if annofn:
2973 if not self._ensure_dir_exists(encodeFilename(annofn)):
2974 return
2975 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2976 self.to_screen('[info] Video annotations are already present')
2977 elif not info_dict.get('annotations'):
2978 self.report_warning('There are no annotations to write.')
2979 else:
2980 try:
2981 self.to_screen('[info] Writing video annotations to: ' + annofn)
2982 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2983 annofile.write(info_dict['annotations'])
2984 except (KeyError, TypeError):
2985 self.report_warning('There are no annotations to write.')
2986 except OSError:
2987 self.report_error('Cannot write annotations file: ' + annofn)
2988 return
2989
2990 # Write internet shortcut files
2991 def _write_link_file(link_type):
2992 url = try_get(info_dict['webpage_url'], iri_to_uri)
2993 if not url:
2994 self.report_warning(
2995 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2996 return True
2997 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2998 if not self._ensure_dir_exists(encodeFilename(linkfn)):
2999 return False
3000 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
3001 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
3002 return True
3003 try:
3004 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
3005 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
3006 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
3007 template_vars = {'url': url}
3008 if link_type == 'desktop':
3009 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
3010 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
3011 except OSError:
3012 self.report_error(f'Cannot write internet shortcut {linkfn}')
3013 return False
3014 return True
3015
3016 write_links = {
3017 'url': self.params.get('writeurllink'),
3018 'webloc': self.params.get('writewebloclink'),
3019 'desktop': self.params.get('writedesktoplink'),
3020 }
3021 if self.params.get('writelink'):
3022 link_type = ('webloc' if sys.platform == 'darwin'
3023 else 'desktop' if sys.platform.startswith('linux')
3024 else 'url')
3025 write_links[link_type] = True
3026
3027 if any(should_write and not _write_link_file(link_type)
3028 for link_type, should_write in write_links.items()):
3029 return
3030
3031 def replace_info_dict(new_info):
3032 nonlocal info_dict
3033 if new_info == info_dict:
3034 return
3035 info_dict.clear()
3036 info_dict.update(new_info)
3037
3038 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
3039 replace_info_dict(new_info)
3040
3041 if self.params.get('skip_download'):
3042 info_dict['filepath'] = temp_filename
3043 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3044 info_dict['__files_to_move'] = files_to_move
3045 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
3046 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
3047 else:
3048 # Download
3049 info_dict.setdefault('__postprocessors', [])
3050 try:
3051
3052 def existing_video_file(*filepaths):
3053 ext = info_dict.get('ext')
3054 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
3055 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
3056 default_overwrite=False)
3057 if file:
3058 info_dict['ext'] = os.path.splitext(file)[1][1:]
3059 return file
3060
3061 success = True
3062 merger, fd = FFmpegMergerPP(self), None
3063 if info_dict.get('url'):
3064 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3065 if fd is not FFmpegFD and (
3066 info_dict.get('section_start') or info_dict.get('section_end')):
3067 msg = ('This format cannot be partially downloaded' if merger.available
3068 else 'You have requested downloading the video partially, but ffmpeg is not installed')
3069 self.report_error(f'{msg}. Aborting')
3070 return
3071
3072 if info_dict.get('requested_formats') is not None:
3073
3074 def compatible_formats(formats):
3075 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3076 video_formats = [format for format in formats if format.get('vcodec') != 'none']
3077 audio_formats = [format for format in formats if format.get('acodec') != 'none']
3078 if len(video_formats) > 2 or len(audio_formats) > 2:
3079 return False
3080
3081 # Check extension
3082 exts = {format.get('ext') for format in formats}
3083 COMPATIBLE_EXTS = (
3084 {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
3085 {'webm'},
3086 )
3087 for ext_sets in COMPATIBLE_EXTS:
3088 if ext_sets.issuperset(exts):
3089 return True
3090 # TODO: Check acodec/vcodec
3091 return False
3092
3093 requested_formats = info_dict['requested_formats']
3094 old_ext = info_dict['ext']
3095 if self.params.get('merge_output_format') is None:
3096 if not compatible_formats(requested_formats):
3097 info_dict['ext'] = 'mkv'
3098 self.report_warning(
3099 'Requested formats are incompatible for merge and will be merged into mkv')
3100 if (info_dict['ext'] == 'webm'
3101 and info_dict.get('thumbnails')
3102 # check with type instead of pp_key, __name__, or isinstance
3103 # since we don't want any custom PPs to trigger this
3104 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721
3105 info_dict['ext'] = 'mkv'
3106 self.report_warning(
3107 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3108 new_ext = info_dict['ext']
3109
3110 def correct_ext(filename, ext=new_ext):
3111 if filename == '-':
3112 return filename
3113 filename_real_ext = os.path.splitext(filename)[1][1:]
3114 filename_wo_ext = (
3115 os.path.splitext(filename)[0]
3116 if filename_real_ext in (old_ext, new_ext)
3117 else filename)
3118 return f'{filename_wo_ext}.{ext}'
3119
3120 # Ensure filename always has a correct extension for successful merge
3121 full_filename = correct_ext(full_filename)
3122 temp_filename = correct_ext(temp_filename)
3123 dl_filename = existing_video_file(full_filename, temp_filename)
3124 info_dict['__real_download'] = False
3125
3126 downloaded = []
3127 if dl_filename is not None:
3128 self.report_file_already_downloaded(dl_filename)
3129 elif fd:
3130 for f in requested_formats if fd != FFmpegFD else []:  # FFmpegFD downloads all requested formats in one invocation
3131 f['filepath'] = fname = prepend_extension(
3132 correct_ext(temp_filename, info_dict['ext']),
3133 'f%s' % f['format_id'], info_dict['ext'])
3134 downloaded.append(fname)
3135 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3136 success, real_download = self.dl(temp_filename, info_dict)
3137 info_dict['__real_download'] = real_download
3138 else:
3139 if self.params.get('allow_unplayable_formats'):
3140 self.report_warning(
3141 'You have requested merging of multiple formats '
3142 'while also allowing unplayable formats to be downloaded. '
3143 'The formats won\'t be merged to prevent data corruption.')
3144 elif not merger.available:
3145 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3146 if not self.params.get('ignoreerrors'):
3147 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3148 return
3149 self.report_warning(f'{msg}. The formats won\'t be merged')
3150
3151 if temp_filename == '-':
3152 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3153 else 'but the formats are incompatible for simultaneous download' if merger.available
3154 else 'but ffmpeg is not installed')
3155 self.report_warning(
3156 f'You have requested downloading multiple formats to stdout {reason}. '
3157 'The formats will be streamed one after the other')
3158 fname = temp_filename
3159 for f in requested_formats:
3160 new_info = dict(info_dict)
3161 del new_info['requested_formats']
3162 new_info.update(f)
3163 if temp_filename != '-':
3164 fname = prepend_extension(
3165 correct_ext(temp_filename, new_info['ext']),
3166 'f%s' % f['format_id'], new_info['ext'])
3167 if not self._ensure_dir_exists(fname):
3168 return
3169 f['filepath'] = fname
3170 downloaded.append(fname)
3171 partial_success, real_download = self.dl(fname, new_info)
3172 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3173 success = success and partial_success
3174
3175 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3176 info_dict['__postprocessors'].append(merger)
3177 info_dict['__files_to_merge'] = downloaded
3178 # Even if no new files were downloaded, the merge itself only happens now
3179 info_dict['__real_download'] = True
3180 else:
3181 for file in downloaded:
3182 files_to_move[file] = None
3183 else:
3184 # Just a single file
3185 dl_filename = existing_video_file(full_filename, temp_filename)
3186 if dl_filename is None or dl_filename == temp_filename:
3187 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3188 # So we should try to resume the download
3189 success, real_download = self.dl(temp_filename, info_dict)
3190 info_dict['__real_download'] = real_download
3191 else:
3192 self.report_file_already_downloaded(dl_filename)
3193
3194 dl_filename = dl_filename or temp_filename
3195 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3196
3197 except network_exceptions as err:
3198 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3199 return
3200 except OSError as err:
3201 raise UnavailableVideoError(err)
3202 except (ContentTooShortError, ) as err:
3203 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3204 return
3205
3206 self._raise_pending_errors(info_dict)
3207 if success and full_filename != '-':
3208
3209 def fixup():
3210 do_fixup = True
3211 fixup_policy = self.params.get('fixup')
3212 vid = info_dict['id']
3213
3214 if fixup_policy in ('ignore', 'never'):
3215 return
3216 elif fixup_policy == 'warn':
3217 do_fixup = 'warn'
3218 elif fixup_policy != 'force':
3219 assert fixup_policy in ('detect_or_warn', None)
3220 if not info_dict.get('__real_download'):
3221 do_fixup = False
3222
3223 def ffmpeg_fixup(cndn, msg, cls):
3224 if not (do_fixup and cndn):
3225 return
3226 elif do_fixup == 'warn':
3227 self.report_warning(f'{vid}: {msg}')
3228 return
3229 pp = cls(self)
3230 if pp.available:
3231 info_dict['__postprocessors'].append(pp)
3232 else:
3233 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3234
3235 stretched_ratio = info_dict.get('stretched_ratio')
3236 ffmpeg_fixup(
3237 stretched_ratio not in (1, None),
3238 f'Non-uniform pixel ratio {stretched_ratio}',
3239 FFmpegFixupStretchedPP)
3240
3241 ffmpeg_fixup(
3242 (info_dict.get('requested_formats') is None
3243 and info_dict.get('container') == 'm4a_dash'
3244 and info_dict.get('ext') == 'm4a'),
3245 'writing DASH m4a. Only some players support this container',
3246 FFmpegFixupM4aPP)
3247
3248 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3249 downloader = downloader.FD_NAME if downloader else None
3250
3251 if info_dict.get('requested_formats') is None:  # Not necessary when merging
3252 ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts')
3253 or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
3254 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3255 FFmpegFixupM3u8PP)
3256 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3257 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3258
3259 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3260 ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP)
3261
3262 fixup()
3263 try:
3264 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3265 except PostProcessingError as err:
3266 self.report_error('Postprocessing: %s' % str(err))
3267 return
3268 try:
3269 for ph in self._post_hooks:
3270 ph(info_dict['filepath'])
3271 except Exception as err:
3272 self.report_error('post hooks: %s' % str(err))
3273 return
3274 info_dict['__write_download_archive'] = True
3275
3276 assert info_dict is original_infodict # Make sure the info_dict was modified in-place
3277 if self.params.get('force_write_download_archive'):
3278 info_dict['__write_download_archive'] = True
3279 check_max_downloads()
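# Summary (for reference) of the 'fixup' policies handled in fixup() above:
#   'never'/'ignore'  -> do nothing
#   'warn'            -> only report the detected problem
#   'detect_or_warn'  -> (default) attach the fixup postprocessor, but only
#                        when the file was downloaded in this run ('__real_download')
#   'force'           -> always attach the fixup postprocessor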
3280
3281 def __download_wrapper(self, func):
3282 @functools.wraps(func)
3283 def wrapper(*args, **kwargs):
3284 try:
3285 res = func(*args, **kwargs)
3286 except UnavailableVideoError as e:
3287 self.report_error(e)
3288 except DownloadCancelled as e:
3289 self.to_screen(f'[info] {e}')
3290 if not self.params.get('break_per_url'):
3291 raise
3292 else:
3293 if self.params.get('dump_single_json', False):
3294 self.post_extract(res)
3295 self.to_stdout(json.dumps(self.sanitize_info(res)))
3296 return wrapper
3297
3298 def download(self, url_list):
3299 """Download a given list of URLs."""
3300 url_list = variadic(url_list) # Passing a single URL is a common mistake
3301 outtmpl = self.outtmpl_dict['default']
3302 if (len(url_list) > 1
3303 and outtmpl != '-'
3304 and '%' not in outtmpl
3305 and self.params.get('max_downloads') != 1):
3306 raise SameFileError(outtmpl)
3307
3308 for url in url_list:
3309 self.__download_wrapper(self.extract_info)(
3310 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3311
3312 return self._download_retcode
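# Typical embedding usage (hypothetical options); variadic() above also lets a
# single URL string through, though a list is the documented form:
#   with YoutubeDL({'outtmpl': '%(title)s.%(ext)s'}) as ydl:
#       retcode = ydl.download(['https://example.com/watch?v=xyz'])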
3313
3314 def download_with_info_file(self, info_filename):
3315 with contextlib.closing(fileinput.FileInput(
3316 [info_filename], mode='r',
3317 openhook=fileinput.hook_encoded('utf-8'))) as f:
3318 # FileInput doesn't have a read method, so we can't call json.load directly
3319 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3320 try:
3321 self.__download_wrapper(self.process_ie_result)(info, download=True)
3322 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3323 if not isinstance(e, EntryNotInPlaylist):
3324 self.to_stderr('\r')
3325 webpage_url = info.get('webpage_url')
3326 if webpage_url is not None:
3327 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3328 return self.download([webpage_url])
3329 else:
3330 raise
3331 return self._download_retcode
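# Example (illustrative): this is the backend of --load-info-json; it re-runs a
# download from a previously written metadata file:
#   ydl.download_with_info_file('video.info.json')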
3332
3333 @staticmethod
3334 def sanitize_info(info_dict, remove_private_keys=False):
3335 ''' Sanitize the infodict for converting to json '''
3336 if info_dict is None:
3337 return info_dict
3338 info_dict.setdefault('epoch', int(time.time()))
3339 info_dict.setdefault('_type', 'video')
3340
3341 if remove_private_keys:
3342 reject = lambda k, v: v is None or k.startswith('__') or k in {
3343 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3344 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
3345 }
3346 else:
3347 reject = lambda k, v: False
3348
3349 def filter_fn(obj):
3350 if isinstance(obj, dict):
3351 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3352 elif isinstance(obj, (list, tuple, set, LazyList)):
3353 return list(map(filter_fn, obj))
3354 elif obj is None or isinstance(obj, (str, int, float, bool)):
3355 return obj
3356 else:
3357 return repr(obj)
3358
3359 return filter_fn(info_dict)
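# Example (hypothetical input): with remove_private_keys=True, None values and
# internal fields are dropped, and non-JSON-serializable values become repr()s:
#   YoutubeDL.sanitize_info({'id': 'x', '__real_download': True}, True)
#   -> {'id': 'x', 'epoch': <int>, '_type': 'video'}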
3360
3361 @staticmethod
3362 def filter_requested_info(info_dict, actually_filter=True):
3363 ''' Alias of sanitize_info for backward compatibility '''
3364 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3365
3366 def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
3367 for filename in set(filter(None, files_to_delete)):
3368 if msg:
3369 self.to_screen(msg % filename)
3370 try:
3371 os.remove(filename)
3372 except OSError:
3373 self.report_warning(f'Unable to delete file {filename}')
3374 if filename in info.get('__files_to_move', []): # NB: Delete even if None
3375 del info['__files_to_move'][filename]
3376
3377 @staticmethod
3378 def post_extract(info_dict):
3379 def actual_post_extract(info_dict):
3380 if info_dict.get('_type') in ('playlist', 'multi_video'):
3381 for video_dict in info_dict.get('entries', {}):
3382 actual_post_extract(video_dict or {})
3383 return
3384
3385 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3386 info_dict.update(post_extractor())
3387
3388 actual_post_extract(info_dict or {})
3389
3390 def run_pp(self, pp, infodict):
3391 files_to_delete = []
3392 if '__files_to_move' not in infodict:
3393 infodict['__files_to_move'] = {}
3394 try:
3395 files_to_delete, infodict = pp.run(infodict)
3396 except PostProcessingError as e:
3397 # Must be True and not 'only_download'
3398 if self.params.get('ignoreerrors') is True:
3399 self.report_error(e)
3400 return infodict
3401 raise
3402
3403 if not files_to_delete:
3404 return infodict
3405 if self.params.get('keepvideo', False):
3406 for f in files_to_delete:
3407 infodict['__files_to_move'].setdefault(f, '')
3408 else:
3409 self._delete_downloaded_files(
3410 *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
3411 return infodict
3412
3413 def run_all_pps(self, key, info, *, additional_pps=None):
3414 self._forceprint(key, info)
3415 for pp in (additional_pps or []) + self._pps[key]:
3416 info = self.run_pp(pp, info)
3417 return info
3418
3419 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3420 info = dict(ie_info)
3421 info['__files_to_move'] = files_to_move or {}
3422 try:
3423 info = self.run_all_pps(key, info)
3424 except PostProcessingError as err:
3425 msg = f'Preprocessing: {err}'
3426 info.setdefault('__pending_error', msg)
3427 self.report_error(msg, is_error=False)
3428 return info, info.pop('__files_to_move', None)
3429
3430 def post_process(self, filename, info, files_to_move=None):
3431 """Run all the postprocessors on the given file."""
3432 info['filepath'] = filename
3433 info['__files_to_move'] = files_to_move or {}
3434 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3435 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3436 del info['__files_to_move']
3437 return self.run_all_pps('after_move', info)
3438
3439 def _make_archive_id(self, info_dict):
3440 video_id = info_dict.get('id')
3441 if not video_id:
3442 return
3443 # Future-proof against any change in case
3444 # and backwards compatibility with prior versions
3445 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3446 if extractor is None:
3447 url = str_or_none(info_dict.get('url'))
3448 if not url:
3449 return
3450 # Try to find matching extractor for the URL and take its ie_key
3451 for ie_key, ie in self._ies.items():
3452 if ie.suitable(url):
3453 extractor = ie_key
3454 break
3455 else:
3456 return
3457 return f'{extractor.lower()} {video_id}'
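# Example: archive ids are '<extractor_key lowercased> <video id>', so a
# YouTube video would be recorded as e.g. 'youtube dQw4w9WgXcQ'.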
3458
3459 def in_download_archive(self, info_dict):
3460 fn = self.params.get('download_archive')
3461 if fn is None:
3462 return False
3463
3464 vid_id = self._make_archive_id(info_dict)
3465 if not vid_id:
3466 return False # Incomplete video information
3467
3468 return vid_id in self.archive
3469
3470 def record_download_archive(self, info_dict):
3471 fn = self.params.get('download_archive')
3472 if fn is None:
3473 return
3474 vid_id = self._make_archive_id(info_dict)
3475 assert vid_id
3476 self.write_debug(f'Adding to archive: {vid_id}')
3477 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3478 archive_file.write(vid_id + '\n')
3479 self.archive.add(vid_id)
3480
3481 @staticmethod
3482 def format_resolution(format, default='unknown'):
3483 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3484 return 'audio only'
3485 if format.get('resolution') is not None:
3486 return format['resolution']
3487 if format.get('width') and format.get('height'):
3488 return '%dx%d' % (format['width'], format['height'])
3489 elif format.get('height'):
3490 return '%sp' % format['height']
3491 elif format.get('width'):
3492 return '%dx?' % format['width']
3493 return default
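# Examples (illustrative):
#   {'vcodec': 'none', 'acodec': 'mp4a'}  -> 'audio only'
#   {'width': 1920, 'height': 1080}       -> '1920x1080'
#   {'height': 720}                       -> '720p'
#   {'width': 1280}                       -> '1280x?'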
3494
3495 def _list_format_headers(self, *headers):
3496 if self.params.get('listformats_table', True) is not False:
3497 return [self._format_out(header, self.Styles.HEADERS) for header in headers]
3498 return headers
3499
3500 def _format_note(self, fdict):
3501 res = ''
3502 if fdict.get('ext') in ['f4f', 'f4m']:
3503 res += '(unsupported)'
3504 if fdict.get('language'):
3505 if res:
3506 res += ' '
3507 res += '[%s]' % fdict['language']
3508 if fdict.get('format_note') is not None:
3509 if res:
3510 res += ' '
3511 res += fdict['format_note']
3512 if fdict.get('tbr') is not None:
3513 if res:
3514 res += ', '
3515 res += '%4dk' % fdict['tbr']
3516 if fdict.get('container') is not None:
3517 if res:
3518 res += ', '
3519 res += '%s container' % fdict['container']
3520 if (fdict.get('vcodec') is not None
3521 and fdict.get('vcodec') != 'none'):
3522 if res:
3523 res += ', '
3524 res += fdict['vcodec']
3525 if fdict.get('vbr') is not None:
3526 res += '@'
3527 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3528 res += 'video@'
3529 if fdict.get('vbr') is not None:
3530 res += '%4dk' % fdict['vbr']
3531 if fdict.get('fps') is not None:
3532 if res:
3533 res += ', '
3534 res += '%sfps' % fdict['fps']
3535 if fdict.get('acodec') is not None:
3536 if res:
3537 res += ', '
3538 if fdict['acodec'] == 'none':
3539 res += 'video only'
3540 else:
3541 res += '%-5s' % fdict['acodec']
3542 elif fdict.get('abr') is not None:
3543 if res:
3544 res += ', '
3545 res += 'audio'
3546 if fdict.get('abr') is not None:
3547 res += '@%3dk' % fdict['abr']
3548 if fdict.get('asr') is not None:
3549 res += ' (%5dHz)' % fdict['asr']
3550 if fdict.get('filesize') is not None:
3551 if res:
3552 res += ', '
3553 res += format_bytes(fdict['filesize'])
3554 elif fdict.get('filesize_approx') is not None:
3555 if res:
3556 res += ', '
3557 res += '~' + format_bytes(fdict['filesize_approx'])
3558 return res
3559
3560 def render_formats_table(self, info_dict):
3561 if not info_dict.get('formats') and not info_dict.get('url'):
3562 return None
3563
3564 formats = info_dict.get('formats', [info_dict])
3565 if self.params.get('listformats_table', True) is False:
3566 table = [
3567 [
3568 format_field(f, 'format_id'),
3569 format_field(f, 'ext'),
3570 self.format_resolution(f),
3571 self._format_note(f)
3572 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3573 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3574
3575 delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3576 table = [
3577 [
3578 self._format_out(format_field(f, 'format_id'), self.Styles.ID),
3579 format_field(f, 'ext'),
3580 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3581 format_field(f, 'fps', '\t%d'),
3582 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3583 delim,
3584 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3585 format_field(f, 'tbr', '\t%dk'),
3586 shorten_protocol_name(f.get('protocol', '')),
3587 delim,
3588 format_field(f, 'vcodec', default='unknown').replace(
3589 'none', 'images' if f.get('acodec') == 'none'
3590 else self._format_out('audio only', self.Styles.SUPPRESS)),
3591 format_field(f, 'vbr', '\t%dk'),
3592 format_field(f, 'acodec', default='unknown').replace(
3593 'none', '' if f.get('vcodec') == 'none'
3594 else self._format_out('video only', self.Styles.SUPPRESS)),
3595 format_field(f, 'abr', '\t%dk'),
3596 format_field(f, 'asr', '\t%dHz'),
3597 join_nonempty(
3598 self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3599 format_field(f, 'language', '[%s]'),
3600 join_nonempty(format_field(f, 'format_note'),
3601 format_field(f, 'container', ignore=(None, f.get('ext'))),
3602 delim=', '),
3603 delim=' '),
3604 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3605 header_line = self._list_format_headers(
3606 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3607 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3608
3609 return render_table(
3610 header_line, table, hide_empty=True,
3611 delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3612
3613 def render_thumbnails_table(self, info_dict):
3614 thumbnails = list(info_dict.get('thumbnails') or [])
3615 if not thumbnails:
3616 return None
3617 return render_table(
3618 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3619 [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3620
3621 def render_subtitles_table(self, video_id, subtitles):
3622 def _row(lang, formats):
3623 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3624 if len(set(names)) == 1:
3625 names = [] if names[0] == 'unknown' else names[:1]
3626 return [lang, ', '.join(names), ', '.join(exts)]
3627
3628 if not subtitles:
3629 return None
3630 return render_table(
3631 self._list_format_headers('Language', 'Name', 'Formats'),
3632 [_row(lang, formats) for lang, formats in subtitles.items()],
3633 hide_empty=True)
3634
3635 def __list_table(self, video_id, name, func, *args):
3636 table = func(*args)
3637 if not table:
3638 self.to_screen(f'{video_id} has no {name}')
3639 return
3640 self.to_screen(f'[info] Available {name} for {video_id}:')
3641 self.to_stdout(table)
3642
3643 def list_formats(self, info_dict):
3644 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3645
3646 def list_thumbnails(self, info_dict):
3647 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3648
3649 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3650 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3651
3652 def urlopen(self, req):
3653 """ Start an HTTP download """
3654 if isinstance(req, str):
3655 req = sanitized_Request(req)
3656 return self._opener.open(req, timeout=self._socket_timeout)
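# Example (illustrative): accepts either a URL string or a Request object and
# goes through the opener built in _setup_opener() (proxies, cookies, timeout):
#   data = ydl.urlopen('https://example.com/robots.txt').read()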
3657
3658 def print_debug_header(self):
3659 if not self.params.get('verbose'):
3660 return
3661
3662 def get_encoding(stream):
3663 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3664 if not supports_terminal_sequences(stream):
3665 from .utils import WINDOWS_VT_MODE # Must be imported locally
3666 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3667 return ret
3668
3669 encoding_str = 'Encodings: locale %s, fs %s, pref %s, %s' % (
3670 locale.getpreferredencoding(),
3671 sys.getfilesystemencoding(),
3672 self.get_encoding(),
3673 ', '.join(
3674 f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_
3675 if stream is not None and key != 'console')
3676 )
3677
3678 logger = self.params.get('logger')
3679 if logger:
3680 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3681 write_debug(encoding_str)
3682 else:
3683 write_string(f'[debug] {encoding_str}\n', encoding=None)
3684 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3685
3686 source = detect_variant()
3687 write_debug(join_nonempty(
3688 'yt-dlp version', __version__,
3689 f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3690 '' if source == 'unknown' else f'({source})',
3691 delim=' '))
3692 if not _LAZY_LOADER:
3693 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3694 write_debug('Lazy loading extractors is forcibly disabled')
3695 else:
3696 write_debug('Lazy loading extractors is disabled')
3697 if plugin_extractors or plugin_postprocessors:
3698 write_debug('Plugins: %s' % [
3699 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3700 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3701 if self.params['compat_opts']:
3702 write_debug('Compatibility options: %s' % ', '.join(self.params['compat_opts']))
3703
3704 if source == 'source':
3705 try:
3706 sp = Popen(
3707 ['git', 'rev-parse', '--short', 'HEAD'],
3708 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3709 cwd=os.path.dirname(os.path.abspath(__file__)))
3710 out, err = sp.communicate_or_kill()
3711 out = out.decode().strip()
3712 if re.match('[0-9a-f]+', out):
3713 write_debug('Git HEAD: %s' % out)
3714 except Exception:
3715 with contextlib.suppress(Exception):
3716 sys.exc_clear()  # Python 2 remnant; raises AttributeError (suppressed) on Python 3
3717
3718 def python_implementation():
3719 impl_name = platform.python_implementation()
3720 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3721 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3722 return impl_name
3723
3724 write_debug('Python version %s (%s %s) - %s' % (
3725 platform.python_version(),
3726 python_implementation(),
3727 platform.architecture()[0],
3728 platform_name()))
3729
3730 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3731 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3732 if ffmpeg_features:
3733 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
3734
3735 exe_versions['rtmpdump'] = rtmpdump_version()
3736 exe_versions['phantomjs'] = PhantomJSwrapper._version()
3737 exe_str = ', '.join(
3738 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3739 ) or 'none'
3740 write_debug('exe versions: %s' % exe_str)
3741
3742 from .compat.compat_utils import get_package_info
3743 from .dependencies import available_dependencies
3744
3745 write_debug('Optional libraries: %s' % (', '.join(sorted({
3746 join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
3747 })) or 'none'))
3748
3749 self._setup_opener()
3750 proxy_map = {}
3751 for handler in self._opener.handlers:
3752 if hasattr(handler, 'proxies'):
3753 proxy_map.update(handler.proxies)
3754 write_debug(f'Proxy map: {proxy_map}')
3755
3756 # Not implemented
3757 if False and self.params.get('call_home'):
3758 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode()
3759 write_debug('Public IP address: %s' % ipaddr)
3760 latest_version = self.urlopen(
3761 'https://yt-dl.org/latest/version').read().decode()
3762 if version_tuple(latest_version) > version_tuple(__version__):
3763 self.report_warning(
3764 'You are using an outdated version (newest version: %s)! '
3765 'See https://yt-dl.org/update if you need help updating.' %
3766 latest_version)
3767
3768 def _setup_opener(self):
3769 if hasattr(self, '_opener'):
3770 return
3771 timeout_val = self.params.get('socket_timeout')
3772 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3773
3774 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3775 opts_cookiefile = self.params.get('cookiefile')
3776 opts_proxy = self.params.get('proxy')
3777
3778 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3779
3780 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3781 if opts_proxy is not None:
3782 if opts_proxy == '':
3783 proxies = {}
3784 else:
3785 proxies = {'http': opts_proxy, 'https': opts_proxy}
3786 else:
3787 proxies = compat_urllib_request.getproxies()
3788 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3789 if 'http' in proxies and 'https' not in proxies:
3790 proxies['https'] = proxies['http']
3791 proxy_handler = PerRequestProxyHandler(proxies)
3792
3793 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3794 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3795 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3796 redirect_handler = YoutubeDLRedirectHandler()
3797 data_handler = urllib.request.DataHandler()
3798
3799 # When passing our own FileHandler instance, build_opener won't add the
3800 # default FileHandler. This allows us to disable the file protocol, which
3801 # can be used for malicious purposes (see
3802 # https://github.com/ytdl-org/youtube-dl/issues/8227)
3803 file_handler = compat_urllib_request.FileHandler()
3804
3805 def file_open(*args, **kwargs):
3806 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3807 file_handler.file_open = file_open
3808
3809 opener = compat_urllib_request.build_opener(
3810 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3811
3812 # Delete the default user-agent header, which would otherwise apply in
3813 # cases where our custom HTTP handler doesn't come into play
3814 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3815 opener.addheaders = []
3816 self._opener = opener
3817
3818 def encode(self, s):
3819 if isinstance(s, bytes):
3820 return s # Already encoded
3821
3822 try:
3823 return s.encode(self.get_encoding())
3824 except UnicodeEncodeError as err:
3825 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3826 raise
3827
3828 def get_encoding(self):
3829 encoding = self.params.get('encoding')
3830 if encoding is None:
3831 encoding = preferredencoding()
3832 return encoding
3833
3834 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3835 ''' Write infojson and return: True = written, 'exists' = already exists, False = skipped, None = error '''
3836 if overwrite is None:
3837 overwrite = self.params.get('overwrites', True)
3838 if not self.params.get('writeinfojson'):
3839 return False
3840 elif not infofn:
3841 self.write_debug(f'Skipping writing {label} infojson')
3842 return False
3843 elif not self._ensure_dir_exists(infofn):
3844 return None
3845 elif not overwrite and os.path.exists(infofn):
3846 self.to_screen(f'[info] {label.title()} metadata is already present')
3847 return 'exists'
3848
3849 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3850 try:
3851 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3852 return True
3853 except OSError:
3854 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3855 return None
3856
3857 def _write_description(self, label, ie_result, descfn):
3858 ''' Write description and return: True = written, False = skipped, None = error '''
3859 if not self.params.get('writedescription'):
3860 return False
3861 elif not descfn:
3862 self.write_debug(f'Skipping writing {label} description')
3863 return False
3864 elif not self._ensure_dir_exists(descfn):
3865 return None
3866 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3867 self.to_screen(f'[info] {label.title()} description is already present')
3868 elif ie_result.get('description') is None:
3869 self.report_warning(f'There\'s no {label} description to write')
3870 return False
3871 else:
3872 try:
3873 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3874 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3875 descfile.write(ie_result['description'])
3876 except OSError:
3877 self.report_error(f'Cannot write {label} description file {descfn}')
3878 return None
3879 return True
3880
3881 def _write_subtitles(self, info_dict, filename):
3882 ''' Write subtitles to file and return a list of (sub_filename, final_sub_filename); or None on error '''
3883 ret = []
3884 subtitles = info_dict.get('requested_subtitles')
3885 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3886 # Subtitle download errors are already handled in the relevant IE,
3887 # so this silently continues when used with an IE that doesn't support them
3888 return ret
3889
3890 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3891 if not sub_filename_base:
3892 self.to_screen('[info] Skipping writing video subtitles')
3893 return ret
3894 for sub_lang, sub_info in subtitles.items():
3895 sub_format = sub_info['ext']
3896 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3897 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3898 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3899 if existing_sub:
3900 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3901 sub_info['filepath'] = existing_sub
3902 ret.append((existing_sub, sub_filename_final))
3903 continue
3904
3905 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3906 if sub_info.get('data') is not None:
3907 try:
3908 # Use newline='' to prevent conversion of newline characters
3909 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3910 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3911 subfile.write(sub_info['data'])
3912 sub_info['filepath'] = sub_filename
3913 ret.append((sub_filename, sub_filename_final))
3914 continue
3915 except OSError:
3916 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3917 return None
3918
3919 try:
3920 sub_copy = sub_info.copy()
3921 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3922 self.dl(sub_filename, sub_copy, subtitle=True)
3923 sub_info['filepath'] = sub_filename
3924 ret.append((sub_filename, sub_filename_final))
3925 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3926 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
3927 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
3928 if not self.params.get('ignoreerrors'):
3929 self.report_error(msg)
3930 raise DownloadError(msg)
3931 self.report_warning(msg)
3932 return ret
3933
3934 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3935 ''' Write thumbnails to file and return a list of (thumb_filename, final_thumb_filename) '''
3936 write_all = self.params.get('write_all_thumbnails', False)
3937 thumbnails, ret = [], []
3938 if write_all or self.params.get('writethumbnail', False):
3939 thumbnails = info_dict.get('thumbnails') or []
3940 multiple = write_all and len(thumbnails) > 1
3941
3942 if thumb_filename_base is None:
3943 thumb_filename_base = filename
3944 if thumbnails and not thumb_filename_base:
3945 self.write_debug(f'Skipping writing {label} thumbnail')
3946 return ret
3947
3948 for idx, t in list(enumerate(thumbnails))[::-1]:
3949 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3950 thumb_display_id = f'{label} thumbnail {t["id"]}'
3951 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3952 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3953
3954 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3955 if existing_thumb:
3956 self.to_screen('[info] %s is already present' % (
3957 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3958 t['filepath'] = existing_thumb
3959 ret.append((existing_thumb, thumb_filename_final))
3960 else:
3961 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3962 try:
3963 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
3964 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3965 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3966 shutil.copyfileobj(uf, thumbf)
3967 ret.append((thumb_filename, thumb_filename_final))
3968 t['filepath'] = thumb_filename
3969 except network_exceptions as err:
3970 thumbnails.pop(idx)
3971 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3972 if ret and not write_all:
3973 break
3974 return ret