#!/usr/bin/env python3
import collections
import contextlib
import datetime
import errno
import fileinput
import functools
import io
import itertools
import json
import locale
import operator
import os
import platform
import random
import re
import shutil
import subprocess
import sys
import tempfile
import time
import tokenize
import traceback
import unicodedata
import urllib.request
from string import ascii_letters

from .cache import Cache
from .compat import (
    compat_get_terminal_size,
    compat_os_name,
    compat_shlex_quote,
    compat_str,
    compat_urllib_error,
    compat_urllib_request,
    windows_enable_vt_mode,
)
from .cookies import load_cookies
from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
from .downloader.rtmp import rtmpdump_version
from .extractor import _LAZY_LOADER
from .extractor import _PLUGIN_CLASSES as plugin_extractors
from .extractor import gen_extractor_classes, get_info_extractor
from .extractor.openload import PhantomJSwrapper
from .minicurses import format_text
from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
from .postprocessor import (
    EmbedThumbnailPP,
    FFmpegFixupDuplicateMoovPP,
    FFmpegFixupDurationPP,
    FFmpegFixupM3u8PP,
    FFmpegFixupM4aPP,
    FFmpegFixupStretchedPP,
    FFmpegFixupTimestampPP,
    FFmpegMergerPP,
    FFmpegPostProcessor,
    MoveFilesAfterDownloadPP,
    get_postprocessor,
)
from .update import detect_variant
from .utils import (
    DEFAULT_OUTTMPL,
    LINK_TEMPLATES,
    NO_DEFAULT,
    OUTTMPL_TYPES,
    POSTPROCESS_WHEN,
    STR_FORMAT_RE_TMPL,
    STR_FORMAT_TYPES,
    ContentTooShortError,
    DateRange,
    DownloadCancelled,
    DownloadError,
    EntryNotInPlaylist,
    ExistingVideoReached,
    ExtractorError,
    GeoRestrictedError,
    HEADRequest,
    InAdvancePagedList,
    ISO3166Utils,
    LazyList,
    MaxDownloadsReached,
    Namespace,
    PagedList,
    PerRequestProxyHandler,
    Popen,
    PostProcessingError,
    ReExtractInfo,
    RejectedVideoReached,
    SameFileError,
    UnavailableVideoError,
    YoutubeDLCookieProcessor,
    YoutubeDLHandler,
    YoutubeDLRedirectHandler,
    age_restricted,
    args_to_str,
    date_from_str,
    determine_ext,
    determine_protocol,
    encode_compat_str,
    encodeFilename,
    error_to_compat_str,
    expand_path,
    filter_dict,
    float_or_none,
    format_bytes,
    format_decimal_suffix,
    format_field,
    formatSeconds,
    get_domain,
    int_or_none,
    iri_to_uri,
    join_nonempty,
    locked_file,
    make_dir,
    make_HTTPS_handler,
    merge_headers,
    network_exceptions,
    number_of_digits,
    orderedSet,
    parse_filesize,
    platform_name,
    preferredencoding,
    prepend_extension,
    register_socks_protocols,
    remove_terminal_sequences,
    render_table,
    replace_extension,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    sanitized_Request,
    std_headers,
    str_or_none,
    strftime_or_none,
    subtitles_filename,
    supports_terminal_sequences,
    timetuple_from_msec,
    to_high_limit_path,
    traverse_obj,
    try_get,
    url_basename,
    variadic,
    version_tuple,
    write_json_file,
    write_string,
)
from .version import RELEASE_GIT_HEAD, __version__

if compat_os_name == 'nt':
    import ctypes


class YoutubeDL:
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Since, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do),
    it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    ap_mso:            Adobe Pass multiple-system operator identifier.
    ap_username:       Multiple-system operator account username.
    ap_password:       Multiple-system operator account password.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    no_warnings:       Do not print out anything for warnings.
    forceprint:        A dict with keys WHEN mapped to a list of templates to
                       print to stdout. The allowed keys are video or any of the
                       items in utils.POSTPROCESS_WHEN.
                       For compatibility, a single list is also accepted
    print_to_file:     A dict with keys WHEN (same as forceprint) mapped to
                       a list of tuples with (template, filename)
    forceurl:          Force printing final URL. (Deprecated)
    forcetitle:        Force printing title. (Deprecated)
    forceid:           Force printing ID. (Deprecated)
    forcethumbnail:    Force printing thumbnail URL. (Deprecated)
    forcedescription:  Force printing description. (Deprecated)
    forcefilename:     Force printing final filename. (Deprecated)
    forceduration:     Force printing duration. (Deprecated)
    forcejson:         Force printing info_dict as JSON.
    dump_single_json:  Force printing the info_dict of the whole playlist
                       (or video) as a single JSON line.
    force_write_download_archive: Force writing download archive regardless
                       of 'skip_download' or 'simulate'.
    simulate:          Do not download the video files. If unset (or None),
                       simulate only if listsubtitles, listformats or list_thumbnails is used
    format:            Video format code. See "FORMAT SELECTION" for more details.
                       You can also pass a function. The function takes 'ctx' as
                       argument and returns the formats to download.
                       See "build_format_selector" for an implementation
    allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
    ignore_no_formats_error: Ignore "No video formats" error. Useful for
                       extracting metadata even if the video is not actually
                       available for download (experimental)
    format_sort:       A list of fields by which to sort the video formats.
                       See "Sorting Formats" for more details.
    format_sort_force: Force the given format_sort. See "Sorting Formats"
                       for more details.
    prefer_free_formats: Whether to prefer video formats with free containers
                       over non-free ones of same quality.
    allow_multiple_video_streams: Allow multiple video streams to be merged
                       into a single file
    allow_multiple_audio_streams: Allow multiple audio streams to be merged
                       into a single file
    check_formats:     Whether to test if the formats are downloadable.
                       Can be True (check all), False (check none),
                       'selected' (check selected formats),
                       or None (check only if requested by extractor)
    paths:             Dictionary of output paths. The allowed keys are 'home',
                       'temp' and the keys of OUTTMPL_TYPES (in utils.py)
    outtmpl:           Dictionary of templates for output names. Allowed keys
                       are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
                       For compatibility with youtube-dl, a single string can also be used
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    trim_file_name:    Limit length of filename (extension excluded)
    windowsfilenames:  Force the filenames to be windows compatible
    ignoreerrors:      Do not stop on download/postprocessing errors.
                       Can be 'only_download' to ignore only download errors.
                       Default is 'only_download' for CLI, but False for API
    skip_playlist_after_errors: Number of allowed failures until the rest of
                       the playlist is skipped
    force_generic_extractor: Force downloader to use the generic extractor
    overwrites:        Overwrite all video and metadata files if True,
                       overwrite only non-video files if None
                       and don't overwrite any file if False
                       For compatibility with youtube-dl,
                       "nooverwrites" may also be used instead
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    playlist_items:    Specific indices of playlist to download.
    playlistreverse:   Download playlist items in reverse order.
    playlistrandom:    Download playlist items in random order.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logger:            Log messages to a logging.Logger instance.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    clean_infojson:    Remove private fields from the infojson
    getcomments:       Extract video comments. This will not be written to disk
                       unless writeinfojson is also given
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    allow_playlist_files: Whether to write playlists' description, infojson etc
                       also to disk when using the 'write*' options
    write_all_thumbnails: Write all thumbnail formats to files
    writelink:         Write an internet shortcut file, depending on the
                       current platform (.url/.webloc/.desktop)
    writeurllink:      Write a Windows internet shortcut file (.url)
    writewebloclink:   Write a macOS internet shortcut file (.webloc)
    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatically generated subtitles to a file
    allsubtitles:      Deprecated - Use subtitleslangs = ['all']
                       Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   The format code for subtitles
    subtitleslangs:    List of languages of the subtitles to download (can be regex).
                       The list may contain "all" to refer to all the available
                       subtitles. The language can be prefixed with a "-" to
                       exclude it from the requested languages. Eg: ['all', '-live_chat']
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       False to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    min_views:         An integer representing the minimum view count the video
                       must have in order to not be skipped.
                       Videos without view count information are always
                       downloaded. None for no limit.
    max_views:         An integer representing the maximum view count.
                       Videos that are more popular than that are not
                       downloaded.
                       Videos without view count information are always
                       downloaded. None for no limit.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.
    break_on_existing: Stop the download process after attempting to download a
                       file that is in the archive.
    break_on_reject:   Stop the download process when encountering a video that
                       has been filtered out.
    break_per_url:     Whether break_on_reject and break_on_existing
                       should act on each input URL as opposed to for the entire queue
    cookiefile:        File name where cookies should be read from and dumped to
    cookiesfrombrowser: A tuple containing the name of the browser, the profile
                       name/path from where cookies are loaded, and the name of the
                       keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
    legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                       support RFC 5746 secure renegotiation
    nocheckcertificate: Do not verify SSL certificates
    prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                       At the moment, this is only supported by YouTube.
    http_headers:      A dictionary of custom headers to be used for all requests
    proxy:             URL of the proxy server to use
    geo_verification_proxy: URL of the proxy to use for IP address verification
                       on geo-restricted sites.
    socket_timeout:    Time to wait for unresponsive hosts, in seconds
    bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
    debug_printtraffic: Print out sent and received HTTP traffic
    include_ads:       Download ads as well (deprecated)
    default_search:    Prepend this string if an input url is not valid.
                       'auto' for elaborate guessing
    encoding:          Use this encoding instead of the system-specified.
    extract_flat:      Do not resolve URLs, return the immediate result.
                       Pass in 'in_playlist' to only show this behavior for
                       playlist items.
    wait_for_video:    If given, wait for scheduled streams to become available.
                       The value should be a tuple containing the range
                       (min_secs, max_secs) to wait between retries
    postprocessors:    A list of dictionaries, each with an entry
                       * key:  The name of the postprocessor. See
                               yt_dlp/postprocessor/__init__.py for a list.
                       * when: When to run the postprocessor. Allowed values are
                               the entries of utils.POSTPROCESS_WHEN
                               Assumed to be 'post_process' if not given
    post_hooks:        Deprecated - Register a custom postprocessor instead
                       A list of functions that get called as the final step
                       for each video file, after all postprocessors have been
                       called. The filename will be passed as the only argument.
    progress_hooks:    A list of functions that get called on download
                       progress, with a dictionary with the entries
                       * status: One of "downloading", "error", or "finished".
                                 Check this first and ignore unknown values.
                       * info_dict: The extracted info_dict

                       If status is one of "downloading", or "finished", the
                       following properties may also be present:
                       * filename: The final filename (always present)
                       * tmpfilename: The filename we're currently writing to
                       * downloaded_bytes: Bytes on disk
                       * total_bytes: Size of the whole file, None if unknown
                       * total_bytes_estimate: Guess of the eventual file size,
                                               None if unavailable.
                       * elapsed: The number of seconds since download started.
                       * eta: The estimated time in seconds, None if unknown
                       * speed: The download speed in bytes/second, None if
                                unknown
                       * fragment_index: The counter of the currently
                                         downloaded video fragment.
                       * fragment_count: The number of fragments (= individual
                                         files that will be merged)

                       Progress hooks are guaranteed to be called at least once
                       (with status "finished") if the download is successful.
    postprocessor_hooks: A list of functions that get called on postprocessing
                       progress, with a dictionary with the entries
                       * status: One of "started", "processing", or "finished".
                                 Check this first and ignore unknown values.
                       * postprocessor: Name of the postprocessor
                       * info_dict: The extracted info_dict

                       Progress hooks are guaranteed to be called at least twice
                       (with status "started" and "finished") if the processing is successful.
    merge_output_format: Extension to use when merging formats.
    final_ext:         Expected final extension; used to detect when the file was
                       already downloaded and converted
    fixup:             Automatically correct known faults of the file.
                       One of:
                       - "never": do nothing
                       - "warn": only emit a warning
                       - "detect_or_warn": check whether we can do anything
                                           about it, warn otherwise (default)
    source_address:    Client-side IP address to bind to.
    call_home:         Boolean, true iff we are allowed to contact the
                       yt-dlp servers for debugging. (BROKEN)
    sleep_interval_requests: Number of seconds to sleep between requests
                       during extraction
    sleep_interval:    Number of seconds to sleep before each download when
                       used alone or a lower bound of a range for randomized
                       sleep before each download (minimum possible number
                       of seconds to sleep) when used along with
                       max_sleep_interval.
    max_sleep_interval: Upper bound of a range for randomized sleep before each
                       download (maximum possible number of seconds to sleep).
                       Must only be used along with sleep_interval.
                       Actual sleep time will be a random float from range
                       [sleep_interval; max_sleep_interval].
    sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
    listformats:       Print an overview of available video formats and exit.
    list_thumbnails:   Print a table of all thumbnails and exit.
    match_filter:      A function that gets called with the info_dict of
                       every video.
                       If it returns a message, the video is ignored.
                       If it returns None, the video is downloaded.
                       If it returns utils.NO_DEFAULT, the user is interactively
                       asked whether to download the video.
                       match_filter_func in utils.py is one example for this.
    no_color:          Do not emit color codes in output.
    geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
                       HTTP header
    geo_bypass_country:
                       Two-letter ISO 3166-1 alpha-2 country code that will be used for
                       explicit geographic restriction bypassing via faking
                       X-Forwarded-For HTTP header
    geo_bypass_ip_block:
                       IP range in CIDR notation that will be used similarly to
                       geo_bypass_country

    The following options determine which downloader is picked:
    external_downloader: A dictionary of protocol keys and the executable of the
                       external downloader to use for it. The allowed protocols
                       are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
                       Set the value to 'native' to use the native downloader
    hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
                       or {'m3u8': 'ffmpeg'} instead.
                       Use the native HLS downloader instead of ffmpeg/avconv
                       if True, otherwise use ffmpeg/avconv if False, otherwise
                       use downloader suggested by extractor if None.
    compat_opts:       Compatibility options. See "Differences in default behavior".
                       The following options do not work when used through the API:
                       filename, abort-on-error, multistreams, no-live-chat, format-sort,
                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                       Refer to __init__.py for their implementation
    progress_template: Dictionary of templates for progress outputs.
                       Allowed keys are 'download', 'postprocess',
                       'download-title' (console title) and 'postprocess-title'.
                       The template is mapped on a dictionary with keys 'progress' and 'info'

    The following parameters are not used by YoutubeDL itself; they are used by
    the downloader (see yt_dlp/downloader/common.py):
    nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
    max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
    continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
    external_downloader_args, concurrent_fragment_downloads.

    The following options are used by the post processors:
    prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
                       otherwise prefer ffmpeg. (avconv support is deprecated)
    ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                       to the binary or its containing directory.
    postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
                       and a list of additional command-line arguments for the
                       postprocessor/executable. The dict can also have "PP+EXE" keys
                       which are used when the given exe is used by the given PP.
                       Use 'default' as the name for arguments to be passed to all PP
                       For compatibility with youtube-dl, a single list of args
                       can also be used

    The following options are used by the extractors:
    extractor_retries: Number of times to retry for known errors
    dynamic_mpd:       Whether to process dynamic DASH manifests (default: True)
    hls_split_discontinuity: Split HLS playlists to different formats at
                       discontinuities such as ad breaks (default: False)
    extractor_args:    A dictionary of arguments to be passed to the extractors.
                       See "EXTRACTOR ARGUMENTS" for details.
                       Eg: {'youtube': {'skip': ['dash', 'hls']}}
    mark_watched:      Mark videos watched (even with --simulate). Only for YouTube
    youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
                       If True (default), DASH manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about DASH. (only for youtube)
    youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
                       If True (default), HLS manifests and related
                       data will be downloaded and processed by extractor.
                       You can reduce network I/O by disabling it if you don't
                       care about HLS. (only for youtube)
    """

    _NUMERIC_FIELDS = {
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'release_timestamp',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
    }

    _format_fields = {
        # NB: Keep in sync with the docstring of extractor/common.py
        'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note',
        'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
        'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
        'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
        'preference', 'language', 'language_preference', 'quality', 'source_preference',
        'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
        'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
    }
    _format_selection_exts = {
        'audio': {'m4a', 'mp3', 'ogg', 'aac'},
        'video': {'mp4', 'flv', 'webm', '3gp'},
        'storyboards': {'mhtml'},
    }

    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.
        @param auto_init    Whether to load the default extractors and print header (if verbose).
                            Set to 'no_verbose_header' to not print the header
        """
        if params is None:
            params = {}
        self.params = params
        self._ies = {}
        self._ies_instances = {}
        self._pps = {k: [] for k in POSTPROCESS_WHEN}
        self._printed_messages = set()
        self._first_webpage_request = True
        self._post_hooks = []
        self._progress_hooks = []
        self._postprocessor_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        self._num_videos = 0
        self._playlist_level = 0
        self._playlist_urls = set()
        self.cache = Cache(self)

        windows_enable_vt_mode()
        self._out_files = {
            'error': sys.stderr,
            'print': sys.stderr if self.params.get('logtostderr') else sys.stdout,
            'console': None if compat_os_name == 'nt' else next(
                filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
        }
        self._out_files['screen'] = sys.stderr if self.params.get('quiet') else self._out_files['print']
        self._allow_colors = {
            type_: not self.params.get('no_color') and supports_terminal_sequences(self._out_files[type_])
            for type_ in ('screen', 'error')
        }

        if sys.version_info < (3, 6):
            self.report_warning(
                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

        if self.params.get('allow_unplayable_formats'):
            self.report_warning(
                f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
                'This is a developer option intended for debugging. \n'
                ' If you experience any issues while using this option, '
                f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

        def check_deprecated(param, option, suggestion):
            if self.params.get(param) is not None:
                self.report_warning(f'{option} is deprecated. Use {suggestion} instead')
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
        check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

        for msg in self.params.get('_warnings', []):
            self.report_warning(msg)
        for msg in self.params.get('_deprecation_warnings', []):
            self.deprecation_warning(msg)

        if 'list-formats' in self.params.get('compat_opts', []):
            self.params['listformats_table'] = False

        if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
            # nooverwrites was unnecessarily changed to overwrites
            # in 0c3d0f51778b153f65c21906031c2e091fcfb641
            # This ensures compatibility with both keys
            self.params['overwrites'] = not self.params['nooverwrites']
        elif self.params.get('overwrites') is None:
            self.params.pop('overwrites', None)
        else:
            self.params['nooverwrites'] = not self.params['overwrites']

        self.params.setdefault('forceprint', {})
        self.params.setdefault('print_to_file', {})

        # Compatibility with older syntax
        if not isinstance(params['forceprint'], dict):
            self.params['forceprint'] = {'video': params['forceprint']}

        if self.params.get('bidi_workaround', False):
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._out_files['error'])
                try:
                    self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
                except OSError:
                    self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning(
                        'Could not find fribidi executable, ignoring --bidi-workaround. '
                        'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if auto_init:
            if auto_init != 'no_verbose_header':
                self.print_debug_header()
            self.add_default_info_extractors()

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not self.params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        self.outtmpl_dict = self.parse_outtmpl()

        # Creating format selector here allows us to catch syntax errors before the extraction
        self.format_selector = (
            self.params.get('format') if self.params.get('format') in (None, '-')
            else self.params['format'] if callable(self.params['format'])
            else self.build_format_selector(self.params['format']))

        # Set http_headers defaults according to std_headers
        self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

        hooks = {
            'post_hooks': self.add_post_hook,
            'progress_hooks': self.add_progress_hook,
            'postprocessor_hooks': self.add_postprocessor_hook,
        }
        for opt, fn in hooks.items():
            for ph in self.params.get(opt, []):
                fn(ph)

        for pp_def_raw in self.params.get('postprocessors', []):
            pp_def = dict(pp_def_raw)
            when = pp_def.pop('when', 'post_process')
            self.add_post_processor(
                get_postprocessor(pp_def.pop('key'))(self, **pp_def),
                when=when)

        self._setup_opener()
        register_socks_protocols()

        def preload_download_archive(fn):
            """Preload the archive, if any is specified"""
            if fn is None:
                return False
            self.write_debug(f'Loading archive file {fn!r}')
            try:
                with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                    for line in archive_file:
                        self.archive.add(line.strip())
            except OSError as ioe:
                if ioe.errno != errno.ENOENT:
                    raise
                return False
            return True

        self.archive = set()
        preload_download_archive(self.params.get('download_archive'))

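
    # An illustrative 'postprocessors' entry as consumed by the loop above;
    # the keys follow the class docstring, and the 'FFmpegExtractAudio'
    # values mirror the common embedding example (placeholders, not defaults):
    #
    #   ydl_opts = {'postprocessors': [{
    #       'key': 'FFmpegExtractAudio',
    #       'preferredcodec': 'mp3',
    #       'when': 'post_process',
    #   }]}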
    def warn_if_short_id(self, argv):
        # short YouTube ID starting with dash?
        idxs = [
            i for i, a in enumerate(argv)
            if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
        if idxs:
            correct_argv = (
                ['yt-dlp']
                + [a for i, a in enumerate(argv) if i not in idxs]
                + ['--'] + [argv[i] for i in idxs]
            )
            self.report_warning(
                'Long argument string detected. '
                'Use -- to separate parameters and URLs, like this:\n%s' %
                args_to_str(correct_argv))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        ie_key = ie.ie_key()
        self._ies[ie_key] = ie
        if not isinstance(ie, type):
            self._ies_instances[ie_key] = ie
            ie.set_downloader(self)

    def _get_info_extractor_class(self, ie_key):
        ie = self._ies.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)
            self.add_info_extractor(ie)
        return ie

    def get_info_extractor(self, ie_key):
        """
        Get an instance of an IE with name ie_key. It will try to get one from
        the _ies_instances list; if there is no instance, it will create a new
        one and add it to the extractor list.
        """
        ie = self._ies_instances.get(ie_key)
        if ie is None:
            ie = get_info_extractor(ie_key)()
            self.add_info_extractor(ie)
        return ie

    def add_default_info_extractors(self):
        """
        Add the InfoExtractors returned by gen_extractors to the end of the list
        """
        for ie in gen_extractor_classes():
            self.add_info_extractor(ie)

    def add_post_processor(self, pp, when='post_process'):
        """Add a PostProcessor object to the end of the chain."""
        self._pps[when].append(pp)
        pp.set_downloader(self)

    def add_post_hook(self, ph):
        """Add the post hook"""
        self._post_hooks.append(ph)

    def add_progress_hook(self, ph):
        """Add the download progress hook"""
        self._progress_hooks.append(ph)

    def add_postprocessor_hook(self, ph):
        """Add the postprocessing progress hook"""
        self._postprocessor_hooks.append(ph)
        for pps in self._pps.values():
            for pp in pps:
                pp.add_progress_hook(ph)
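
    # A hook sketch matching the dicts documented under "progress_hooks"
    # in the class docstring (the function name is a placeholder):
    #
    #   def my_progress_hook(d):
    #       if d['status'] == 'finished':  # check 'status' first
    #           print('Done downloading', d['filename'])
    #
    #   ydl.add_progress_hook(my_progress_hook)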

    def _bidi_workaround(self, message):
        if not hasattr(self, '_output_channel'):
            return message

        assert hasattr(self, '_output_process')
        assert isinstance(message, compat_str)
        line_count = message.count('\n') + 1
        self._output_process.stdin.write((message + '\n').encode('utf-8'))
        self._output_process.stdin.flush()
        res = ''.join(self._output_channel.readline().decode('utf-8')
                      for _ in range(line_count))
        return res[:-len('\n')]

    def _write_string(self, message, out=None, only_once=False):
        if only_once:
            if message in self._printed_messages:
                return
            self._printed_messages.add(message)
        write_string(message, out=out, encoding=self.params.get('encoding'))

    def to_stdout(self, message, skip_eol=False, quiet=None):
        """Print message to stdout"""
        if quiet is not None:
            self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files['print'])

    def to_screen(self, message, skip_eol=False, quiet=None):
        """Print message to screen if not in quiet mode"""
        if self.params.get('logger'):
            self.params['logger'].debug(message)
            return
        if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'):
            return
        self._write_string(
            '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
            self._out_files['screen'])

    def to_stderr(self, message, only_once=False):
        """Print message to stderr"""
        assert isinstance(message, compat_str)
        if self.params.get('logger'):
            self.params['logger'].error(message)
        else:
            self._write_string('%s\n' % self._bidi_workaround(message), self._out_files['error'], only_once=only_once)

    def _send_console_code(self, code):
        if compat_os_name == 'nt' or not self._out_files['console']:
            return
        self._write_string(code, self._out_files['console'])

    def to_console_title(self, message):
        if not self.params.get('consoletitle', False):
            return
        message = remove_terminal_sequences(message)
        if compat_os_name == 'nt':
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        else:
            self._send_console_code(f'\033]0;{message}\007')

    def save_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[22;0t')  # Save the title on stack

    def restore_console_title(self):
        if not self.params.get('consoletitle') or self.params.get('simulate'):
            return
        self._send_console_code('\033[23;0t')  # Restore the title from stack

    def __enter__(self):
        self.save_console_title()
        return self

    def __exit__(self, *args):
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)

    def trouble(self, message=None, tb=None, is_error=True):
        """Determine action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        @param tb          If given, is additional traceback information
        @param is_error    Whether to raise error according to ignoreerrors
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            if tb:
                self.to_stderr(tb)
        if not is_error:
            return
        if not self.params.get('ignoreerrors'):
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    Styles = Namespace(
        HEADERS='yellow',
        EMPHASIS='light blue',
        FILENAME='green',
        ID='green',
        DELIM='blue',
        ERROR='red',
        WARNING='yellow',
        SUPPRESS='light black',
    )

    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
        text = str(text)
        if test_encoding:
            original_text = text
            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
            text = text.encode(encoding, 'ignore').decode(encoding)
            if fallback is not None and text != original_text:
                text = fallback
        return format_text(text, f) if allow_colors else text if fallback is None else fallback

    def _format_screen(self, *args, **kwargs):
        return self._format_text(
            self._out_files['screen'], self._allow_colors['screen'], *args, **kwargs)

    def _format_err(self, *args, **kwargs):
        return self._format_text(
            self._out_files['error'], self._allow_colors['error'], *args, **kwargs)

    def report_warning(self, message, only_once=False):
        '''
        Print the message to stderr; it will be prefixed with 'WARNING:'.
        If stderr is a tty file, the 'WARNING:' will be colored.
        '''
        if self.params.get('logger') is not None:
            self.params['logger'].warning(message)
        else:
            if self.params.get('no_warnings'):
                return
            self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)

    def deprecation_warning(self, message):
        if self.params.get('logger') is not None:
            self.params['logger'].warning(f'DeprecationWarning: {message}')
        else:
            self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)

    def report_error(self, message, *args, **kwargs):
        '''
        Do the same as trouble, but prefix the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)

    def write_debug(self, message, only_once=False):
        '''Log debug message, or print message to stderr'''
        if not self.params.get('verbose', False):
            return
        message = '[debug] %s' % message
        if self.params.get('logger'):
            self.params['logger'].debug(message)
        else:
            self.to_stderr(message, only_once)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen('[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen('[download] The file has already been downloaded')

    def report_file_delete(self, file_name):
        """Report that existing file will be deleted."""
        try:
            self.to_screen('Deleting existing file %s' % file_name)
        except UnicodeEncodeError:
            self.to_screen('Deleting existing file')

    def raise_no_formats(self, info, forced=False, *, msg=None):
        has_drm = info.get('_has_drm')
        ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
        msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
        if forced or not ignored:
            raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                                 expected=has_drm or ignored or expected)
        else:
            self.report_warning(msg)

    def parse_outtmpl(self):
        outtmpl_dict = self.params.get('outtmpl', {})
        if not isinstance(outtmpl_dict, dict):
            outtmpl_dict = {'default': outtmpl_dict}
        # Remove spaces in the default template
        if self.params.get('restrictfilenames'):
            sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-')
        else:
            sanitize = lambda x: x
        outtmpl_dict.update({
            k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items()
            if outtmpl_dict.get(k) is None})
        for _, val in outtmpl_dict.items():
            if isinstance(val, bytes):
                self.report_warning('Parameter outtmpl is bytes, but should be a unicode string')
        return outtmpl_dict

    def get_output_path(self, dir_type='', filename=None):
        paths = self.params.get('paths', {})
        assert isinstance(paths, dict)
        path = os.path.join(
            expand_path(paths.get('home', '').strip()),
            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
            filename or '')
        return sanitize_path(path, force=self.params.get('windowsfilenames'))

    @staticmethod
    def _outtmpl_expandpath(outtmpl):
        # expand_path translates '%%' into '%' and '$$' into '$'
        # correspondingly; that is not what we want, since we need to keep
        # '%%' intact for the template dict substitution step. Work around
        # it with a boundary-like separator hack.
        sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
        outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

        # outtmpl should be expand_path'ed before template dict substitution
        # because meta fields may contain env variables we don't want to
        # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
        # title "Hello $PATH", we don't want `$PATH` to be expanded.
        return expand_path(outtmpl).replace(sep, '')

    @staticmethod
    def escape_outtmpl(outtmpl):
        ''' Escape any remaining strings like %s, %abc% etc. '''
        return re.sub(
            STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'),
            lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0),
            outtmpl)

    @classmethod
    def validate_outtmpl(cls, outtmpl):
        ''' @return None or Exception object '''
        outtmpl = re.sub(
            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]'),
            lambda mobj: f'{mobj.group(0)[:-1]}s',
            cls._outtmpl_expandpath(outtmpl))
        try:
            cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
            return None
        except ValueError as err:
            return err
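
    # An illustrative way to catch template errors early with
    # validate_outtmpl (the template string is a placeholder):
    #
    #   err = YoutubeDL.validate_outtmpl('%(title)s.%(ext)s')
    #   if err:
    #       raise SystemExit(f'Invalid output template: {err}')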

    @staticmethod
    def _copy_infodict(info_dict):
        info_dict = dict(info_dict)
        info_dict.pop('__postprocessors', None)
        return info_dict

    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
        @param sanitize    Whether to sanitize the output as a filename.
                           For backward compatibility, a function can also be passed
        """

        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

        info_dict = self._copy_infodict(info_dict)
        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
            formatSeconds(info_dict['duration'], '-' if sanitize else ':')
            if info_dict.get('duration', None) is not None
            else None)
        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
        info_dict['video_autonumber'] = self._num_videos
        if info_dict.get('resolution') is None:
            info_dict['resolution'] = self.format_resolution(info_dict, default=None)

        # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
        # of %(field)s to %(field)0Nd for backward compatibility
        field_size_compat_map = {
            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
            'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
            'autonumber': self.params.get('autonumber_size') or 5,
        }

        TMPL_DICT = {}
        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
        MATH_FUNCTIONS = {
            '+': float.__add__,
            '-': float.__sub__,
        }
        # Field is of the form key1.key2...
        # where keys (except first) can be string, int or slice
        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
        MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
        MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
            (?P<negate>-)?
            (?P<fields>{field})
            (?P<maths>(?:{math_op}{math_field})*)
            (?:>(?P<strf_format>.+?))?
            (?P<remaining>
                (?P<alternate>(?<!\\),[^|&)]+)?
                (?:&(?P<replacement>.*?))?
                (?:\|(?P<default>.*?))?
            )$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

        def _traverse_infodict(k):
            k = k.split('.')
            if k[0] == '':
                k.pop(0)
            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

        def get_value(mdict):
            # Object traversal
            value = _traverse_infodict(mdict['fields'])
            # Negative
            if mdict['negate']:
                value = float_or_none(value)
                if value is not None:
                    value *= -1
            # Do maths
            offset_key = mdict['maths']
            if offset_key:
                value = float_or_none(value)
                operator = None
                while offset_key:
                    item = re.match(
                        MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                        offset_key).group(0)
                    offset_key = offset_key[len(item):]
                    if operator is None:
                        operator = MATH_FUNCTIONS[item]
                        continue
                    item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                    offset = float_or_none(item)
                    if offset is None:
                        offset = float_or_none(_traverse_infodict(item))
                    try:
                        value = operator(value, multiplier * offset)
                    except (TypeError, ZeroDivisionError):
                        return None
                    operator = None
            # Datetime formatting
            if mdict['strf_format']:
                value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

            return value

        na = self.params.get('outtmpl_na_placeholder', 'NA')

        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
            return sanitize_filename(str(value), restricted=restricted, is_id=(
                bool(re.search(r'(^|[_.])id(\.|$)', key))
                if 'filename-sanitization' in self.params.get('compat_opts', [])
                else NO_DEFAULT))

        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
        sanitize = bool(sanitize)

        def _dumpjson_default(obj):
            if isinstance(obj, (set, LazyList)):
                return list(obj)
            return repr(obj)

        def create_key(outer_mobj):
            if not outer_mobj.group('has_key'):
                return outer_mobj.group(0)
            key = outer_mobj.group('key')
            mobj = re.match(INTERNAL_FORMAT_RE, key)
            initial_field = mobj.group('fields') if mobj else ''
            value, replacement, default = None, None, na
            while mobj:
                mobj = mobj.groupdict()
                default = mobj['default'] if mobj['default'] is not None else default
                value = get_value(mobj)
                replacement = mobj['replacement']
                if value is None and mobj['alternate']:
                    mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
                else:
                    break

            fmt = outer_mobj.group('format')
            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                fmt = f'0{field_size_compat_map[key]:d}d'

            value = default if value is None else value if replacement is None else replacement

            flags = outer_mobj.group('conversion') or ''
            str_fmt = f'{fmt[:-1]}s'
            if fmt[-1] == 'l':  # list
                delim = '\n' if '#' in flags else ', '
                value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
            elif fmt[-1] == 'j':  # json
                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
            elif fmt[-1] == 'q':  # quoted
                value = map(str, variadic(value) if '#' in flags else [value])
                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
            elif fmt[-1] == 'B':  # bytes
                value = f'%{str_fmt}'.encode() % str(value).encode('utf-8')
                value, fmt = value.decode('utf-8', 'ignore'), 's'
            elif fmt[-1] == 'U':  # unicode normalized
                value, fmt = unicodedata.normalize(
                    # "+" = compatibility equivalence, "#" = NFD
                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                    value), str_fmt
            elif fmt[-1] == 'D':  # decimal suffix
                num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
                value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                              factor=1024 if '#' in flags else 1000)
            elif fmt[-1] == 'S':  # filename sanitization
                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
            elif fmt[-1] == 'c':
                if value:
                    value = str(value)[0]
                else:
                    fmt = str_fmt
            elif fmt[-1] not in 'rs':  # numeric
                value = float_or_none(value)
                if value is None:
                    value, fmt = default, 's'

            if sanitize:
                if fmt[-1] == 'r':
                    # If value is an object, sanitize might convert it to a string
                    # So we convert it to repr first
                    value, fmt = repr(value), str_fmt
                if fmt[-1] in 'csr':
                    value = sanitizer(initial_field, value)

            key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
            TMPL_DICT[key] = value
            return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

        return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT

    def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
        outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
        return self.escape_outtmpl(outtmpl) % info_dict
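
    # A sketch of the template mini-language handled by prepare_outtmpl,
    # assuming `ydl` and an already-extracted `info_dict` (field names and
    # values are placeholders):
    #
    #   ydl.evaluate_outtmpl('%(title)s-%(id)s.%(ext)s', info_dict)
    #   ydl.evaluate_outtmpl('%(duration>%H-%M-%S)s', info_dict)  # strftime-style
    #   ydl.evaluate_outtmpl('%(uploader|Unknown)s', info_dict)   # "|" default
    #   ydl.evaluate_outtmpl('%(playlist_index+1)d', info_dict)   # "+"/"-" maths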

    def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None):
        assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive'
        if outtmpl is None:
            outtmpl = self.outtmpl_dict.get(tmpl_type or 'default', self.outtmpl_dict['default'])
        try:
            outtmpl = self._outtmpl_expandpath(outtmpl)
            filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
            if not filename:
                return None

            if tmpl_type in ('', 'temp'):
                final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
                if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
                    filename = replace_extension(filename, ext, final_ext)
            elif tmpl_type:
                force_ext = OUTTMPL_TYPES[tmpl_type]
                if force_ext:
                    filename = replace_extension(filename, force_ext, info_dict.get('ext'))

            # https://github.com/blackjack4494/youtube-dlc/issues/85
            trim_file_name = self.params.get('trim_file_name', False)
            if trim_file_name:
                no_ext, *ext = filename.rsplit('.', 2)
                filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

            return filename
        except ValueError as err:
            self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None

    def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False):
        """Generate the output filename"""
        if outtmpl:
            assert not dir_type, 'outtmpl and dir_type are mutually exclusive'
            dir_type = None
        filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl)
        if not filename and dir_type not in ('', 'temp'):
            return ''

        if warn:
            if not self.params.get('paths'):
                pass
            elif filename == '-':
                self.report_warning('--paths is ignored when outputting to stdout', only_once=True)
            elif os.path.isabs(filename):
                self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
        if filename == '-' or not filename:
            return filename

        return self.get_output_path(dir_type, filename)
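
    # Illustrative calls, assuming `ydl` and an `info_dict` from extract_info,
    # and assuming 'description' is among the OUTTMPL_TYPES keys:
    #
    #   path = ydl.prepare_filename(info_dict)  # 'default' template
    #   desc_path = ydl.prepare_filename(info_dict, 'description')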

    def _match_entry(self, info_dict, incomplete=False, silent=False):
        """ Returns None if the file should be downloaded """

        video_title = info_dict.get('title', info_dict.get('id', 'video'))

        def check_filter():
            if 'title' in info_dict:
                # This can happen when we're just evaluating the playlist
                title = info_dict['title']
                matchtitle = self.params.get('matchtitle', False)
                if matchtitle:
                    if not re.search(matchtitle, title, re.IGNORECASE):
                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
                rejecttitle = self.params.get('rejecttitle', False)
                if rejecttitle:
                    if re.search(rejecttitle, title, re.IGNORECASE):
                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
            date = info_dict.get('upload_date')
            if date is not None:
                dateRange = self.params.get('daterange', DateRange())
                if date not in dateRange:
                    return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}'
            view_count = info_dict.get('view_count')
            if view_count is not None:
                min_views = self.params.get('min_views')
                if min_views is not None and view_count < min_views:
                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
                max_views = self.params.get('max_views')
                if max_views is not None and view_count > max_views:
                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
                return 'Skipping "%s" because it is age restricted' % video_title

            match_filter = self.params.get('match_filter')
            if match_filter is not None:
                try:
                    ret = match_filter(info_dict, incomplete=incomplete)
                except TypeError:
                    # For backward compatibility
                    ret = None if incomplete else match_filter(info_dict)
                if ret is NO_DEFAULT:
                    while True:
                        filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
                        reply = input(self._format_screen(
                            f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
                        if reply in {'y', ''}:
                            return None
                        elif reply == 'n':
                            return f'Skipping {video_title}'
                        return True
                elif ret is not None:
                    return ret
            return None

        if self.in_download_archive(info_dict):
            reason = '%s has already been recorded in the archive' % video_title
            break_opt, break_err = 'break_on_existing', ExistingVideoReached
        else:
            reason = check_filter()
            break_opt, break_err = 'break_on_reject', RejectedVideoReached
        if reason is not None:
            if not silent:
                self.to_screen('[download] ' + reason)
            if self.params.get(break_opt, False):
                raise break_err()
        return reason
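
    # A match_filter sketch per the class docstring (the 10-minute
    # threshold is an arbitrary example):
    #
    #   def longer_than_ten_minutes(info_dict, incomplete=False):
    #       duration = info_dict.get('duration')
    #       if duration and duration < 600:
    #           return 'Skipping short video'  # a message means "skip"
    #       return None                        # None means "download"
    #
    #   ydl = YoutubeDL({'match_filter': longer_than_ten_minutes})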

    @staticmethod
    def add_extra_info(info_dict, extra_info):
        '''Set the keys from extra_info in info dict if they are missing'''
        for key, value in extra_info.items():
            info_dict.setdefault(key, value)

    def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                     process=True, force_generic_extractor=False):
        """
        Extract and return the information dict of the video or playlist
        the URL refers to.

        Arguments:
        url -- URL to extract

        Keyword arguments:
        download -- whether to download videos during extraction
        ie_key -- extractor key hint
        extra_info -- dictionary containing the extra values to add to each result
        process -- whether to resolve all unresolved references (URLs, playlist items),
                   must be True for download to work.
        force_generic_extractor -- force using the generic extractor
        """

        if extra_info is None:
            extra_info = {}

        if not ie_key and force_generic_extractor:
            ie_key = 'Generic'

        if ie_key:
            ies = {ie_key: self._get_info_extractor_class(ie_key)}
        else:
            ies = self._ies

        for ie_key, ie in ies.items():
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')

            temp_id = ie.get_temp_id(url)
            if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
                self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
                if self.params.get('break_on_existing', False):
                    raise ExistingVideoReached()
                break
            return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
        else:
            self.report_error('no suitable InfoExtractor for URL %s' % url)
1387
1388 def __handle_extraction_exceptions(func):
1389 @functools.wraps(func)
1390 def wrapper(self, *args, **kwargs):
1391 while True:
1392 try:
1393 return func(self, *args, **kwargs)
1394 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1395 raise
1396 except ReExtractInfo as e:
1397 if e.expected:
1398 self.to_screen(f'{e}; Re-extracting data')
1399 else:
1400 self.to_stderr('\r')
1401 self.report_warning(f'{e}; Re-extracting data')
1402 continue
1403 except GeoRestrictedError as e:
1404 msg = e.msg
1405 if e.countries:
1406 msg += '\nThis video is available in %s.' % ', '.join(
1407 map(ISO3166Utils.short2full, e.countries))
1408 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
1409 self.report_error(msg)
1410 except ExtractorError as e: # An error we somewhat expected
1411 self.report_error(str(e), e.format_traceback())
1412 except Exception as e:
1413 if self.params.get('ignoreerrors'):
1414 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1415 else:
1416 raise
1417 break
1418 return wrapper
1419
1420 def _wait_for_video(self, ie_result):
1421 if (not self.params.get('wait_for_video')
1422 or ie_result.get('_type', 'video') != 'video'
1423 or ie_result.get('formats') or ie_result.get('url')):
1424 return
1425
1426 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1427 last_msg = ''
1428
1429 def progress(msg):
1430 nonlocal last_msg
1431 self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1432 last_msg = msg
1433
1434 min_wait, max_wait = self.params.get('wait_for_video')
1435 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1436 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1437 diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
1438 self.report_warning('Release time of video is not known')
1439 elif (diff or 0) <= 0:
1440 self.report_warning('Video should already be available according to extracted info')
1441 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1442 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1443
1444 wait_till = time.time() + diff
1445 try:
1446 while True:
1447 diff = wait_till - time.time()
1448 if diff <= 0:
1449 progress('')
1450 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1451 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1452 time.sleep(1)
1453 except KeyboardInterrupt:
1454 progress('')
1455 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1456 except BaseException as e:
1457 if not isinstance(e, ReExtractInfo):
1458 self.to_screen('')
1459 raise
1460
1461 @__handle_extraction_exceptions
1462 def __extract_info(self, url, ie, download, extra_info, process):
1463 ie_result = ie.extract(url)
1464 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
1465 return
1466 if isinstance(ie_result, list):
1467 # Backwards compatibility: old IE result format
1468 ie_result = {
1469 '_type': 'compat_list',
1470 'entries': ie_result,
1471 }
1472 if extra_info.get('original_url'):
1473 ie_result.setdefault('original_url', extra_info['original_url'])
1474 self.add_default_extra_info(ie_result, ie, url)
1475 if process:
1476 self._wait_for_video(ie_result)
1477 return self.process_ie_result(ie_result, download, extra_info)
1478 else:
1479 return ie_result
1480
1481 def add_default_extra_info(self, ie_result, ie, url):
1482 if url is not None:
1483 self.add_extra_info(ie_result, {
1484 'webpage_url': url,
1485 'original_url': url,
1486 })
1487 webpage_url = ie_result.get('webpage_url')
1488 if webpage_url:
1489 self.add_extra_info(ie_result, {
1490 'webpage_url_basename': url_basename(webpage_url),
1491 'webpage_url_domain': get_domain(webpage_url),
1492 })
1493 if ie is not None:
1494 self.add_extra_info(ie_result, {
1495 'extractor': ie.IE_NAME,
1496 'extractor_key': ie.ie_key(),
1497 })
1498
1499 def process_ie_result(self, ie_result, download=True, extra_info=None):
1500 """
1501 Take the result of the ie (may be modified) and resolve all unresolved
1502 references (URLs, playlist items).
1503
1504 It will also download the videos if 'download' is true.
1505 Returns the resolved ie_result.
1506 """
1507 if extra_info is None:
1508 extra_info = {}
1509 result_type = ie_result.get('_type', 'video')
1510
1511 if result_type in ('url', 'url_transparent'):
1512 ie_result['url'] = sanitize_url(ie_result['url'])
1513 if ie_result.get('original_url'):
1514 extra_info.setdefault('original_url', ie_result['original_url'])
1515
1516 extract_flat = self.params.get('extract_flat', False)
1517 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1518 or extract_flat is True):
1519 info_copy = ie_result.copy()
1520 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1521 if ie and not ie_result.get('id'):
1522 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1523 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1524 self.add_extra_info(info_copy, extra_info)
1525 info_copy, _ = self.pre_process(info_copy)
1526 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1527 if self.params.get('force_write_download_archive', False):
1528 self.record_download_archive(info_copy)
1529 return ie_result
1530
1531 if result_type == 'video':
1532 self.add_extra_info(ie_result, extra_info)
1533 ie_result = self.process_video_result(ie_result, download=download)
1534 additional_urls = (ie_result or {}).get('additional_urls')
1535 if additional_urls:
1536 # TODO: Improve MetadataParserPP to allow setting a list
1537 if isinstance(additional_urls, compat_str):
1538 additional_urls = [additional_urls]
1539 self.to_screen(
1540 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1541 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1542 ie_result['additional_entries'] = [
1543 self.extract_info(
1544 url, download, extra_info=extra_info,
1545 force_generic_extractor=self.params.get('force_generic_extractor'))
1546 for url in additional_urls
1547 ]
1548 return ie_result
1549 elif result_type == 'url':
1550 # We have to add extra_info to the results because it may be
1551 # contained in a playlist
1552 return self.extract_info(
1553 ie_result['url'], download,
1554 ie_key=ie_result.get('ie_key'),
1555 extra_info=extra_info)
1556 elif result_type == 'url_transparent':
1557 # Use the information from the embedding page
1558 info = self.extract_info(
1559 ie_result['url'], ie_key=ie_result.get('ie_key'),
1560 extra_info=extra_info, download=False, process=False)
1561
1562 # extract_info may return None when ignoreerrors is enabled and
1563 # extraction failed with an error, don't crash and return early
1564 # in this case
1565 if not info:
1566 return info
1567
1568 new_result = info.copy()
1569 new_result.update(filter_dict(ie_result, lambda k, v: (
1570 v is not None and k not in {'_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'})))
1571
1572 # Extracted info may not be a video result (i.e.
1573 # info.get('_type', 'video') != 'video') but rather a url or
1574 # url_transparent. In such cases outer metadata (from ie_result)
1575 # should be propagated to inner one (info). For this to happen
1576 # _type of info should be overridden with url_transparent. This
1577 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1578 if new_result.get('_type') == 'url':
1579 new_result['_type'] = 'url_transparent'
1580
1581 return self.process_ie_result(
1582 new_result, download=download, extra_info=extra_info)
1583 elif result_type in ('playlist', 'multi_video'):
1584 # Protect from infinite recursion due to recursively nested playlists
1585 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1586 webpage_url = ie_result['webpage_url']
1587 if webpage_url in self._playlist_urls:
1588 self.to_screen(
1589 '[download] Skipping already downloaded playlist: %s'
1590 % (ie_result.get('title') or ie_result.get('id')))
1591 return
1592
1593 self._playlist_level += 1
1594 self._playlist_urls.add(webpage_url)
1595 self._fill_common_fields(ie_result, False)
1596 self._sanitize_thumbnails(ie_result)
1597 try:
1598 return self.__process_playlist(ie_result, download)
1599 finally:
1600 self._playlist_level -= 1
1601 if not self._playlist_level:
1602 self._playlist_urls.clear()
1603 elif result_type == 'compat_list':
1604 self.report_warning(
1605 'Extractor %s returned a compat_list result. '
1606 'It needs to be updated.' % ie_result.get('extractor'))
1607
1608 def _fixup(r):
1609 self.add_extra_info(r, {
1610 'extractor': ie_result['extractor'],
1611 'webpage_url': ie_result['webpage_url'],
1612 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1613 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1614 'extractor_key': ie_result['extractor_key'],
1615 })
1616 return r
1617 ie_result['entries'] = [
1618 self.process_ie_result(_fixup(r), download, extra_info)
1619 for r in ie_result['entries']
1620 ]
1621 return ie_result
1622 else:
1623 raise Exception('Invalid result type: %s' % result_type)
1624
1625 def _ensure_dir_exists(self, path):
1626 return make_dir(path, self.report_error)
1627
1628 @staticmethod
1629 def _playlist_infodict(ie_result, **kwargs):
1630 return {
1631 **ie_result,
1632 'playlist': ie_result.get('title') or ie_result.get('id'),
1633 'playlist_id': ie_result.get('id'),
1634 'playlist_title': ie_result.get('title'),
1635 'playlist_uploader': ie_result.get('uploader'),
1636 'playlist_uploader_id': ie_result.get('uploader_id'),
1637 'playlist_index': 0,
1638 **kwargs,
1639 }
1640
1641 def __process_playlist(self, ie_result, download):
1642 # We process each entry in the playlist
1643 playlist = ie_result.get('title') or ie_result.get('id')
1644 self.to_screen('[download] Downloading playlist: %s' % playlist)
1645
1646 if 'entries' not in ie_result:
1647 raise EntryNotInPlaylist('There are no entries')
1648
1649 MissingEntry = object()
1650 incomplete_entries = bool(ie_result.get('requested_entries'))
1651 if incomplete_entries:
1652 def fill_missing_entries(entries, indices):
1653 ret = [MissingEntry] * max(indices)
1654 for i, entry in zip(indices, entries):
1655 ret[i - 1] = entry
1656 return ret
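# e.g. entries [e1, e2] with requested_entries [2, 5] become
# [MissingEntry, e1, MissingEntry, MissingEntry, e2]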
1657 ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
1658
1659 playlist_results = []
1660
1661 playliststart = self.params.get('playliststart', 1)
1662 playlistend = self.params.get('playlistend')
1663 # For backwards compatibility, interpret -1 as whole list
1664 if playlistend == -1:
1665 playlistend = None
1666
1667 playlistitems_str = self.params.get('playlist_items')
1668 playlistitems = None
1669 if playlistitems_str is not None:
1670 def iter_playlistitems(format):
1671 for string_segment in format.split(','):
1672 if '-' in string_segment:
1673 start, end = string_segment.split('-')
1674 for item in range(int(start), int(end) + 1):
1675 yield int(item)
1676 else:
1677 yield int(string_segment)
1678 playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
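# e.g. --playlist-items '1-3,7' yields [1, 2, 3, 7]; orderedSet also
# drops any duplicate indices while keeping their first occurrence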
1679
1680 ie_entries = ie_result['entries']
1681 if isinstance(ie_entries, list):
1682 playlist_count = len(ie_entries)
1683 msg = f'Collected {playlist_count} videos; downloading %d of them'
1684 ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count
1685
1686 def get_entry(i):
1687 return ie_entries[i - 1]
1688 else:
1689 msg = 'Downloading %d videos'
1690 if not isinstance(ie_entries, (PagedList, LazyList)):
1691 ie_entries = LazyList(ie_entries)
1692 elif isinstance(ie_entries, InAdvancePagedList):
1693 if ie_entries._pagesize == 1:
1694 playlist_count = ie_entries._pagecount
1695
1696 def get_entry(i):
1697 return YoutubeDL.__handle_extraction_exceptions(
1698 lambda self, i: ie_entries[i - 1]
1699 )(self, i)
1700
1701 entries, broken = [], False
1702 items = playlistitems if playlistitems is not None else itertools.count(playliststart)
1703 for i in items:
1704 if i == 0:
1705 continue
1706 if playlistitems is None and playlistend is not None and playlistend < i:
1707 break
1708 entry = None
1709 try:
1710 entry = get_entry(i)
1711 if entry is MissingEntry:
1712 raise EntryNotInPlaylist()
1713 except (IndexError, EntryNotInPlaylist):
1714 if incomplete_entries:
1715 raise EntryNotInPlaylist(f'Entry {i} cannot be found')
1716 elif not playlistitems:
1717 break
1718 entries.append(entry)
1719 try:
1720 if entry is not None:
1721 # TODO: Add auto-generated fields
1722 self._match_entry(entry, incomplete=True, silent=True)
1723 except (ExistingVideoReached, RejectedVideoReached):
1724 broken = True
1725 break
1726 ie_result['entries'] = entries
1727
1728 # Save playlist_index before re-ordering
1729 entries = [
1730 ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
1731 for i, entry in enumerate(entries, 1)
1732 if entry is not None]
1733 n_entries = len(entries)
1734
1735 if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
1736 ie_result['playlist_count'] = n_entries
1737
1738 if not playlistitems and (playliststart != 1 or playlistend):
1739 playlistitems = list(range(playliststart, playliststart + n_entries))
1740 ie_result['requested_entries'] = playlistitems
1741
1742 _infojson_written = False
1743 write_playlist_files = self.params.get('allow_playlist_files', True)
1744 if write_playlist_files and self.params.get('list_thumbnails'):
1745 self.list_thumbnails(ie_result)
1746 if write_playlist_files and not self.params.get('simulate'):
1747 ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
1748 _infojson_written = self._write_info_json(
1749 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
1750 if _infojson_written is None:
1751 return
1752 if self._write_description('playlist', ie_result,
1753 self.prepare_filename(ie_copy, 'pl_description')) is None:
1754 return
1755 # TODO: This should be passed to ThumbnailsConvertor if necessary
1756 self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
1757
1758 if self.params.get('playlistreverse', False):
1759 entries = entries[::-1]
1760 if self.params.get('playlistrandom', False):
1761 random.shuffle(entries)
1762
1763 x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
1764
1765 self.to_screen(f'[{ie_result["extractor"]}] playlist {playlist}: {msg % n_entries}')
1766 failures = 0
1767 max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
1768 for i, entry_tuple in enumerate(entries, 1):
1769 playlist_index, entry = entry_tuple
1770 if 'playlist-index' in self.params.get('compat_opts', []):
1771 playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
1772 self.to_screen('[download] Downloading video %s of %s' % (
1773 self._format_screen(i, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
1774 # This __x_forwarded_for_ip thing is a bit ugly but requires
1775 # minimal changes
1776 if x_forwarded_for:
1777 entry['__x_forwarded_for_ip'] = x_forwarded_for
1778 extra = {
1779 'n_entries': n_entries,
1780 '__last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
1781 'playlist_count': ie_result.get('playlist_count'),
1782 'playlist_index': playlist_index,
1783 'playlist_autonumber': i,
1784 'playlist': playlist,
1785 'playlist_id': ie_result.get('id'),
1786 'playlist_title': ie_result.get('title'),
1787 'playlist_uploader': ie_result.get('uploader'),
1788 'playlist_uploader_id': ie_result.get('uploader_id'),
1789 'extractor': ie_result['extractor'],
1790 'webpage_url': ie_result['webpage_url'],
1791 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1792 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1793 'extractor_key': ie_result['extractor_key'],
1794 }
1795
1796 if self._match_entry(entry, incomplete=True) is not None:
1797 continue
1798
1799 entry_result = self.__process_iterable_entry(entry, download, extra)
1800 if not entry_result:
1801 failures += 1
1802 if failures >= max_failures:
1803 self.report_error(
1804 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
1805 break
1806 playlist_results.append(entry_result)
1807 ie_result['entries'] = playlist_results
1808
1809 # Write the updated info to json
1810 if _infojson_written is True and self._write_info_json(
1811 'updated playlist', ie_result,
1812 self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
1813 return
1814
1815 ie_result = self.run_all_pps('playlist', ie_result)
1816 self.to_screen(f'[download] Finished downloading playlist: {playlist}')
1817 return ie_result
1818
1819 @__handle_extraction_exceptions
1820 def __process_iterable_entry(self, entry, download, extra_info):
1821 return self.process_ie_result(
1822 entry, download=download, extra_info=extra_info)
1823
1824 def _build_format_filter(self, filter_spec):
1825 " Returns a function to filter the formats according to the filter_spec "
1826
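# Illustrative numeric filter specs matched by operator_rex below:
#   'height<=720'   -> keep formats no taller than 720px
#   'filesize>100M' -> size suffixes are handled via parse_filesize
#   'fps>30?'       -> a trailing '?' also keeps formats lacking the field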
1827 OPERATORS = {
1828 '<': operator.lt,
1829 '<=': operator.le,
1830 '>': operator.gt,
1831 '>=': operator.ge,
1832 '=': operator.eq,
1833 '!=': operator.ne,
1834 }
1835 operator_rex = re.compile(r'''(?x)\s*
1836 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1837 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1838 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1839 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1840 m = operator_rex.fullmatch(filter_spec)
1841 if m:
1842 try:
1843 comparison_value = int(m.group('value'))
1844 except ValueError:
1845 comparison_value = parse_filesize(m.group('value'))
1846 if comparison_value is None:
1847 comparison_value = parse_filesize(m.group('value') + 'B')
1848 if comparison_value is None:
1849 raise ValueError(
1850 'Invalid value %r in format specification %r' % (
1851 m.group('value'), filter_spec))
1852 op = OPERATORS[m.group('op')]
1853
1854 if not m:
1855 STR_OPERATORS = {
1856 '=': operator.eq,
1857 '^=': lambda attr, value: attr.startswith(value),
1858 '$=': lambda attr, value: attr.endswith(value),
1859 '*=': lambda attr, value: value in attr,
1860 '~=': lambda attr, value: value.search(attr) is not None
1861 }
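# Illustrative string filter specs for this fallback branch:
#   'ext=mp4', 'format_id^=hls' (prefix), 'format_note*=DASH' (substring),
#   "vcodec~='^avc'" (regex; quote values with special characters);
#   a '!' before the operator negates it, e.g. 'ext!=webm'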
1862 str_operator_rex = re.compile(r'''(?x)\s*
1863 (?P<key>[a-zA-Z0-9._-]+)\s*
1864 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1865 (?P<quote>["'])?
1866 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1867 (?(quote)(?P=quote))\s*
1868 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1869 m = str_operator_rex.fullmatch(filter_spec)
1870 if m:
1871 if m.group('op') == '~=':
1872 comparison_value = re.compile(m.group('value'))
1873 else:
1874 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1875 str_op = STR_OPERATORS[m.group('op')]
1876 if m.group('negation'):
1877 op = lambda attr, value: not str_op(attr, value)
1878 else:
1879 op = str_op
1880
1881 if not m:
1882 raise SyntaxError('Invalid filter specification %r' % filter_spec)
1883
1884 def _filter(f):
1885 actual_value = f.get(m.group('key'))
1886 if actual_value is None:
1887 return m.group('none_inclusive')
1888 return op(actual_value, comparison_value)
1889 return _filter
1890
1891 def _check_formats(self, formats):
1892 for f in formats:
1893 self.to_screen('[info] Testing format %s' % f['format_id'])
1894 path = self.get_output_path('temp')
1895 if not self._ensure_dir_exists(f'{path}/'):
1896 continue
1897 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1898 temp_file.close()
1899 try:
1900 success, _ = self.dl(temp_file.name, f, test=True)
1901 except (DownloadError, OSError, ValueError) + network_exceptions:
1902 success = False
1903 finally:
1904 if os.path.exists(temp_file.name):
1905 try:
1906 os.remove(temp_file.name)
1907 except OSError:
1908 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1909 if success:
1910 yield f
1911 else:
1912 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1913
1914 def _default_format_spec(self, info_dict, download=True):
1915
1916 def can_merge():
1917 merger = FFmpegMergerPP(self)
1918 return merger.available and merger.can_merge()
1919
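# A pre-merged 'best' is preferred whenever separate streams cannot be
# merged: no usable ffmpeg, a live stream, or output going to stdout ('-')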
1920 prefer_best = (
1921 not self.params.get('simulate')
1922 and download
1923 and (
1924 not can_merge()
1925 or info_dict.get('is_live', False)
1926 or self.outtmpl_dict['default'] == '-'))
1927 compat = (
1928 prefer_best
1929 or self.params.get('allow_multiple_audio_streams', False)
1930 or 'format-spec' in self.params.get('compat_opts', []))
1931
1932 return (
1933 'best/bestvideo+bestaudio' if prefer_best
1934 else 'bestvideo*+bestaudio/best' if not compat
1935 else 'bestvideo+bestaudio/best')
1936
1937 def build_format_selector(self, format_spec):
1938 def syntax_error(note, start):
1939 message = (
1940 'Invalid format specification: '
1941 '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1]))
1942 return SyntaxError(message)
1943
1944 PICKFIRST = 'PICKFIRST'
1945 MERGE = 'MERGE'
1946 SINGLE = 'SINGLE'
1947 GROUP = 'GROUP'
1948 FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
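# Illustrative format_spec strings and how they parse:
#   'bestvideo+bestaudio/best' -> PICKFIRST of a MERGE and a SINGLE
#   'bv*[height<=1080]+ba'     -> filters attach to the preceding selector
#   '(mp4,webm)[height<480]'   -> GROUP; the filter applies to the group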
1949
1950 allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
1951 'video': self.params.get('allow_multiple_video_streams', False)}
1952
1953 check_formats = self.params.get('check_formats') == 'selected'
1954
1955 def _parse_filter(tokens):
1956 filter_parts = []
1957 for type, string, start, _, _ in tokens:
1958 if type == tokenize.OP and string == ']':
1959 return ''.join(filter_parts)
1960 else:
1961 filter_parts.append(string)
1962
1963 def _remove_unused_ops(tokens):
1964 # Remove operators that we don't use and join them with the surrounding strings
1965 # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
1966 ALLOWED_OPS = ('/', '+', ',', '(', ')')
1967 last_string, last_start, last_end, last_line = None, None, None, None
1968 for type, string, start, end, line in tokens:
1969 if type == tokenize.OP and string == '[':
1970 if last_string:
1971 yield tokenize.NAME, last_string, last_start, last_end, last_line
1972 last_string = None
1973 yield type, string, start, end, line
1974 # everything inside brackets will be handled by _parse_filter
1975 for type, string, start, end, line in tokens:
1976 yield type, string, start, end, line
1977 if type == tokenize.OP and string == ']':
1978 break
1979 elif type == tokenize.OP and string in ALLOWED_OPS:
1980 if last_string:
1981 yield tokenize.NAME, last_string, last_start, last_end, last_line
1982 last_string = None
1983 yield type, string, start, end, line
1984 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
1985 if not last_string:
1986 last_string = string
1987 last_start = start
1988 last_end = end
1989 else:
1990 last_string += string
1991 if last_string:
1992 yield tokenize.NAME, last_string, last_start, last_end, last_line
1993
1994 def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
1995 selectors = []
1996 current_selector = None
1997 for type, string, start, _, _ in tokens:
1998 # tokenize.tokenize always emits an ENCODING token first; skip it
1999 if type == getattr(tokenize, 'ENCODING', None):
2000 continue
2001 elif type in [tokenize.NAME, tokenize.NUMBER]:
2002 current_selector = FormatSelector(SINGLE, string, [])
2003 elif type == tokenize.OP:
2004 if string == ')':
2005 if not inside_group:
2006 # ')' will be handled by the parentheses group
2007 tokens.restore_last_token()
2008 break
2009 elif inside_merge and string in ['/', ',']:
2010 tokens.restore_last_token()
2011 break
2012 elif inside_choice and string == ',':
2013 tokens.restore_last_token()
2014 break
2015 elif string == ',':
2016 if not current_selector:
2017 raise syntax_error('"," must follow a format selector', start)
2018 selectors.append(current_selector)
2019 current_selector = None
2020 elif string == '/':
2021 if not current_selector:
2022 raise syntax_error('"/" must follow a format selector', start)
2023 first_choice = current_selector
2024 second_choice = _parse_format_selection(tokens, inside_choice=True)
2025 current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
2026 elif string == '[':
2027 if not current_selector:
2028 current_selector = FormatSelector(SINGLE, 'best', [])
2029 format_filter = _parse_filter(tokens)
2030 current_selector.filters.append(format_filter)
2031 elif string == '(':
2032 if current_selector:
2033 raise syntax_error('Unexpected "("', start)
2034 group = _parse_format_selection(tokens, inside_group=True)
2035 current_selector = FormatSelector(GROUP, group, [])
2036 elif string == '+':
2037 if not current_selector:
2038 raise syntax_error('Unexpected "+"', start)
2039 selector_1 = current_selector
2040 selector_2 = _parse_format_selection(tokens, inside_merge=True)
2041 if not selector_2:
2042 raise syntax_error('Expected a selector', start)
2043 current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
2044 else:
2045 raise syntax_error(f'Operator not recognized: "{string}"', start)
2046 elif type == tokenize.ENDMARKER:
2047 break
2048 if current_selector:
2049 selectors.append(current_selector)
2050 return selectors
2051
2052 def _merge(formats_pair):
2053 format_1, format_2 = formats_pair
2054
2055 formats_info = []
2056 formats_info.extend(format_1.get('requested_formats', (format_1,)))
2057 formats_info.extend(format_2.get('requested_formats', (format_2,)))
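# e.g. merging a video-only mp4 with an audio-only m4a yields one entry
# carrying both in 'requested_formats', with format_id 'V+A' and ext 'mp4'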
2058
2059 if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
2060 get_no_more = {'video': False, 'audio': False}
2061 for (i, fmt_info) in enumerate(formats_info):
2062 if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
2063 formats_info.pop(i)
2064 continue
2065 for aud_vid in ['audio', 'video']:
2066 if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
2067 if get_no_more[aud_vid]:
2068 formats_info.pop(i)
2069 break
2070 get_no_more[aud_vid] = True
2071
2072 if len(formats_info) == 1:
2073 return formats_info[0]
2074
2075 video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
2076 audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
2077
2078 the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
2079 the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
2080
2081 output_ext = self.params.get('merge_output_format')
2082 if not output_ext:
2083 if the_only_video:
2084 output_ext = the_only_video['ext']
2085 elif the_only_audio and not video_fmts:
2086 output_ext = the_only_audio['ext']
2087 else:
2088 output_ext = 'mkv'
2089
2090 filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))
2091
2092 new_dict = {
2093 'requested_formats': formats_info,
2094 'format': '+'.join(filtered('format')),
2095 'format_id': '+'.join(filtered('format_id')),
2096 'ext': output_ext,
2097 'protocol': '+'.join(map(determine_protocol, formats_info)),
2098 'language': '+'.join(orderedSet(filtered('language'))) or None,
2099 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
2100 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
2101 'tbr': sum(filtered('tbr', 'vbr', 'abr')),
2102 }
2103
2104 if the_only_video:
2105 new_dict.update({
2106 'width': the_only_video.get('width'),
2107 'height': the_only_video.get('height'),
2108 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
2109 'fps': the_only_video.get('fps'),
2110 'dynamic_range': the_only_video.get('dynamic_range'),
2111 'vcodec': the_only_video.get('vcodec'),
2112 'vbr': the_only_video.get('vbr'),
2113 'stretched_ratio': the_only_video.get('stretched_ratio'),
2114 })
2115
2116 if the_only_audio:
2117 new_dict.update({
2118 'acodec': the_only_audio.get('acodec'),
2119 'abr': the_only_audio.get('abr'),
2120 'asr': the_only_audio.get('asr'),
2121 })
2122
2123 return new_dict
2124
2125 def _check_formats(formats):
2126 if not check_formats:
2127 yield from formats
2128 return
2129 yield from self._check_formats(formats)
2130
2131 def _build_selector_function(selector):
2132 if isinstance(selector, list): # ,
2133 fs = [_build_selector_function(s) for s in selector]
2134
2135 def selector_function(ctx):
2136 for f in fs:
2137 yield from f(ctx)
2138 return selector_function
2139
2140 elif selector.type == GROUP: # ()
2141 selector_function = _build_selector_function(selector.selector)
2142
2143 elif selector.type == PICKFIRST: # /
2144 fs = [_build_selector_function(s) for s in selector.selector]
2145
2146 def selector_function(ctx):
2147 for f in fs:
2148 picked_formats = list(f(ctx))
2149 if picked_formats:
2150 return picked_formats
2151 return []
2152
2153 elif selector.type == MERGE: # +
2154 selector_1, selector_2 = map(_build_selector_function, selector.selector)
2155
2156 def selector_function(ctx):
2157 for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
2158 yield _merge(pair)
2159
2160 elif selector.type == SINGLE: # atom
2161 format_spec = selector.selector or 'best'
2162
2163 # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
2164 if format_spec == 'all':
2165 def selector_function(ctx):
2166 yield from _check_formats(ctx['formats'][::-1])
2167 elif format_spec == 'mergeall':
2168 def selector_function(ctx):
2169 formats = list(_check_formats(
2170 f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none'))
2171 if not formats:
2172 return
2173 merged_format = formats[-1]
2174 for f in formats[-2::-1]:
2175 merged_format = _merge((merged_format, f))
2176 yield merged_format
2177
2178 else:
2179 format_fallback, separate_fallback, format_reverse, format_idx = False, None, True, 1
2180 mobj = re.match(
2181 r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
2182 format_spec)
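# e.g. 'bv*.2' -> the 2nd best format containing a video stream;
# 'wa' -> worst audio-only format; plain 'b'/'w' match only formats
# that have both audio and video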
2183 if mobj is not None:
2184 format_idx = int_or_none(mobj.group('n'), default=1)
2185 format_reverse = mobj.group('bw')[0] == 'b'
2186 format_type = (mobj.group('type') or [None])[0]
2187 not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
2188 format_modified = mobj.group('mod') is not None
2189
2190 format_fallback = not format_type and not format_modified # for b, w
2191 _filter_f = (
2192 (lambda f: f.get('%scodec' % format_type) != 'none')
2193 if format_type and format_modified # bv*, ba*, wv*, wa*
2194 else (lambda f: f.get('%scodec' % not_format_type) == 'none')
2195 if format_type # bv, ba, wv, wa
2196 else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
2197 if not format_modified # b, w
2198 else lambda f: True) # b*, w*
2199 filter_f = lambda f: _filter_f(f) and (
2200 f.get('vcodec') != 'none' or f.get('acodec') != 'none')
2201 else:
2202 if format_spec in self._format_selection_exts['audio']:
2203 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
2204 elif format_spec in self._format_selection_exts['video']:
2205 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
2206 separate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none'
2207 elif format_spec in self._format_selection_exts['storyboards']:
2208 filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
2209 else:
2210 filter_f = lambda f: f.get('format_id') == format_spec # id
2211
2212 def selector_function(ctx):
2213 formats = list(ctx['formats'])
2214 matches = list(filter(filter_f, formats)) if filter_f is not None else formats
2215 if not matches:
2216 if format_fallback and ctx['incomplete_formats']:
2217 # for extractors with incomplete formats (audio-only (soundcloud)
2218 # or video-only (imgur)), best/worst will fall back to the
2219 # best/worst {video,audio}-only format
2220 matches = formats
2221 elif separate_fallback and not ctx['has_merged_format']:
2222 # for compatibility with youtube-dl when there is no pre-merged format
2223 matches = list(filter(separate_fallback, formats))
2224 matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
2225 try:
2226 yield matches[format_idx - 1]
2227 except LazyList.IndexError:
2228 return
2229
2230 filters = [self._build_format_filter(f) for f in selector.filters]
2231
2232 def final_selector(ctx):
2233 ctx_copy = dict(ctx)
2234 for _filter in filters:
2235 ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
2236 return selector_function(ctx_copy)
2237 return final_selector
2238
2239 stream = io.BytesIO(format_spec.encode('utf-8'))
2240 try:
2241 tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline)))
2242 except tokenize.TokenError:
2243 raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
2244
2245 class TokenIterator:
2246 def __init__(self, tokens):
2247 self.tokens = tokens
2248 self.counter = 0
2249
2250 def __iter__(self):
2251 return self
2252
2253 def __next__(self):
2254 if self.counter >= len(self.tokens):
2255 raise StopIteration()
2256 value = self.tokens[self.counter]
2257 self.counter += 1
2258 return value
2259
2260 next = __next__
2261
2262 def restore_last_token(self):
2263 self.counter -= 1
2264
2265 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
2266 return _build_selector_function(parsed_selector)
2267
2268 def _calc_headers(self, info_dict):
2269 res = merge_headers(self.params['http_headers'], info_dict.get('http_headers') or {})
2270
2271 cookies = self._calc_cookies(info_dict)
2272 if cookies:
2273 res['Cookie'] = cookies
2274
2275 if 'X-Forwarded-For' not in res:
2276 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
2277 if x_forwarded_for_ip:
2278 res['X-Forwarded-For'] = x_forwarded_for_ip
2279
2280 return res
2281
2282 def _calc_cookies(self, info_dict):
2283 pr = sanitized_Request(info_dict['url'])
2284 self.cookiejar.add_cookie_header(pr)
2285 return pr.get_header('Cookie')
2286
2287 def _sort_thumbnails(self, thumbnails):
2288 thumbnails.sort(key=lambda t: (
2289 t.get('preference') if t.get('preference') is not None else -1,
2290 t.get('width') if t.get('width') is not None else -1,
2291 t.get('height') if t.get('height') is not None else -1,
2292 t.get('id') if t.get('id') is not None else '',
2293 t.get('url')))
2294
2295 def _sanitize_thumbnails(self, info_dict):
2296 thumbnails = info_dict.get('thumbnails')
2297 if thumbnails is None:
2298 thumbnail = info_dict.get('thumbnail')
2299 if thumbnail:
2300 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2301 if not thumbnails:
2302 return
2303
2304 def check_thumbnails(thumbnails):
2305 for t in thumbnails:
2306 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2307 try:
2308 self.urlopen(HEADRequest(t['url']))
2309 except network_exceptions as err:
2310 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2311 continue
2312 yield t
2313
2314 self._sort_thumbnails(thumbnails)
2315 for i, t in enumerate(thumbnails):
2316 if t.get('id') is None:
2317 t['id'] = '%d' % i
2318 if t.get('width') and t.get('height'):
2319 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2320 t['url'] = sanitize_url(t['url'])
2321
2322 if self.params.get('check_formats') is True:
2323 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2324 else:
2325 info_dict['thumbnails'] = thumbnails
2326
2327 def _fill_common_fields(self, info_dict, is_video=True):
2328 # TODO: move sanitization here
2329 if is_video:
2330 # playlists are allowed to lack "title"
2331 info_dict['fulltitle'] = info_dict.get('title')
2332 if 'title' not in info_dict:
2333 raise ExtractorError('Missing "title" field in extractor result',
2334 video_id=info_dict['id'], ie=info_dict['extractor'])
2335 elif not info_dict.get('title'):
2336 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2337 info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
2338
2339 if info_dict.get('duration') is not None:
2340 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2341
2342 for ts_key, date_key in (
2343 ('timestamp', 'upload_date'),
2344 ('release_timestamp', 'release_date'),
2345 ('modified_timestamp', 'modified_date'),
2346 ):
2347 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2348 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2349 # see http://bugs.python.org/issue1646728)
2350 with contextlib.suppress(ValueError, OverflowError, OSError):
2351 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2352 info_dict[date_key] = upload_date.strftime('%Y%m%d')
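# e.g. timestamp=1640995200 produces upload_date='20220101' (UTC)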
2353
2354 live_keys = ('is_live', 'was_live')
2355 live_status = info_dict.get('live_status')
2356 if live_status is None:
2357 for key in live_keys:
2358 if info_dict.get(key) is False:
2359 continue
2360 if info_dict.get(key):
2361 live_status = key
2362 break
2363 if all(info_dict.get(key) is False for key in live_keys):
2364 live_status = 'not_live'
2365 if live_status:
2366 info_dict['live_status'] = live_status
2367 for key in live_keys:
2368 if info_dict.get(key) is None:
2369 info_dict[key] = (live_status == key)
2370
2371 # Auto generate title fields corresponding to the *_number fields when missing
2372 # in order to always have clean titles. This is very common for TV series.
2373 for field in ('chapter', 'season', 'episode'):
2374 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2375 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2376
2377 def process_video_result(self, info_dict, download=True):
2378 assert info_dict.get('_type', 'video') == 'video'
2379 self._num_videos += 1
2380
2381 if 'id' not in info_dict:
2382 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2383 elif not info_dict.get('id'):
2384 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2385
2386 def report_force_conversion(field, field_not, conversion):
2387 self.report_warning(
2388 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2389 % (field, field_not, conversion))
2390
2391 def sanitize_string_field(info, string_field):
2392 field = info.get(string_field)
2393 if field is None or isinstance(field, compat_str):
2394 return
2395 report_force_conversion(string_field, 'a string', 'string')
2396 info[string_field] = compat_str(field)
2397
2398 def sanitize_numeric_fields(info):
2399 for numeric_field in self._NUMERIC_FIELDS:
2400 field = info.get(numeric_field)
2401 if field is None or isinstance(field, (int, float)):
2402 continue
2403 report_force_conversion(numeric_field, 'numeric', 'int')
2404 info[numeric_field] = int_or_none(field)
2405
2406 sanitize_string_field(info_dict, 'id')
2407 sanitize_numeric_fields(info_dict)
2408 if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
2409 self.report_warning('"duration" field is negative, there is an error in extractor')
2410
2411 if 'playlist' not in info_dict:
2412 # It isn't part of a playlist
2413 info_dict['playlist'] = None
2414 info_dict['playlist_index'] = None
2415
2416 self._sanitize_thumbnails(info_dict)
2417
2418 thumbnail = info_dict.get('thumbnail')
2419 thumbnails = info_dict.get('thumbnails')
2420 if thumbnail:
2421 info_dict['thumbnail'] = sanitize_url(thumbnail)
2422 elif thumbnails:
2423 info_dict['thumbnail'] = thumbnails[-1]['url']
2424
2425 if info_dict.get('display_id') is None and 'id' in info_dict:
2426 info_dict['display_id'] = info_dict['id']
2427
2428 self._fill_common_fields(info_dict)
2429
2430 for cc_kind in ('subtitles', 'automatic_captions'):
2431 cc = info_dict.get(cc_kind)
2432 if cc:
2433 for _, subtitle in cc.items():
2434 for subtitle_format in subtitle:
2435 if subtitle_format.get('url'):
2436 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2437 if subtitle_format.get('ext') is None:
2438 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2439
2440 automatic_captions = info_dict.get('automatic_captions')
2441 subtitles = info_dict.get('subtitles')
2442
2443 info_dict['requested_subtitles'] = self.process_subtitles(
2444 info_dict['id'], subtitles, automatic_captions)
2445
2446 if info_dict.get('formats') is None:
2447 # There's only one format available
2448 formats = [info_dict]
2449 else:
2450 formats = info_dict['formats']
2451
2452 # or None ensures --clean-infojson removes it
2453 info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
2454 if not self.params.get('allow_unplayable_formats'):
2455 formats = [f for f in formats if not f.get('has_drm')]
2456 if info_dict['_has_drm'] and all(
2457 f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
2458 self.report_warning(
2459 'This video is DRM protected and only images are available for download. '
2460 'Use --list-formats to see them')
2461
2462 get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
2463 if not get_from_start:
2464 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2465 if info_dict.get('is_live') and formats:
2466 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2467 if get_from_start and not formats:
2468 self.raise_no_formats(info_dict, msg=(
2469 '--live-from-start is passed, but there are no formats that can be downloaded from the start. '
2470 'If you want to download from the current time, use --no-live-from-start'))
2471
2472 if not formats:
2473 self.raise_no_formats(info_dict)
2474
2475 def is_wellformed(f):
2476 url = f.get('url')
2477 if not url:
2478 self.report_warning(
2479 '"url" field is missing or empty - skipping format, '
2480 'there is an error in extractor')
2481 return False
2482 if isinstance(url, bytes):
2483 sanitize_string_field(f, 'url')
2484 return True
2485
2486 # Filter out malformed formats for better extraction robustness
2487 formats = list(filter(is_wellformed, formats))
2488
2489 formats_dict = {}
2490
2491 # We check that all the formats have the format and format_id fields
2492 for i, format in enumerate(formats):
2493 sanitize_string_field(format, 'format_id')
2494 sanitize_numeric_fields(format)
2495 format['url'] = sanitize_url(format['url'])
2496 if not format.get('format_id'):
2497 format['format_id'] = compat_str(i)
2498 else:
2499 # Sanitize format_id from characters used in format selector expression
2500 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
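# e.g. 'hls,live' becomes 'hls_live', so that ',', '/', '+', brackets and
# parentheses keep their special meaning in format selector expressions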
2501 format_id = format['format_id']
2502 if format_id not in formats_dict:
2503 formats_dict[format_id] = []
2504 formats_dict[format_id].append(format)
2505
2506 # Make sure all formats have unique format_id
2507 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2508 for format_id, ambiguous_formats in formats_dict.items():
2509 ambiguous_id = len(ambiguous_formats) > 1
2510 for i, format in enumerate(ambiguous_formats):
2511 if ambiguous_id:
2512 format['format_id'] = '%s-%d' % (format_id, i)
2513 if format.get('ext') is None:
2514 format['ext'] = determine_ext(format['url']).lower()
2515 # Ensure there is no conflict between id and ext in format selection
2516 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2517 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2518 format['format_id'] = 'f%s' % format['format_id']
2519
2520 for i, format in enumerate(formats):
2521 if format.get('format') is None:
2522 format['format'] = '{id} - {res}{note}'.format(
2523 id=format['format_id'],
2524 res=self.format_resolution(format),
2525 note=format_field(format, 'format_note', ' (%s)'),
2526 )
2527 if format.get('protocol') is None:
2528 format['protocol'] = determine_protocol(format)
2529 if format.get('resolution') is None:
2530 format['resolution'] = self.format_resolution(format, default=None)
2531 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2532 format['dynamic_range'] = 'SDR'
2533 if (info_dict.get('duration') and format.get('tbr')
2534 and not format.get('filesize') and not format.get('filesize_approx')):
2535 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
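# approximate bytes: duration (s) * tbr (KBit/s) * 1024 bits / 8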
2536
2537 # Add HTTP headers, so that external programs can use them from the
2538 # json output
2539 full_format_info = info_dict.copy()
2540 full_format_info.update(format)
2541 format['http_headers'] = self._calc_headers(full_format_info)
2542 # Remove private housekeeping stuff
2543 if '__x_forwarded_for_ip' in info_dict:
2544 del info_dict['__x_forwarded_for_ip']
2545
2546 if self.params.get('check_formats') is True:
2547 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2548
2549 if not formats or formats[0] is not info_dict:
2550 # Only set the 'formats' field if the original info_dict lists them;
2551 # otherwise we end up with a circular reference: the first (and only)
2552 # element of the 'formats' field in info_dict would be info_dict itself,
2553 # which can't be exported to JSON
2554 info_dict['formats'] = formats
2555
2556 info_dict, _ = self.pre_process(info_dict)
2557
2558 if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
2559 return info_dict
2560
2561 self.post_extract(info_dict)
2562 info_dict, _ = self.pre_process(info_dict, 'after_filter')
2563
2564 # The pre-processors may have modified the formats
2565 formats = info_dict.get('formats', [info_dict])
2566
2567 list_only = self.params.get('simulate') is None and (
2568 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2569 interactive_format_selection = not list_only and self.format_selector == '-'
2570 if self.params.get('list_thumbnails'):
2571 self.list_thumbnails(info_dict)
2572 if self.params.get('listsubtitles'):
2573 if 'automatic_captions' in info_dict:
2574 self.list_subtitles(
2575 info_dict['id'], automatic_captions, 'automatic captions')
2576 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2577 if self.params.get('listformats') or interactive_format_selection:
2578 self.list_formats(info_dict)
2579 if list_only:
2580 # Without this printing, -F --print-json will not work
2581 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2582 return
2583
2584 format_selector = self.format_selector
2585 if format_selector is None:
2586 req_format = self._default_format_spec(info_dict, download=download)
2587 self.write_debug('Default format spec: %s' % req_format)
2588 format_selector = self.build_format_selector(req_format)
2589
2590 while True:
2591 if interactive_format_selection:
2592 req_format = input(
2593 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2594 try:
2595 format_selector = self.build_format_selector(req_format)
2596 except SyntaxError as err:
2597 self.report_error(err, tb=False, is_error=False)
2598 continue
2599
2600 formats_to_download = list(format_selector({
2601 'formats': formats,
2602 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats),
2603 'incomplete_formats': (
2604 # All formats are video-only or
2605 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2606 # all formats are audio-only
2607 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)),
2608 }))
2609 if interactive_format_selection and not formats_to_download:
2610 self.report_error('Requested format is not available', tb=False, is_error=False)
2611 continue
2612 break
2613
2614 if not formats_to_download:
2615 if not self.params.get('ignore_no_formats_error'):
2616 raise ExtractorError(
2617 'Requested format is not available. Use --list-formats for a list of available formats',
2618 expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
2619 self.report_warning('Requested format is not available')
2620 # Process what we can, even without any available formats.
2621 formats_to_download = [{}]
2622
2623 best_format = formats_to_download[-1]
2624 if download:
2625 if best_format:
2626 self.to_screen(
2627 f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2628 + ', '.join([f['format_id'] for f in formats_to_download]))
2629 max_downloads_reached = False
2630 for i, fmt in enumerate(formats_to_download):
2631 formats_to_download[i] = new_info = self._copy_infodict(info_dict)
2632 new_info.update(fmt)
2633 try:
2634 self.process_info(new_info)
2635 except MaxDownloadsReached:
2636 max_downloads_reached = True
2637 # Remove copied info
2638 for key, val in tuple(new_info.items()):
2639 if info_dict.get(key) == val:
2640 new_info.pop(key)
2641 if max_downloads_reached:
2642 break
2643
2644 write_archive = {f.get('__write_download_archive', False) for f in formats_to_download}
2645 assert write_archive.issubset({True, False, 'ignore'})
2646 if True in write_archive and False not in write_archive:
2647 self.record_download_archive(info_dict)
2648
2649 info_dict['requested_downloads'] = formats_to_download
2650 info_dict = self.run_all_pps('after_video', info_dict)
2651 if max_downloads_reached:
2652 raise MaxDownloadsReached()
2653
2654 # We update the info dict with the selected best quality format (backwards compatibility)
2655 info_dict.update(best_format)
2656 return info_dict
2657
2658 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
2659 """Select the requested subtitles and their format"""
2660 available_subs, normal_sub_langs = {}, []
2661 if normal_subtitles and self.params.get('writesubtitles'):
2662 available_subs.update(normal_subtitles)
2663 normal_sub_langs = tuple(normal_subtitles.keys())
2664 if automatic_captions and self.params.get('writeautomaticsub'):
2665 for lang, cap_info in automatic_captions.items():
2666 if lang not in available_subs:
2667 available_subs[lang] = cap_info
2668
2669 if (not self.params.get('writesubtitles')
2670 and not self.params.get('writeautomaticsub')
2671 or not available_subs):
2672 return None
2673
2674 all_sub_langs = tuple(available_subs.keys())
2675 if self.params.get('allsubtitles', False):
2676 requested_langs = all_sub_langs
2677 elif self.params.get('subtitleslangs', False):
2678 # A list is used so that the order of languages will be the same as
2679 # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
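# e.g. subtitleslangs ['en.*', '-en-US'] first collects every en* track,
# then the '-'-prefixed pattern removes en-US from the selection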
2680 requested_langs = []
2681 for lang_re in self.params.get('subtitleslangs'):
2682 discard = lang_re[0] == '-'
2683 if discard:
2684 lang_re = lang_re[1:]
2685 if lang_re == 'all':
2686 if discard:
2687 requested_langs = []
2688 else:
2689 requested_langs.extend(all_sub_langs)
2690 continue
2691 current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
2692 if discard:
2693 for lang in current_langs:
2694 while lang in requested_langs:
2695 requested_langs.remove(lang)
2696 else:
2697 requested_langs.extend(current_langs)
2698 requested_langs = orderedSet(requested_langs)
2699 elif normal_sub_langs:
2700 requested_langs = ['en'] if 'en' in normal_sub_langs else normal_sub_langs[:1]
2701 else:
2702 requested_langs = ['en'] if 'en' in all_sub_langs else all_sub_langs[:1]
2703 if requested_langs:
2704 self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
2705
2706 formats_query = self.params.get('subtitlesformat', 'best')
2707 formats_preference = formats_query.split('/') if formats_query else []
2708 subs = {}
2709 for lang in requested_langs:
2710 formats = available_subs.get(lang)
2711 if formats is None:
2712 self.report_warning(f'{lang} subtitles not available for {video_id}')
2713 continue
2714 for ext in formats_preference:
2715 if ext == 'best':
2716 f = formats[-1]
2717 break
2718 matches = list(filter(lambda f: f['ext'] == ext, formats))
2719 if matches:
2720 f = matches[-1]
2721 break
2722 else:
2723 f = formats[-1]
2724 self.report_warning(
2725 'No subtitle format found matching "%s" for language %s, '
2726 'using %s' % (formats_query, lang, f['ext']))
2727 subs[lang] = f
2728 return subs
2729
2730 def _forceprint(self, key, info_dict):
2731 if info_dict is None:
2732 return
2733 info_copy = info_dict.copy()
2734 info_copy['formats_table'] = self.render_formats_table(info_dict)
2735 info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict)
2736 info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles'))
2737 info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
2738
2739 def format_tmpl(tmpl):
2740 mobj = re.match(r'\w+(=?)$', tmpl)
2741 if mobj and mobj.group(1):
2742 return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
2743 elif mobj:
2744 return f'%({tmpl})s'
2745 return tmpl
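# e.g. a forceprint entry 'title=' expands to 'title = %(title)r',
# bare 'id' becomes '%(id)s', and full templates pass through unchanged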
2746
2747 for tmpl in self.params['forceprint'].get(key, []):
2748 self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
2749
2750 for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
2751 filename = self.prepare_filename(info_dict, outtmpl=file_tmpl)
2752 tmpl = format_tmpl(tmpl)
2753 self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
2754 if self._ensure_dir_exists(filename):
2755 with open(filename, 'a', encoding='utf-8') as f:
2756 f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
2757
2758 def __forced_printings(self, info_dict, filename, incomplete):
2759 def print_mandatory(field, actual_field=None):
2760 if actual_field is None:
2761 actual_field = field
2762 if (self.params.get('force%s' % field, False)
2763 and (not incomplete or info_dict.get(actual_field) is not None)):
2764 self.to_stdout(info_dict[actual_field])
2765
2766 def print_optional(field):
2767 if (self.params.get('force%s' % field, False)
2768 and info_dict.get(field) is not None):
2769 self.to_stdout(info_dict[field])
2770
2771 info_dict = info_dict.copy()
2772 if filename is not None:
2773 info_dict['filename'] = filename
2774 if info_dict.get('requested_formats') is not None:
2775 # For RTMP URLs, also include the playpath
2776 info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
2777 elif info_dict.get('url'):
2778 info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
2779
2780 if (self.params.get('forcejson')
2781 or self.params['forceprint'].get('video')
2782 or self.params['print_to_file'].get('video')):
2783 self.post_extract(info_dict)
2784 self._forceprint('video', info_dict)
2785
2786 print_mandatory('title')
2787 print_mandatory('id')
2788 print_mandatory('url', 'urls')
2789 print_optional('thumbnail')
2790 print_optional('description')
2791 print_optional('filename')
2792 if self.params.get('forceduration') and info_dict.get('duration') is not None:
2793 self.to_stdout(formatSeconds(info_dict['duration']))
2794 print_mandatory('format')
2795
2796 if self.params.get('forcejson'):
2797 self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2798
2799 def dl(self, name, info, subtitle=False, test=False):
2800 if not info.get('url'):
2801 self.raise_no_formats(info, True)
2802
2803 if test:
2804 verbose = self.params.get('verbose')
2805 params = {
2806 'test': True,
2807 'quiet': self.params.get('quiet') or not verbose,
2808 'verbose': verbose,
2809 'noprogress': not verbose,
2810 'nopart': True,
2811 'skip_unavailable_fragments': False,
2812 'keep_fragments': False,
2813 'overwrites': True,
2814 '_no_ytdl_file': True,
2815 }
2816 else:
2817 params = self.params
2818 fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params)
2819 if not test:
2820 for ph in self._progress_hooks:
2821 fd.add_progress_hook(ph)
2822 urls = '", "'.join(
2823 (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url'])
2824 for f in info.get('requested_formats', []) or [info])
2825 self.write_debug('Invoking downloader on "%s"' % urls)
2826
2827 # Note: Ideally, info should be deep-copied so that hooks cannot modify it.
2828 # But it may contain objects that are not deep-copyable
2829 new_info = self._copy_infodict(info)
2830 if new_info.get('http_headers') is None:
2831 new_info['http_headers'] = self._calc_headers(new_info)
2832 return fd.download(name, new_info, subtitle)
2833
2834 def existing_file(self, filepaths, *, default_overwrite=True):
2835 existing_files = list(filter(os.path.exists, orderedSet(filepaths)))
2836 if existing_files and not self.params.get('overwrites', default_overwrite):
2837 return existing_files[0]
2838
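# Otherwise overwriting is allowed (or nothing exists yet): delete all
# the existing candidates and return None so the caller (re)downloads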
2839 for file in existing_files:
2840 self.report_file_delete(file)
2841 os.remove(file)
2842 return None
2843
2844 def process_info(self, info_dict):
2845 """Process a single resolved IE result. (Modifies it in-place)"""
2846
2847 assert info_dict.get('_type', 'video') == 'video'
2848 original_infodict = info_dict
2849
2850 if 'format' not in info_dict and 'ext' in info_dict:
2851 info_dict['format'] = info_dict['ext']
2852
2853 # This is mostly just for backward compatibility of process_info
2854 # As a side-effect, this allows for format-specific filters
2855 if self._match_entry(info_dict) is not None:
2856 info_dict['__write_download_archive'] = 'ignore'
2857 return
2858
2859 # Does nothing under normal operation - for backward compatibility of process_info
2860 self.post_extract(info_dict)
2861 self._num_downloads += 1
2862
2863 # info_dict['_filename'] needs to be set for backward compatibility
2864 info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
2865 temp_filename = self.prepare_filename(info_dict, 'temp')
2866 files_to_move = {}
2867
2868 # Forced printings
2869 self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))
2870
2871 if self.params.get('simulate'):
2872 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2873 return
2874
2875 if full_filename is None:
2876 return
2877 if not self._ensure_dir_exists(encodeFilename(full_filename)):
2878 return
2879 if not self._ensure_dir_exists(encodeFilename(temp_filename)):
2880 return
2881
2882 if self._write_description('video', info_dict,
2883 self.prepare_filename(info_dict, 'description')) is None:
2884 return
2885
2886 sub_files = self._write_subtitles(info_dict, temp_filename)
2887 if sub_files is None:
2888 return
2889 files_to_move.update(dict(sub_files))
2890
2891 thumb_files = self._write_thumbnails(
2892 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
2893 if thumb_files is None:
2894 return
2895 files_to_move.update(dict(thumb_files))
2896
2897 infofn = self.prepare_filename(info_dict, 'infojson')
2898 _infojson_written = self._write_info_json('video', info_dict, infofn)
2899 if _infojson_written:
2900 info_dict['infojson_filename'] = infofn
2901 # For backward compatibility, even though it was a private field
2902 info_dict['__infojson_filename'] = infofn
2903 elif _infojson_written is None:
2904 return
2905
2906 # Note: Annotations are deprecated
2907 annofn = None
2908 if self.params.get('writeannotations', False):
2909 annofn = self.prepare_filename(info_dict, 'annotation')
2910 if annofn:
2911 if not self._ensure_dir_exists(encodeFilename(annofn)):
2912 return
2913 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
2914 self.to_screen('[info] Video annotations are already present')
2915 elif not info_dict.get('annotations'):
2916 self.report_warning('There are no annotations to write.')
2917 else:
2918 try:
2919 self.to_screen('[info] Writing video annotations to: ' + annofn)
2920 with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
2921 annofile.write(info_dict['annotations'])
2922 except (KeyError, TypeError):
2923 self.report_warning('There are no annotations to write.')
2924 except OSError:
2925 self.report_error('Cannot write annotations file: ' + annofn)
2926 return
2927
2928 # Write internet shortcut files
2929 def _write_link_file(link_type):
2930 url = try_get(info_dict['webpage_url'], iri_to_uri)
2931 if not url:
2932 self.report_warning(
2933 f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
2934 return True
2935 linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
2936 if not self._ensure_dir_exists(encodeFilename(linkfn)):
2937 return False
2938 if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
2939 self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
2940 return True
2941 try:
2942 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
2943 with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
2944 newline='\r\n' if link_type == 'url' else '\n') as linkfile:
2945 template_vars = {'url': url}
2946 if link_type == 'desktop':
2947 template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
2948 linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
2949 except OSError:
2950 self.report_error(f'Cannot write internet shortcut {linkfn}')
2951 return False
2952 return True
2953
2954 write_links = {
2955 'url': self.params.get('writeurllink'),
2956 'webloc': self.params.get('writewebloclink'),
2957 'desktop': self.params.get('writedesktoplink'),
2958 }
2959 if self.params.get('writelink'):
2960 link_type = ('webloc' if sys.platform == 'darwin'
2961 else 'desktop' if sys.platform.startswith('linux')
2962 else 'url')
2963 write_links[link_type] = True
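# For reference, LINK_TEMPLATES (from utils) provides the three flavours:
# .url is a Windows INI snippet ('[InternetShortcut]\nURL=...'), .webloc an
# Apple XML plist, and .desktop an XDG desktop entry with Type=Link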
2964
2965 if any(should_write and not _write_link_file(link_type)
2966 for link_type, should_write in write_links.items()):
2967 return
2968
2969 def replace_info_dict(new_info):
2970 nonlocal info_dict
2971 if new_info == info_dict:
2972 return
2973 info_dict.clear()
2974 info_dict.update(new_info)
2975
2976 try:
2977 new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
2978 replace_info_dict(new_info)
2979 except PostProcessingError as err:
2980 self.report_error('Preprocessing: %s' % str(err))
2981 return
2982
2983 if self.params.get('skip_download'):
2984 info_dict['filepath'] = temp_filename
2985 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
2986 info_dict['__files_to_move'] = files_to_move
2987 replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
2988 info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
2989 else:
2990 # Download
2991 info_dict.setdefault('__postprocessors', [])
2992 try:
2993
2994 def existing_video_file(*filepaths):
2995 ext = info_dict.get('ext')
2996 converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
2997 file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
2998 default_overwrite=False)
2999 if file:
3000 info_dict['ext'] = os.path.splitext(file)[1][1:]
3001 return file
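# Converted and original names are interleaved so that an already
# post-processed file is preferred; e.g. with ext='webm' and final_ext='mp4'
# the candidates are tried in the order: full.mp4, full.webm, temp.mp4, temp.webm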
3002
3003 success = True
3004 if info_dict.get('requested_formats') is not None:
3005
3006 def compatible_formats(formats):
3007 # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
3008 video_formats = [format for format in formats if format.get('vcodec') != 'none']
3009 audio_formats = [format for format in formats if format.get('acodec') != 'none']
3010 if len(video_formats) > 2 or len(audio_formats) > 2:
3011 return False
3012
3013 # Check extension
3014 exts = {format.get('ext') for format in formats}
3015 COMPATIBLE_EXTS = (
3016 {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'},
3017 {'webm'},
3018 )
3019 for ext_sets in COMPATIBLE_EXTS:
3020 if ext_sets.issuperset(exts):
3021 return True
3022 # TODO: Check acodec/vcodec
3023 return False
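# e.g. mp4 video + m4a audio is mergeable as-is, while webm video +
# m4a audio fails this check and gets merged into mkv below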
3024
3025 requested_formats = info_dict['requested_formats']
3026 old_ext = info_dict['ext']
3027 if self.params.get('merge_output_format') is None:
3028 if not compatible_formats(requested_formats):
3029 info_dict['ext'] = 'mkv'
3030 self.report_warning(
3031 'Requested formats are incompatible for merge and will be merged into mkv')
3032 if (info_dict['ext'] == 'webm'
3033 and info_dict.get('thumbnails')
3034 # check with type instead of pp_key, __name__, or isinstance
3035 # since we don't want any custom PPs to trigger this
3036 and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
3037 info_dict['ext'] = 'mkv'
3038 self.report_warning(
3039 'webm doesn\'t support embedding a thumbnail, mkv will be used')
3040 new_ext = info_dict['ext']
3041
3042 def correct_ext(filename, ext=new_ext):
3043 if filename == '-':
3044 return filename
3045 filename_real_ext = os.path.splitext(filename)[1][1:]
3046 filename_wo_ext = (
3047 os.path.splitext(filename)[0]
3048 if filename_real_ext in (old_ext, new_ext)
3049 else filename)
3050 return f'{filename_wo_ext}.{ext}'
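# e.g. with old_ext='webm', new_ext='mkv': 'video.webm' -> 'video.mkv',
# but an unrelated extension is kept: 'video.part' -> 'video.part.mkv'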
3051
3052 # Ensure filename always has a correct extension for successful merge
3053 full_filename = correct_ext(full_filename)
3054 temp_filename = correct_ext(temp_filename)
3055 dl_filename = existing_video_file(full_filename, temp_filename)
3056 info_dict['__real_download'] = False
3057
3058 downloaded = []
3059 merger = FFmpegMergerPP(self)
3060
3061 fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
3062 if dl_filename is not None:
3063 self.report_file_already_downloaded(dl_filename)
3064 elif fd:
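# A single downloader handles all the requested formats here; ffmpeg
# downloads and muxes them in one go, so the per-format intermediate
# files (and the later merge step) are only needed for other downloaders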
3065 for f in requested_formats if fd != FFmpegFD else []:
3066 f['filepath'] = fname = prepend_extension(
3067 correct_ext(temp_filename, info_dict['ext']),
3068 'f%s' % f['format_id'], info_dict['ext'])
3069 downloaded.append(fname)
3070 info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
3071 success, real_download = self.dl(temp_filename, info_dict)
3072 info_dict['__real_download'] = real_download
3073 else:
3074 if self.params.get('allow_unplayable_formats'):
3075 self.report_warning(
3076 'You have requested merging of multiple formats '
3077 'while also allowing unplayable formats to be downloaded. '
3078 'The formats won\'t be merged to prevent data corruption.')
3079 elif not merger.available:
3080 msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
3081 if not self.params.get('ignoreerrors'):
3082 self.report_error(f'{msg}. Aborting due to --abort-on-error')
3083 return
3084 self.report_warning(f'{msg}. The formats won\'t be merged')
3085
3086 if temp_filename == '-':
3087 reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
3088 else 'but the formats are incompatible for simultaneous download' if merger.available
3089 else 'but ffmpeg is not installed')
3090 self.report_warning(
3091 f'You have requested downloading multiple formats to stdout {reason}. '
3092 'The formats will be streamed one after the other')
3093 fname = temp_filename
3094 for f in requested_formats:
3095 new_info = dict(info_dict)
3096 del new_info['requested_formats']
3097 new_info.update(f)
3098 if temp_filename != '-':
3099 fname = prepend_extension(
3100 correct_ext(temp_filename, new_info['ext']),
3101 'f%s' % f['format_id'], new_info['ext'])
3102 if not self._ensure_dir_exists(fname):
3103 return
3104 f['filepath'] = fname
3105 downloaded.append(fname)
3106 partial_success, real_download = self.dl(fname, new_info)
3107 info_dict['__real_download'] = info_dict['__real_download'] or real_download
3108 success = success and partial_success
3109
3110 if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
3111 info_dict['__postprocessors'].append(merger)
3112 info_dict['__files_to_merge'] = downloaded
3113 # Even if nothing new was downloaded, the actual merge only happens now
3114 info_dict['__real_download'] = True
3115 else:
3116 for file in downloaded:
3117 files_to_move[file] = None
3118 else:
3119 # Just a single file
3120 dl_filename = existing_video_file(full_filename, temp_filename)
3121 if dl_filename is None or dl_filename == temp_filename:
3122 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3123 # So we should try to resume the download
3124 success, real_download = self.dl(temp_filename, info_dict)
3125 info_dict['__real_download'] = real_download
3126 else:
3127 self.report_file_already_downloaded(dl_filename)
3128
3129 dl_filename = dl_filename or temp_filename
3130 info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
3131
3132 except network_exceptions as err:
3133 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
3134 return
3135 except OSError as err:
3136 raise UnavailableVideoError(err)
3137 except ContentTooShortError as err:
3138 self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})')
3139 return
3140
3141 if success and full_filename != '-':
3142
3143 def fixup():
3144 do_fixup = True
3145 fixup_policy = self.params.get('fixup')
3146 vid = info_dict['id']
3147
3148 if fixup_policy in ('ignore', 'never'):
3149 return
3150 elif fixup_policy == 'warn':
3151 do_fixup = False
3152 elif fixup_policy != 'force':
3153 assert fixup_policy in ('detect_or_warn', None)
3154 if not info_dict.get('__real_download'):
3155 do_fixup = False
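# i.e. 'force' always fixes, 'warn' only ever warns, and the default
# ('detect_or_warn') fixes freshly downloaded files but merely warns
# about pre-existing ones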
3156
3157 def ffmpeg_fixup(cndn, msg, cls):
3158 if not cndn:
3159 return
3160 if not do_fixup:
3161 self.report_warning(f'{vid}: {msg}')
3162 return
3163 pp = cls(self)
3164 if pp.available:
3165 info_dict['__postprocessors'].append(pp)
3166 else:
3167 self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')
3168
3169 stretched_ratio = info_dict.get('stretched_ratio')
3170 ffmpeg_fixup(
3171 stretched_ratio not in (1, None),
3172 f'Non-uniform pixel ratio {stretched_ratio}',
3173 FFmpegFixupStretchedPP)
3174
3175 ffmpeg_fixup(
3176 (info_dict.get('requested_formats') is None
3177 and info_dict.get('container') == 'm4a_dash'
3178 and info_dict.get('ext') == 'm4a'),
3179 'writing DASH m4a. Only some players support this container',
3180 FFmpegFixupM4aPP)
3181
3182 downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
3183 downloader = downloader.__name__ if downloader else None
3184
3185 if info_dict.get('requested_formats') is None: # Not needed when merging
3186 ffmpeg_fixup(downloader == 'HlsFD',
3187 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
3188 FFmpegFixupM3u8PP)
3189 ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
3190 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
3191
3192 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
3193 ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
3194
3195 fixup()
3196 try:
3197 replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
3198 except PostProcessingError as err:
3199 self.report_error('Postprocessing: %s' % str(err))
3200 return
3201 try:
3202 for ph in self._post_hooks:
3203 ph(info_dict['filepath'])
3204 except Exception as err:
3205 self.report_error('post hooks: %s' % str(err))
3206 return
3207 info_dict['__write_download_archive'] = True
3208
3209 if self.params.get('force_write_download_archive'):
3210 info_dict['__write_download_archive'] = True
3211
3212 # Make sure the info_dict was modified in-place
3213 assert info_dict is original_infodict
3214
3215 max_downloads = self.params.get('max_downloads')
3216 if max_downloads is not None and self._num_downloads >= int(max_downloads):
3217 raise MaxDownloadsReached()
3218
3219 def __download_wrapper(self, func):
3220 @functools.wraps(func)
3221 def wrapper(*args, **kwargs):
3222 try:
3223 res = func(*args, **kwargs)
3224 except UnavailableVideoError as e:
3225 self.report_error(e)
3226 except MaxDownloadsReached as e:
3227 self.to_screen(f'[info] {e}')
3228 raise
3229 except DownloadCancelled as e:
3230 self.to_screen(f'[info] {e}')
3231 if not self.params.get('break_per_url'):
3232 raise
3233 else:
3234 if self.params.get('dump_single_json', False):
3235 self.post_extract(res)
3236 self.to_stdout(json.dumps(self.sanitize_info(res)))
3237 return wrapper
3238
3239 def download(self, url_list):
3240 """Download a given list of URLs."""
3241 url_list = variadic(url_list) # Passing a single URL is a common mistake
3242 outtmpl = self.outtmpl_dict['default']
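# With several URLs and a fixed output template (no '%' fields), every
# download would be written to the same file; '-' (stdout) and
# --max-downloads 1 are exempted below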
3243 if (len(url_list) > 1
3244 and outtmpl != '-'
3245 and '%' not in outtmpl
3246 and self.params.get('max_downloads') != 1):
3247 raise SameFileError(outtmpl)
3248
3249 for url in url_list:
3250 self.__download_wrapper(self.extract_info)(
3251 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3252
3253 return self._download_retcode
3254
3255 def download_with_info_file(self, info_filename):
3256 with contextlib.closing(fileinput.FileInput(
3257 [info_filename], mode='r',
3258 openhook=fileinput.hook_encoded('utf-8'))) as f:
3259 # FileInput doesn't have a read method, so we can't call json.load
3260 info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
3261 try:
3262 self.__download_wrapper(self.process_ie_result)(info, download=True)
3263 except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
3264 if not isinstance(e, EntryNotInPlaylist):
3265 self.to_stderr('\r')
3266 webpage_url = info.get('webpage_url')
3267 if webpage_url is not None:
3268 self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
3269 return self.download([webpage_url])
3270 else:
3271 raise
3272 return self._download_retcode
3273
3274 @staticmethod
3275 def sanitize_info(info_dict, remove_private_keys=False):
3276 ''' Sanitize the infodict for converting to json '''
3277 if info_dict is None:
3278 return info_dict
3279 info_dict.setdefault('epoch', int(time.time()))
3280 info_dict.setdefault('_type', 'video')
3281
3282 if remove_private_keys:
3283 reject = lambda k, v: v is None or k.startswith('__') or k in {
3284 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3285 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
3286 }
3287 else:
3288 reject = lambda k, v: False
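# e.g. with remove_private_keys=True,
# {'id': 'x', '__real_download': True, 'filepath': 'f.mp4', 'width': None}
# is filtered down to just {'id': 'x'}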
3289
3290 def filter_fn(obj):
3291 if isinstance(obj, dict):
3292 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3293 elif isinstance(obj, (list, tuple, set, LazyList)):
3294 return list(map(filter_fn, obj))
3295 elif obj is None or isinstance(obj, (str, int, float, bool)):
3296 return obj
3297 else:
3298 return repr(obj)
3299
3300 return filter_fn(info_dict)
3301
3302 @staticmethod
3303 def filter_requested_info(info_dict, actually_filter=True):
3304 ''' Alias of sanitize_info for backward compatibility '''
3305 return YoutubeDL.sanitize_info(info_dict, actually_filter)
3306
3307 @staticmethod
3308 def post_extract(info_dict):
3309 def actual_post_extract(info_dict):
3310 if info_dict.get('_type') in ('playlist', 'multi_video'):
3311 for video_dict in info_dict.get('entries', {}):
3312 actual_post_extract(video_dict or {})
3313 return
3314
3315 post_extractor = info_dict.pop('__post_extractor', None) or (lambda: {})
3316 info_dict.update(post_extractor())
3317
3318 actual_post_extract(info_dict or {})
3319
3320 def run_pp(self, pp, infodict):
3321 files_to_delete = []
3322 if '__files_to_move' not in infodict:
3323 infodict['__files_to_move'] = {}
3324 try:
3325 files_to_delete, infodict = pp.run(infodict)
3326 except PostProcessingError as e:
3327 # Must be True and not 'only_download'
3328 if self.params.get('ignoreerrors') is True:
3329 self.report_error(e)
3330 return infodict
3331 raise
3332
3333 if not files_to_delete:
3334 return infodict
3335 if self.params.get('keepvideo', False):
3336 for f in files_to_delete:
3337 infodict['__files_to_move'].setdefault(f, '')
3338 else:
3339 for old_filename in set(files_to_delete):
3340 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
3341 try:
3342 os.remove(encodeFilename(old_filename))
3343 except OSError:
3344 self.report_warning('Unable to remove downloaded original file')
3345 if old_filename in infodict['__files_to_move']:
3346 del infodict['__files_to_move'][old_filename]
3347 return infodict
3348
3349 def run_all_pps(self, key, info, *, additional_pps=None):
3350 self._forceprint(key, info)
3351 for pp in (additional_pps or []) + self._pps[key]:
3352 info = self.run_pp(pp, info)
3353 return info
3354
3355 def pre_process(self, ie_info, key='pre_process', files_to_move=None):
3356 info = dict(ie_info)
3357 info['__files_to_move'] = files_to_move or {}
3358 info = self.run_all_pps(key, info)
3359 return info, info.pop('__files_to_move', None)
3360
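# Rough lifecycle: 'pre_process'/'before_dl' PPs run before the download;
# 'post_process' PPs (plus the format-specific ones collected in
# '__postprocessors') run on the downloaded file; 'after_move' PPs run
# once the file is at its final location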
3361 def post_process(self, filename, info, files_to_move=None):
3362 """Run all the postprocessors on the given file."""
3363 info['filepath'] = filename
3364 info['__files_to_move'] = files_to_move or {}
3365 info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
3366 info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
3367 del info['__files_to_move']
3368 return self.run_all_pps('after_move', info)
3369
3370 def _make_archive_id(self, info_dict):
3371 video_id = info_dict.get('id')
3372 if not video_id:
3373 return
3374 # Future-proof against any change in case,
3375 # and for backward compatibility with prior versions
3376 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3377 if extractor is None:
3378 url = str_or_none(info_dict.get('url'))
3379 if not url:
3380 return
3381 # Try to find matching extractor for the URL and take its ie_key
3382 for ie_key, ie in self._ies.items():
3383 if ie.suitable(url):
3384 extractor = ie_key
3385 break
3386 else:
3387 return
3388 return f'{extractor.lower()} {video_id}'
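# The resulting entry, e.g. 'youtube dQw4w9WgXcQ', is what gets stored
# in (and matched against) the download archive file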
3389
3390 def in_download_archive(self, info_dict):
3391 fn = self.params.get('download_archive')
3392 if fn is None:
3393 return False
3394
3395 vid_id = self._make_archive_id(info_dict)
3396 if not vid_id:
3397 return False # Incomplete video information
3398
3399 return vid_id in self.archive
3400
3401 def record_download_archive(self, info_dict):
3402 fn = self.params.get('download_archive')
3403 if fn is None:
3404 return
3405 vid_id = self._make_archive_id(info_dict)
3406 assert vid_id
3407 self.write_debug(f'Adding to archive: {vid_id}')
3408 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
3409 archive_file.write(vid_id + '\n')
3410 self.archive.add(vid_id)
3411
3412 @staticmethod
3413 def format_resolution(format, default='unknown'):
3414 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3415 return 'audio only'
3416 if format.get('resolution') is not None:
3417 return format['resolution']
3418 if format.get('width') and format.get('height'):
3419 return '%dx%d' % (format['width'], format['height'])
3420 elif format.get('height'):
3421 return '%sp' % format['height']
3422 elif format.get('width'):
3423 return '%dx?' % format['width']
3424 return default
3425
3426 def _list_format_headers(self, *headers):
3427 if self.params.get('listformats_table', True) is not False:
3428 return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3429 return headers
3430
3431 def _format_note(self, fdict):
3432 res = ''
3433 if fdict.get('ext') in ['f4f', 'f4m']:
3434 res += '(unsupported)'
3435 if fdict.get('language'):
3436 if res:
3437 res += ' '
3438 res += '[%s]' % fdict['language']
3439 if fdict.get('format_note') is not None:
3440 if res:
3441 res += ' '
3442 res += fdict['format_note']
3443 if fdict.get('tbr') is not None:
3444 if res:
3445 res += ', '
3446 res += '%4dk' % fdict['tbr']
3447 if fdict.get('container') is not None:
3448 if res:
3449 res += ', '
3450 res += '%s container' % fdict['container']
3451 if (fdict.get('vcodec') is not None
3452 and fdict.get('vcodec') != 'none'):
3453 if res:
3454 res += ', '
3455 res += fdict['vcodec']
3456 if fdict.get('vbr') is not None:
3457 res += '@'
3458 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3459 res += 'video@'
3460 if fdict.get('vbr') is not None:
3461 res += '%4dk' % fdict['vbr']
3462 if fdict.get('fps') is not None:
3463 if res:
3464 res += ', '
3465 res += '%sfps' % fdict['fps']
3466 if fdict.get('acodec') is not None:
3467 if res:
3468 res += ', '
3469 if fdict['acodec'] == 'none':
3470 res += 'video only'
3471 else:
3472 res += '%-5s' % fdict['acodec']
3473 elif fdict.get('abr') is not None:
3474 if res:
3475 res += ', '
3476 res += 'audio'
3477 if fdict.get('abr') is not None:
3478 res += '@%3dk' % fdict['abr']
3479 if fdict.get('asr') is not None:
3480 res += ' (%5dHz)' % fdict['asr']
3481 if fdict.get('filesize') is not None:
3482 if res:
3483 res += ', '
3484 res += format_bytes(fdict['filesize'])
3485 elif fdict.get('filesize_approx') is not None:
3486 if res:
3487 res += ', '
3488 res += '~' + format_bytes(fdict['filesize_approx'])
3489 return res
3490
3491 def render_formats_table(self, info_dict):
3492 if not info_dict.get('formats') and not info_dict.get('url'):
3493 return None
3494
3495 formats = info_dict.get('formats', [info_dict])
3496 if self.params.get('listformats_table', True) is False:
3497 table = [
3498 [
3499 format_field(f, 'format_id'),
3500 format_field(f, 'ext'),
3501 self.format_resolution(f),
3502 self._format_note(f)
3503 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3504 return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1)
3505
3506 delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
3507 table = [
3508 [
3509 self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
3510 format_field(f, 'ext'),
3511 format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
3512 format_field(f, 'fps', '\t%d'),
3513 format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
3514 delim,
3515 format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
3516 format_field(f, 'tbr', '\t%dk'),
3517 shorten_protocol_name(f.get('protocol', '')),
3518 delim,
3519 format_field(f, 'vcodec', default='unknown').replace(
3520 'none', 'images' if f.get('acodec') == 'none'
3521 else self._format_screen('audio only', self.Styles.SUPPRESS)),
3522 format_field(f, 'vbr', '\t%dk'),
3523 format_field(f, 'acodec', default='unknown').replace(
3524 'none', '' if f.get('vcodec') == 'none'
3525 else self._format_screen('video only', self.Styles.SUPPRESS)),
3526 format_field(f, 'abr', '\t%dk'),
3527 format_field(f, 'asr', '\t%dHz'),
3528 join_nonempty(
3529 self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
3530 format_field(f, 'language', '[%s]'),
3531 join_nonempty(format_field(f, 'format_note'),
3532 format_field(f, 'container', ignore=(None, f.get('ext'))),
3533 delim=', '),
3534 delim=' '),
3535 ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
3536 header_line = self._list_format_headers(
3537 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
3538 delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
3539
3540 return render_table(
3541 header_line, table, hide_empty=True,
3542 delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3543
3544 def render_thumbnails_table(self, info_dict):
3545 thumbnails = list(info_dict.get('thumbnails') or [])
3546 if not thumbnails:
3547 return None
3548 return render_table(
3549 self._list_format_headers('ID', 'Width', 'Height', 'URL'),
3550 [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
3551
3552 def render_subtitles_table(self, video_id, subtitles):
3553 def _row(lang, formats):
3554 exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
3555 if len(set(names)) == 1:
3556 names = [] if names[0] == 'unknown' else names[:1]
3557 return [lang, ', '.join(names), ', '.join(exts)]
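# e.g. _row('en', [{'ext': 'vtt', 'name': 'English'}, {'ext': 'srt', 'name': 'English'}])
# -> ['en', 'English', 'srt, vtt']; a repeated name is shown only once,
# and 'unknown' names are suppressed entirely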
3558
3559 if not subtitles:
3560 return None
3561 return render_table(
3562 self._list_format_headers('Language', 'Name', 'Formats'),
3563 [_row(lang, formats) for lang, formats in subtitles.items()],
3564 hide_empty=True)
3565
3566 def __list_table(self, video_id, name, func, *args):
3567 table = func(*args)
3568 if not table:
3569 self.to_screen(f'{video_id} has no {name}')
3570 return
3571 self.to_screen(f'[info] Available {name} for {video_id}:')
3572 self.to_stdout(table)
3573
3574 def list_formats(self, info_dict):
3575 self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict)
3576
3577 def list_thumbnails(self, info_dict):
3578 self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict)
3579
3580 def list_subtitles(self, video_id, subtitles, name='subtitles'):
3581 self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles)
3582
3583 def urlopen(self, req):
3584 """ Start an HTTP download """
3585 if isinstance(req, str):
3586 req = sanitized_Request(req)
3587 return self._opener.open(req, timeout=self._socket_timeout)
3588
3589 def print_debug_header(self):
3590 if not self.params.get('verbose'):
3591 return
3592
3593 def get_encoding(stream):
3594 ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
3595 if not supports_terminal_sequences(stream):
3596 from .compat import WINDOWS_VT_MODE # Must be imported locally
3597 ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
3598 return ret
3599
3600 encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
3601 locale.getpreferredencoding(),
3602 sys.getfilesystemencoding(),
3603 get_encoding(self._out_files['screen']), get_encoding(self._out_files['error']),
3604 self.get_encoding())
3605
3606 logger = self.params.get('logger')
3607 if logger:
3608 write_debug = lambda msg: logger.debug(f'[debug] {msg}')
3609 write_debug(encoding_str)
3610 else:
3611 write_string(f'[debug] {encoding_str}\n', encoding=None)
3612 write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
3613
3614 source = detect_variant()
3615 write_debug(join_nonempty(
3616 'yt-dlp version', __version__,
3617 f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
3618 '' if source == 'unknown' else f'({source})',
3619 delim=' '))
3620 if not _LAZY_LOADER:
3621 if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
3622 write_debug('Lazy loading extractors is forcibly disabled')
3623 else:
3624 write_debug('Lazy loading extractors is disabled')
3625 if plugin_extractors or plugin_postprocessors:
3626 write_debug('Plugins: %s' % [
3627 '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
3628 for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
3629 if self.params.get('compat_opts'):
3630 write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
3631
3632 if source == 'source':
3633 try:
3634 sp = Popen(
3635 ['git', 'rev-parse', '--short', 'HEAD'],
3636 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
3637 cwd=os.path.dirname(os.path.abspath(__file__)))
3638 out, err = sp.communicate_or_kill()
3639 out = out.decode().strip()
3640 if re.match('[0-9a-f]+', out):
3641 write_debug('Git HEAD: %s' % out)
3642 except Exception:
3643 pass # sys.exc_clear() is Python 2-only; Python 3 needs no manual cleanup
3645
3646 def python_implementation():
3647 impl_name = platform.python_implementation()
3648 if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
3649 return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
3650 return impl_name
3651
3652 write_debug('Python version %s (%s %s) - %s' % (
3653 platform.python_version(),
3654 python_implementation(),
3655 platform.architecture()[0],
3656 platform_name()))
3657
3658 exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
3659 ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
3660 if ffmpeg_features:
3661 exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
3662
3663 exe_versions['rtmpdump'] = rtmpdump_version()
3664 exe_versions['phantomjs'] = PhantomJSwrapper._version()
3665 exe_str = ', '.join(
3666 f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
3667 ) or 'none'
3668 write_debug('exe versions: %s' % exe_str)
3669
3670 from .dependencies import available_dependencies
3671
3672 write_debug('Optional libraries: %s' % (', '.join(sorted({
3673 module.__name__.split('.')[0] for module in available_dependencies.values()
3674 })) or 'none'))
3675
3676 self._setup_opener()
3677 proxy_map = {}
3678 for handler in self._opener.handlers:
3679 if hasattr(handler, 'proxies'):
3680 proxy_map.update(handler.proxies)
3681 write_debug(f'Proxy map: {proxy_map}')
3682
3683 # Not implemented
3684 if False and self.params.get('call_home'):
3685 ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
3686 write_debug('Public IP address: %s' % ipaddr)
3687 latest_version = self.urlopen(
3688 'https://yt-dl.org/latest/version').read().decode('utf-8')
3689 if version_tuple(latest_version) > version_tuple(__version__):
3690 self.report_warning(
3691 'You are using an outdated version (newest version: %s)! '
3692 'See https://yt-dl.org/update if you need help updating.' %
3693 latest_version)
3694
3695 def _setup_opener(self):
3696 if hasattr(self, '_opener'):
3697 return
3698 timeout_val = self.params.get('socket_timeout')
3699 self._socket_timeout = 20 if timeout_val is None else float(timeout_val)
3700
3701 opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
3702 opts_cookiefile = self.params.get('cookiefile')
3703 opts_proxy = self.params.get('proxy')
3704
3705 self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
3706
3707 cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
3708 if opts_proxy is not None:
3709 if opts_proxy == '':
3710 proxies = {}
3711 else:
3712 proxies = {'http': opts_proxy, 'https': opts_proxy}
3713 else:
3714 proxies = compat_urllib_request.getproxies()
3715 # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
3716 if 'http' in proxies and 'https' not in proxies:
3717 proxies['https'] = proxies['http']
3718 proxy_handler = PerRequestProxyHandler(proxies)
3719
3720 debuglevel = 1 if self.params.get('debug_printtraffic') else 0
3721 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
3722 ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
3723 redirect_handler = YoutubeDLRedirectHandler()
3724 data_handler = urllib.request.DataHandler()
3725
3726 # When passing our own FileHandler instance, build_opener won't add the
3727 # default FileHandler and allows us to disable the file protocol, which
3728 # can be used for malicious purposes (see
3729 # https://github.com/ytdl-org/youtube-dl/issues/8227)
3730 file_handler = compat_urllib_request.FileHandler()
3731
3732 def file_open(*args, **kwargs):
3733 raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
3734 file_handler.file_open = file_open
3735
3736 opener = compat_urllib_request.build_opener(
3737 proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
3738
3739 # Delete the default user-agent header, which would otherwise apply in
3740 # cases where our custom HTTP handler doesn't come into play
3741 # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
3742 opener.addheaders = []
3743 self._opener = opener
3744
3745 def encode(self, s):
3746 if isinstance(s, bytes):
3747 return s # Already encoded
3748
3749 try:
3750 return s.encode(self.get_encoding())
3751 except UnicodeEncodeError as err:
3752 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
3753 raise
3754
3755 def get_encoding(self):
3756 encoding = self.params.get('encoding')
3757 if encoding is None:
3758 encoding = preferredencoding()
3759 return encoding
3760
3761 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3762 ''' Write infojson and return True = written, 'exists' = already exists, False = skipped, None = error '''
3763 if overwrite is None:
3764 overwrite = self.params.get('overwrites', True)
3765 if not self.params.get('writeinfojson'):
3766 return False
3767 elif not infofn:
3768 self.write_debug(f'Skipping writing {label} infojson')
3769 return False
3770 elif not self._ensure_dir_exists(infofn):
3771 return None
3772 elif not overwrite and os.path.exists(infofn):
3773 self.to_screen(f'[info] {label.title()} metadata is already present')
3774 return 'exists'
3775
3776 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3777 try:
3778 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3779 return True
3780 except OSError:
3781 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3782 return None
3783
3784 def _write_description(self, label, ie_result, descfn):
3785 ''' Write description and return True = written, False = skipped, None = error '''
3786 if not self.params.get('writedescription'):
3787 return False
3788 elif not descfn:
3789 self.write_debug(f'Skipping writing {label} description')
3790 return False
3791 elif not self._ensure_dir_exists(descfn):
3792 return None
3793 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3794 self.to_screen(f'[info] {label.title()} description is already present')
3795 elif ie_result.get('description') is None:
3796 self.report_warning(f'There\'s no {label} description to write')
3797 return False
3798 else:
3799 try:
3800 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3801 with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3802 descfile.write(ie_result['description'])
3803 except OSError:
3804 self.report_error(f'Cannot write {label} description file {descfn}')
3805 return None
3806 return True
3807
3808 def _write_subtitles(self, info_dict, filename):
3809 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3810 ret = []
3811 subtitles = info_dict.get('requested_subtitles')
3812 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3813 # Subtitle download errors are already handled as non-fatal by the relevant IE,
3814 # so this silently continues when used with an IE that doesn't support subtitles
3815 return ret
3816
3817 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3818 if not sub_filename_base:
3819 self.to_screen('[info] Skipping writing video subtitles')
3820 return ret
3821 for sub_lang, sub_info in subtitles.items():
3822 sub_format = sub_info['ext']
3823 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3824 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3825 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3826 if existing_sub:
3827 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3828 sub_info['filepath'] = existing_sub
3829 ret.append((existing_sub, sub_filename_final))
3830 continue
3831
3832 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3833 if sub_info.get('data') is not None:
3834 try:
3835 # Use newline='' to prevent conversion of newline characters
3836 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3837 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3838 subfile.write(sub_info['data'])
3839 sub_info['filepath'] = sub_filename
3840 ret.append((sub_filename, sub_filename_final))
3841 continue
3842 except OSError:
3843 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3844 return None
3845
3846 try:
3847 sub_copy = sub_info.copy()
3848 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3849 self.dl(sub_filename, sub_copy, subtitle=True)
3850 sub_info['filepath'] = sub_filename
3851 ret.append((sub_filename, sub_filename_final))
3852 except (DownloadError, ExtractorError, OSError, ValueError) + network_exceptions as err:
3853 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
3854 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
3855 if not self.params.get('ignoreerrors'):
3856 self.report_error(msg)
3857 raise DownloadError(msg)
3858 self.report_warning(msg)
3859 return ret
3860
3861 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3862 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3863 write_all = self.params.get('write_all_thumbnails', False)
3864 thumbnails, ret = [], []
3865 if write_all or self.params.get('writethumbnail', False):
3866 thumbnails = info_dict.get('thumbnails') or []
3867 multiple = write_all and len(thumbnails) > 1
3868
3869 if thumb_filename_base is None:
3870 thumb_filename_base = filename
3871 if thumbnails and not thumb_filename_base:
3872 self.write_debug(f'Skipping writing {label} thumbnail')
3873 return ret
3874
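# Thumbnails are sorted worst to best, so iterate in reverse: unless
# write_all is set, only the best thumbnail that actually downloads is
# written (hence the 'break' below), falling back to the next-best on failure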
3875 for idx, t in list(enumerate(thumbnails))[::-1]:
3876 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3877 thumb_display_id = f'{label} thumbnail {t["id"]}'
3878 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3879 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3880
3881 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3882 if existing_thumb:
3883 self.to_screen('[info] %s is already present' % (
3884 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3885 t['filepath'] = existing_thumb
3886 ret.append((existing_thumb, thumb_filename_final))
3887 else:
3888 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3889 try:
3890 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
3891 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3892 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3893 shutil.copyfileobj(uf, thumbf)
3894 ret.append((thumb_filename, thumb_filename_final))
3895 t['filepath'] = thumb_filename
3896 except network_exceptions as err:
3897 thumbnails.pop(idx)
3898 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3899 if ret and not write_all:
3900 break
3901 return ret