]> jfr.im git - yt-dlp.git/blob - yt_dlp/YoutubeDL.py
478bdaccaf4cc8bd7bb879694a100f9085727adc
[yt-dlp.git] / yt_dlp / YoutubeDL.py
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import functools
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import sys
23 import tempfile
24 import time
25 import tokenize
26 import traceback
27 import random
28 import unicodedata
29
30 from enum import Enum
31 from string import ascii_letters
32
33 from .compat import (
34 compat_basestring,
35 compat_brotli,
36 compat_get_terminal_size,
37 compat_kwargs,
38 compat_numeric_types,
39 compat_os_name,
40 compat_pycrypto_AES,
41 compat_shlex_quote,
42 compat_str,
43 compat_tokenize_tokenize,
44 compat_urllib_error,
45 compat_urllib_request,
46 compat_urllib_request_DataHandler,
47 windows_enable_vt_mode,
48 )
49 from .cookies import load_cookies
50 from .utils import (
51 age_restricted,
52 args_to_str,
53 ContentTooShortError,
54 date_from_str,
55 DateRange,
56 DEFAULT_OUTTMPL,
57 determine_ext,
58 determine_protocol,
59 DownloadCancelled,
60 DownloadError,
61 encode_compat_str,
62 encodeFilename,
63 EntryNotInPlaylist,
64 error_to_compat_str,
65 ExistingVideoReached,
66 expand_path,
67 ExtractorError,
68 float_or_none,
69 format_bytes,
70 format_field,
71 format_decimal_suffix,
72 formatSeconds,
73 GeoRestrictedError,
74 get_domain,
75 has_certifi,
76 HEADRequest,
77 InAdvancePagedList,
78 int_or_none,
79 iri_to_uri,
80 ISO3166Utils,
81 join_nonempty,
82 LazyList,
83 LINK_TEMPLATES,
84 locked_file,
85 make_dir,
86 make_HTTPS_handler,
87 MaxDownloadsReached,
88 merge_headers,
89 network_exceptions,
90 number_of_digits,
91 orderedSet,
92 OUTTMPL_TYPES,
93 PagedList,
94 parse_filesize,
95 PerRequestProxyHandler,
96 platform_name,
97 Popen,
98 POSTPROCESS_WHEN,
99 PostProcessingError,
100 preferredencoding,
101 prepend_extension,
102 ReExtractInfo,
103 register_socks_protocols,
104 RejectedVideoReached,
105 remove_terminal_sequences,
106 render_table,
107 replace_extension,
108 SameFileError,
109 sanitize_filename,
110 sanitize_path,
111 sanitize_url,
112 sanitized_Request,
113 std_headers,
114 STR_FORMAT_RE_TMPL,
115 STR_FORMAT_TYPES,
116 str_or_none,
117 strftime_or_none,
118 subtitles_filename,
119 supports_terminal_sequences,
120 timetuple_from_msec,
121 to_high_limit_path,
122 traverse_obj,
123 try_get,
124 UnavailableVideoError,
125 url_basename,
126 variadic,
127 version_tuple,
128 write_json_file,
129 write_string,
130 YoutubeDLCookieProcessor,
131 YoutubeDLHandler,
132 YoutubeDLRedirectHandler,
133 )
134 from .cache import Cache
135 from .minicurses import format_text
136 from .extractor import (
137 gen_extractor_classes,
138 get_info_extractor,
139 _LAZY_LOADER,
140 _PLUGIN_CLASSES as plugin_extractors
141 )
142 from .extractor.openload import PhantomJSwrapper
143 from .downloader import (
144 FFmpegFD,
145 get_suitable_downloader,
146 shorten_protocol_name
147 )
148 from .downloader.rtmp import rtmpdump_version
149 from .postprocessor import (
150 get_postprocessor,
151 EmbedThumbnailPP,
152 FFmpegFixupDuplicateMoovPP,
153 FFmpegFixupDurationPP,
154 FFmpegFixupM3u8PP,
155 FFmpegFixupM4aPP,
156 FFmpegFixupStretchedPP,
157 FFmpegFixupTimestampPP,
158 FFmpegMergerPP,
159 FFmpegPostProcessor,
160 MoveFilesAfterDownloadPP,
161 _PLUGIN_CLASSES as plugin_postprocessors
162 )
163 from .update import detect_variant
164 from .version import __version__, RELEASE_GIT_HEAD
165
166 if compat_os_name == 'nt':
167 import ctypes
168
169
170 class YoutubeDL(object):
171 """YoutubeDL class.
172
173 YoutubeDL objects are the ones responsible for downloading the
174 actual video file and writing it to disk if the user has requested
175 it, among some other tasks. In most cases there should be one per
176 program. As, given a video URL, the downloader doesn't know how to
177 extract all the needed information, task that InfoExtractors do, it
178 has to pass the URL to one of them.
179
180 For this, YoutubeDL objects have a method that allows
181 InfoExtractors to be registered in a given order. When it is passed
182 a URL, the YoutubeDL object hands it to the first InfoExtractor it
183 finds that reports being able to handle it. The InfoExtractor extracts
184 all the information about the video or videos the URL refers to, and
185 YoutubeDL processes the extracted information, possibly using a File
186 Downloader to download the video.
187
188 YoutubeDL objects accept a lot of parameters. In order not to saturate
189 the object constructor with arguments, it receives a dictionary of
190 options instead. These options are available through the params
191 attribute for the InfoExtractors to use. The YoutubeDL also
192 registers itself as the downloader in charge for the InfoExtractors
193 that are added to it, so this is a "mutual registration".
194
195 Available options:
196
197 username: Username for authentication purposes.
198 password: Password for authentication purposes.
199 videopassword: Password for accessing a video.
200 ap_mso: Adobe Pass multiple-system operator identifier.
201 ap_username: Multiple-system operator account username.
202 ap_password: Multiple-system operator account password.
203 usenetrc: Use netrc for authentication instead.
204 verbose: Print additional info to stdout.
205 quiet: Do not print messages to stdout.
206 no_warnings: Do not print out anything for warnings.
207 forceprint: A dict with keys WHEN mapped to a list of templates to
208 print to stdout. The allowed keys are video or any of the
209 items in utils.POSTPROCESS_WHEN.
210 For compatibility, a single list is also accepted
211 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
212 a list of tuples with (template, filename)
213 forceurl: Force printing final URL. (Deprecated)
214 forcetitle: Force printing title. (Deprecated)
215 forceid: Force printing ID. (Deprecated)
216 forcethumbnail: Force printing thumbnail URL. (Deprecated)
217 forcedescription: Force printing description. (Deprecated)
218 forcefilename: Force printing final filename. (Deprecated)
219 forceduration: Force printing duration. (Deprecated)
220 forcejson: Force printing info_dict as JSON.
221 dump_single_json: Force printing the info_dict of the whole playlist
222 (or video) as a single JSON line.
223 force_write_download_archive: Force writing download archive regardless
224 of 'skip_download' or 'simulate'.
225 simulate: Do not download the video files. If unset (or None),
226 simulate only if listsubtitles, listformats or list_thumbnails is used
227 format: Video format code. see "FORMAT SELECTION" for more details.
228 You can also pass a function. The function takes 'ctx' as
229 argument and returns the formats to download.
230 See "build_format_selector" for an implementation
231 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
232 ignore_no_formats_error: Ignore "No video formats" error. Useful for
233 extracting metadata even if the video is not actually
234 available for download (experimental)
235 format_sort: A list of fields by which to sort the video formats.
236 See "Sorting Formats" for more details.
237 format_sort_force: Force the given format_sort. see "Sorting Formats"
238 for more details.
239 prefer_free_formats: Whether to prefer video formats with free containers
240 over non-free ones of same quality.
241 allow_multiple_video_streams: Allow multiple video streams to be merged
242 into a single file
243 allow_multiple_audio_streams: Allow multiple audio streams to be merged
244 into a single file
245 check_formats Whether to test if the formats are downloadable.
246 Can be True (check all), False (check none),
247 'selected' (check selected formats),
248 or None (check only if requested by extractor)
249 paths: Dictionary of output paths. The allowed keys are 'home'
250 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
251 outtmpl: Dictionary of templates for output names. Allowed keys
252 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
253 For compatibility with youtube-dl, a single string can also be used
254 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
255 restrictfilenames: Do not allow "&" and spaces in file names
256 trim_file_name: Limit length of filename (extension excluded)
257 windowsfilenames: Force the filenames to be windows compatible
258 ignoreerrors: Do not stop on download/postprocessing errors.
259 Can be 'only_download' to ignore only download errors.
260 Default is 'only_download' for CLI, but False for API
261 skip_playlist_after_errors: Number of allowed failures until the rest of
262 the playlist is skipped
263 force_generic_extractor: Force downloader to use the generic extractor
264 overwrites: Overwrite all video and metadata files if True,
265 overwrite only non-video files if None
266 and don't overwrite any file if False
267 For compatibility with youtube-dl,
268 "nooverwrites" may also be used instead
269 playliststart: Playlist item to start at.
270 playlistend: Playlist item to end at.
271 playlist_items: Specific indices of playlist to download.
272 playlistreverse: Download playlist items in reverse order.
273 playlistrandom: Download playlist items in random order.
274 matchtitle: Download only matching titles.
275 rejecttitle: Reject downloads for matching titles.
276 logger: Log messages to a logging.Logger instance.
277 logtostderr: Log messages to stderr instead of stdout.
278 consoletitle: Display progress in console window's titlebar.
279 writedescription: Write the video description to a .description file
280 writeinfojson: Write the video metadata to a .info.json file
281 clean_infojson: Remove private fields from the infojson
282 getcomments: Extract video comments. This will not be written to disk
283 unless writeinfojson is also given
284 writeannotations: Write the video annotations to a .annotations.xml file
285 writethumbnail: Write the thumbnail image to a file
286 allow_playlist_files: Whether to write playlists' description, infojson etc
287 also to disk when using the 'write*' options
288 write_all_thumbnails: Write all thumbnail formats to files
289 writelink: Write an internet shortcut file, depending on the
290 current platform (.url/.webloc/.desktop)
291 writeurllink: Write a Windows internet shortcut file (.url)
292 writewebloclink: Write a macOS internet shortcut file (.webloc)
293 writedesktoplink: Write a Linux internet shortcut file (.desktop)
294 writesubtitles: Write the video subtitles to a file
295 writeautomaticsub: Write the automatically generated subtitles to a file
296 allsubtitles: Deprecated - Use subtitleslangs = ['all']
297 Downloads all the subtitles of the video
298 (requires writesubtitles or writeautomaticsub)
299 listsubtitles: Lists all available subtitles for the video
300 subtitlesformat: The format code for subtitles
301 subtitleslangs: List of languages of the subtitles to download (can be regex).
302 The list may contain "all" to refer to all the available
303 subtitles. The language can be prefixed with a "-" to
304 exclude it from the requested languages. Eg: ['all', '-live_chat']
305 keepvideo: Keep the video file after post-processing
306 daterange: A DateRange object, download only if the upload_date is in the range.
307 skip_download: Skip the actual download of the video file
308 cachedir: Location of the cache files in the filesystem.
309 False to disable filesystem cache.
310 noplaylist: Download single video instead of a playlist if in doubt.
311 age_limit: An integer representing the user's age in years.
312 Unsuitable videos for the given age are skipped.
313 min_views: An integer representing the minimum view count the video
314 must have in order to not be skipped.
315 Videos without view count information are always
316 downloaded. None for no limit.
317 max_views: An integer representing the maximum view count.
318 Videos that are more popular than that are not
319 downloaded.
320 Videos without view count information are always
321 downloaded. None for no limit.
322 download_archive: File name of a file where all downloads are recorded.
323 Videos already present in the file are not downloaded
324 again.
325 break_on_existing: Stop the download process after attempting to download a
326 file that is in the archive.
327 break_on_reject: Stop the download process when encountering a video that
328 has been filtered out.
329 break_per_url: Whether break_on_reject and break_on_existing
330 should act on each input URL as opposed to for the entire queue
331 cookiefile: File name where cookies should be read from and dumped to
332 cookiesfrombrowser: A tuple containing the name of the browser, the profile
333 name/path from where cookies are loaded, and the name of the
334 keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
335 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
336 support RFC 5746 secure renegotiation
337 nocheckcertificate: Do not verify SSL certificates
338 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
339 At the moment, this is only supported by YouTube.
340 http_headers: A dictionary of custom headers to be used for all requests
341 proxy: URL of the proxy server to use
342 geo_verification_proxy: URL of the proxy to use for IP address verification
343 on geo-restricted sites.
344 socket_timeout: Time to wait for unresponsive hosts, in seconds
345 bidi_workaround: Work around buggy terminals without bidirectional text
346 support, using fribidi
347 debug_printtraffic:Print out sent and received HTTP traffic
348 include_ads: Download ads as well (deprecated)
349 default_search: Prepend this string if an input url is not valid.
350 'auto' for elaborate guessing
351 encoding: Use this encoding instead of the system-specified.
352 extract_flat: Do not resolve URLs, return the immediate result.
353 Pass in 'in_playlist' to only show this behavior for
354 playlist items.
355 wait_for_video: If given, wait for scheduled streams to become available.
356 The value should be a tuple containing the range
357 (min_secs, max_secs) to wait between retries
358 postprocessors: A list of dictionaries, each with an entry
359 * key: The name of the postprocessor. See
360 yt_dlp/postprocessor/__init__.py for a list.
361 * when: When to run the postprocessor. Allowed values are
362 the entries of utils.POSTPROCESS_WHEN
363 Assumed to be 'post_process' if not given
364 post_hooks: Deprecated - Register a custom postprocessor instead
365 A list of functions that get called as the final step
366 for each video file, after all postprocessors have been
367 called. The filename will be passed as the only argument.
368 progress_hooks: A list of functions that get called on download
369 progress, with a dictionary with the entries
370 * status: One of "downloading", "error", or "finished".
371 Check this first and ignore unknown values.
372 * info_dict: The extracted info_dict
373
374 If status is one of "downloading", or "finished", the
375 following properties may also be present:
376 * filename: The final filename (always present)
377 * tmpfilename: The filename we're currently writing to
378 * downloaded_bytes: Bytes on disk
379 * total_bytes: Size of the whole file, None if unknown
380 * total_bytes_estimate: Guess of the eventual file size,
381 None if unavailable.
382 * elapsed: The number of seconds since download started.
383 * eta: The estimated time in seconds, None if unknown
384 * speed: The download speed in bytes/second, None if
385 unknown
386 * fragment_index: The counter of the currently
387 downloaded video fragment.
388 * fragment_count: The number of fragments (= individual
389 files that will be merged)
390
391 Progress hooks are guaranteed to be called at least once
392 (with status "finished") if the download is successful.
393 postprocessor_hooks: A list of functions that get called on postprocessing
394 progress, with a dictionary with the entries
395 * status: One of "started", "processing", or "finished".
396 Check this first and ignore unknown values.
397 * postprocessor: Name of the postprocessor
398 * info_dict: The extracted info_dict
399
400 Postprocessor hooks are guaranteed to be called at least twice
401 (with status "started" and "finished") if the processing is successful.
402 merge_output_format: Extension to use when merging formats.
403 final_ext: Expected final extension; used to detect when the file was
404 already downloaded and converted
405 fixup: Automatically correct known faults of the file.
406 One of:
407 - "never": do nothing
408 - "warn": only emit a warning
409 - "detect_or_warn": check whether we can do anything
410 about it, warn otherwise (default)
411 source_address: Client-side IP address to bind to.
412 call_home: Boolean, true iff we are allowed to contact the
413 yt-dlp servers for debugging. (BROKEN)
414 sleep_interval_requests: Number of seconds to sleep between requests
415 during extraction
416 sleep_interval: Number of seconds to sleep before each download when
417 used alone or a lower bound of a range for randomized
418 sleep before each download (minimum possible number
419 of seconds to sleep) when used along with
420 max_sleep_interval.
421 max_sleep_interval:Upper bound of a range for randomized sleep before each
422 download (maximum possible number of seconds to sleep).
423 Must only be used along with sleep_interval.
424 Actual sleep time will be a random float from range
425 [sleep_interval; max_sleep_interval].
426 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
427 listformats: Print an overview of available video formats and exit.
428 list_thumbnails: Print a table of all thumbnails and exit.
429 match_filter: A function that gets called with the info_dict of
430 every video.
431 If it returns a message, the video is ignored.
432 If it returns None, the video is downloaded.
433 match_filter_func in utils.py is one example for this.
434 no_color: Do not emit color codes in output.
435 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
436 HTTP header
437 geo_bypass_country:
438 Two-letter ISO 3166-1 alpha-2 country code that will be used for
439 explicit geographic restriction bypassing via faking
440 X-Forwarded-For HTTP header
441 geo_bypass_ip_block:
442 IP range in CIDR notation that will be used similarly to
443 geo_bypass_country
444
445 The following options determine which downloader is picked:
446 external_downloader: A dictionary of protocol keys and the executable of the
447 external downloader to use for it. The allowed protocols
448 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
449 Set the value to 'native' to use the native downloader
450 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
451 or {'m3u8': 'ffmpeg'} instead.
452 Use the native HLS downloader instead of ffmpeg/avconv
453 if True, otherwise use ffmpeg/avconv if False, otherwise
454 use downloader suggested by extractor if None.
455 compat_opts: Compatibility options. See "Differences in default behavior".
456 The following options do not work when used through the API:
457 filename, abort-on-error, multistreams, no-live-chat, format-sort
458 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
459 Refer __init__.py for their implementation
460 progress_template: Dictionary of templates for progress outputs.
461 Allowed keys are 'download', 'postprocess',
462 'download-title' (console title) and 'postprocess-title'.
463 The template is mapped on a dictionary with keys 'progress' and 'info'
464
465 The following parameters are not used by YoutubeDL itself, they are used by
466 the downloader (see yt_dlp/downloader/common.py):
467 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
468 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
469 continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
470 external_downloader_args, concurrent_fragment_downloads.
471
472 The following options are used by the post processors:
473 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
474 otherwise prefer ffmpeg. (avconv support is deprecated)
475 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
476 to the binary or its containing directory.
477 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
478 and a list of additional command-line arguments for the
479 postprocessor/executable. The dict can also have "PP+EXE" keys
480 which are used when the given exe is used by the given PP.
481 Use 'default' as the name for arguments to be passed to all PP
482 For compatibility with youtube-dl, a single list of args
483 can also be used
484
485 The following options are used by the extractors:
486 extractor_retries: Number of times to retry for known errors
487 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
488 hls_split_discontinuity: Split HLS playlists to different formats at
489 discontinuities such as ad breaks (default: False)
490 extractor_args: A dictionary of arguments to be passed to the extractors.
491 See "EXTRACTOR ARGUMENTS" for details.
492 Eg: {'youtube': {'skip': ['dash', 'hls']}}
493 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
494 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
495 If True (default), DASH manifests and related
496 data will be downloaded and processed by extractor.
497 You can reduce network I/O by disabling it if you don't
498 care about DASH. (only for youtube)
499 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
500 If True (default), HLS manifests and related
501 data will be downloaded and processed by extractor.
502 You can reduce network I/O by disabling it if you don't
503 care about HLS. (only for youtube)
504 """
505
# Names of the info-dict fields listed as numeric (going by the constant's
# name; the code consuming this set is outside this excerpt).
_NUMERIC_FIELDS = {
    # picture / audio stream properties
    'width', 'height', 'fps',
    'tbr', 'abr', 'vbr', 'asr',
    'filesize', 'filesize_approx',
    # time-related values
    'timestamp', 'release_timestamp', 'release_year',
    'duration', 'start_time', 'end_time',
    # counters and ratings
    'view_count', 'like_count', 'dislike_count', 'repost_count',
    'comment_count', 'average_rating', 'age_limit',
    # positions within a series / album
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number',
}
515
# Keys that belong to an individual format dictionary.
# NB: Keep in sync with the docstring of extractor/common.py
_format_fields = {
    # identification
    'url', 'manifest_url', 'ext', 'format', 'format_id', 'format_note',
    # media characteristics
    'width', 'height', 'resolution', 'dynamic_range', 'fps', 'stretched_ratio',
    'tbr', 'abr', 'vbr', 'asr', 'acodec', 'vcodec', 'container',
    'filesize', 'filesize_approx',
    # transport
    'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
    'http_headers', 'no_resume', 'has_drm', 'downloader_options',
    # ranking
    'preference', 'language', 'language_preference', 'quality', 'source_preference',
    # rtmp-style parameters
    'page_url', 'app', 'play_path', 'tc_url', 'flash_version',
    'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
}
# File extensions grouped by the kind of media they are listed under.
_format_selection_exts = dict(
    audio={'m4a', 'mp3', 'ogg', 'aac'},
    video={'mp4', 'flv', 'webm', '3gp'},
    storyboards={'mhtml'},
)
531
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    Sets up the bookkeeping state and output streams, applies the
    compatibility shims for deprecated/renamed options, builds the output
    templates and the format selector, sets up the opener, registers the
    hooks and postprocessors listed in params and finally preloads the
    download archive.

    @param params       Dictionary of options (see the class docstring).
                        An empty dict is used when None. The dict is kept
                        by reference and is modified in place.
    @param auto_init    Whether to load the default extractors and print header (if verbose).
                        Set to 'no_verbose_header' to not print the header
    """
    if params is None:
        params = {}
    self.params = params
    # Registered extractors by ie_key: _ies may hold classes or instances,
    # _ies_instances holds instances only (see add_info_extractor)
    self._ies = {}
    self._ies_instances = {}
    # One postprocessor chain per stage named in POSTPROCESS_WHEN
    self._pps = {k: [] for k in POSTPROCESS_WHEN}
    # Messages already emitted with only_once=True (see _write_string)
    self._printed_messages = set()
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._postprocessor_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._num_videos = 0
    self._playlist_level = 0
    self._playlist_urls = set()
    self.cache = Cache(self)

    windows_enable_vt_mode()
    # Output streams by purpose. These must exist before the first
    # report_warning() call below
    self._out_files = {
        'error': sys.stderr,
        'print': sys.stderr if self.params.get('logtostderr') else sys.stdout,
        # No console stream on 'nt'; elsewhere the first of stderr/stdout
        # accepted by supports_terminal_sequences, or None
        'console': None if compat_os_name == 'nt' else next(
            filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None)
    }
    # In quiet mode, screen messages go to stderr instead of the print stream
    self._out_files['screen'] = sys.stderr if self.params.get('quiet') else self._out_files['print']
    # Colors are allowed only if not disabled and the stream supports terminal sequences
    self._allow_colors = {
        type_: not self.params.get('no_color') and supports_terminal_sequences(self._out_files[type_])
        for type_ in ('screen', 'error')
    }

    if sys.version_info < (3, 6):
        self.report_warning(
            'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

    if self.params.get('allow_unplayable_formats'):
        self.report_warning(
            f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
            'This is a developer option intended for debugging. \n'
            ' If you experience any issues while using this option, '
            f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

    def check_deprecated(param, option, suggestion):
        """Warn if the deprecated param is set (not None); return whether it was"""
        if self.params.get(param) is not None:
            self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
            return True
        return False

    # The deprecated proxy option is only used when its replacement is unset
    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    # Emit the warnings that were passed in through params
    for msg in self.params.get('_warnings', []):
        self.report_warning(msg)
    for msg in self.params.get('_deprecation_warnings', []):
        self.deprecation_warning(msg)

    if 'list-formats' in self.params.get('compat_opts', []):
        self.params['listformats_table'] = False

    if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
        # nooverwrites was unnecessarily changed to overwrites
        # in 0c3d0f51778b153f65c21906031c2e091fcfb641
        # This ensures compatibility with both keys
        self.params['overwrites'] = not self.params['nooverwrites']
    elif self.params.get('overwrites') is None:
        # Missing or None: make sure the key is absent altogether
        self.params.pop('overwrites', None)
    else:
        # Keep the legacy key consistent with the new one
        self.params['nooverwrites'] = not self.params['overwrites']

    self.params.setdefault('forceprint', {})
    self.params.setdefault('print_to_file', {})

    # Compatibility with older syntax
    if not isinstance(params['forceprint'], dict):
        self.params['forceprint'] = {'video': params['forceprint']}

    if self.params.get('bidi_workaround', False):
        # Start an external bidi tool writing to a pty; _bidi_workaround()
        # feeds it through stdin and reads the result from _output_channel
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._out_files['error'])
            # Try bidiv first and fall back to fribidi
            try:
                self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
            except OSError:
                self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # Only a missing executable is tolerated; anything else propagates
            if ose.errno == errno.ENOENT:
                self.report_warning(
                    'Could not find fribidi executable, ignoring --bidi-workaround. '
                    'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    # Creating format selector here allows us to catch syntax errors before the extraction
    # None and '-' are stored unchanged, callables are used directly and
    # anything else is passed to build_format_selector
    self.format_selector = (
        self.params.get('format') if self.params.get('format') in (None, '-')
        else self.params['format'] if callable(self.params['format'])
        else self.build_format_selector(self.params['format']))

    # Set http_headers defaults according to std_headers
    self.params['http_headers'] = merge_headers(std_headers, self.params.get('http_headers', {}))

    self._setup_opener()

    if auto_init:
        if auto_init != 'no_verbose_header':
            self.print_debug_header()
        self.add_default_info_extractors()

    # Register the hooks given in params through the matching add_* method
    hooks = {
        'post_hooks': self.add_post_hook,
        'progress_hooks': self.add_progress_hook,
        'postprocessor_hooks': self.add_postprocessor_hook,
    }
    for opt, fn in hooks.items():
        for ph in self.params.get(opt, []):
            fn(ph)

    for pp_def_raw in self.params.get('postprocessors', []):
        # Work on a copy so that the caller's definition is not modified
        pp_def = dict(pp_def_raw)
        when = pp_def.pop('when', 'post_process')
        # 'key' selects the postprocessor; the remaining entries become its keyword arguments
        self.add_post_processor(
            get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
            when=when)

    register_socks_protocols()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified

        Adds every stripped line of the file to self.archive. Returns
        False when fn is None or the file does not exist, True otherwise.
        """
        if fn is None:
            return False
        self.write_debug(f'Loading archive file {fn!r}')
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file is not an error
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    # The set must exist before preload_download_archive() fills it
    self.archive = set()
    preload_download_archive(self.params.get('download_archive'))
707
def warn_if_short_id(self, argv):
    """Warn if an argument looks like a short YouTube ID starting with a dash.

    Such an argument (a dash followed by exactly 10 ID characters) is
    indistinguishable from an option. When at least one is found, a warning
    is reported suggesting a command line with those arguments moved after
    a "--" separator.
    """
    dashed_ids, remaining = [], []
    for arg in argv:
        # short YouTube ID starting with dash?
        bucket = dashed_ids if re.match(r'^-[0-9A-Za-z_-]{10}$', arg) else remaining
        bucket.append(arg)

    if not dashed_ids:
        return

    suggested_argv = ['yt-dlp', *remaining, '--', *dashed_ids]
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s' %
        args_to_str(suggested_argv))
723
def add_info_extractor(self, ie):
    """Register an InfoExtractor (class or instance) under its ie_key.

    Both classes and instances are stored in _ies. Only instances are
    additionally stored in _ies_instances and linked back to this object
    through set_downloader.
    """
    key = ie.ie_key()
    self._ies[key] = ie
    if isinstance(ie, type):
        # A bare class: nothing to link yet
        return
    self._ies_instances[key] = ie
    ie.set_downloader(self)
731
def _get_info_extractor_class(self, ie_key):
    """Return the entry registered in _ies for ie_key.

    When nothing is registered, the extractor is looked up with the
    module-level get_info_extractor, registered and returned.
    """
    registered = self._ies.get(ie_key)
    if registered is not None:
        return registered
    looked_up = get_info_extractor(ie_key)
    self.add_info_extractor(looked_up)
    return looked_up
738
def get_info_extractor(self, ie_key):
    """
    Return an IE instance for ie_key. An instance already present in
    _ies_instances is reused; otherwise a new one is created, added to
    the extractor list and returned.
    """
    existing = self._ies_instances.get(ie_key)
    if existing is not None:
        return existing
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
750
def add_default_info_extractors(self):
    """
    Register every InfoExtractor class yielded by gen_extractor_classes,
    in the order they are produced
    """
    register = self.add_info_extractor
    for extractor_class in gen_extractor_classes():
        register(extractor_class)
757
def add_post_processor(self, pp, when='post_process'):
    """Append a PostProcessor to the chain of the stage named by `when`
    and link it to this object through set_downloader."""
    stage_chain = self._pps[when]
    stage_chain.append(pp)
    pp.set_downloader(self)
762
def add_post_hook(self, ph):
    """Register `ph` at the end of the list of post hooks"""
    registered_hooks = self._post_hooks
    registered_hooks.append(ph)
766
def add_progress_hook(self, ph):
    """Register `ph` at the end of the list of download progress hooks"""
    registered_hooks = self._progress_hooks
    registered_hooks.append(ph)
770
def add_postprocessor_hook(self, ph):
    """Register a postprocessing progress hook and attach it to every known postprocessor"""
    self._postprocessor_hooks.append(ph)
    # self._pps maps a stage name to its list of postprocessors
    for postprocessor in itertools.chain.from_iterable(self._pps.values()):
        postprocessor.add_progress_hook(ph)
777
778 def _bidi_workaround(self, message):
779 if not hasattr(self, '_output_channel'):
780 return message
781
782 assert hasattr(self, '_output_process')
783 assert isinstance(message, compat_str)
784 line_count = message.count('\n') + 1
785 self._output_process.stdin.write((message + '\n').encode('utf-8'))
786 self._output_process.stdin.flush()
787 res = ''.join(self._output_channel.readline().decode('utf-8')
788 for _ in range(line_count))
789 return res[:-len('\n')]
790
def _write_string(self, message, out=None, only_once=False):
    """Write message to `out` using the configured encoding.

    With only_once set, a message that was already written through this
    path is silently dropped; otherwise it is remembered for next time.
    """
    if only_once and message in self._printed_messages:
        return
    if only_once:
        self._printed_messages.add(message)
    write_string(message, out=out, encoding=self.params.get('encoding'))
797
def to_stdout(self, message, skip_eol=False, quiet=None):
    """Write message to the 'print' output file, adding a newline unless skip_eol is set.

    The quiet argument is no longer honoured; passing it only emits a
    deprecation warning.
    """
    if quiet is not None:
        self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
    line_end = '' if skip_eol else '\n'
    text = self._bidi_workaround(message)
    self._write_string('%s%s' % (text, line_end), self._out_files['print'])
805
def to_screen(self, message, skip_eol=False, quiet=None):
    """Show message on the 'screen' output unless quiet mode applies.

    A configured logger takes precedence and receives the message at debug
    level. The quiet argument overrides the 'quiet' param when given, and
    'verbose' always disables quieting.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return

    be_quiet = self.params.get('quiet') if quiet is None else quiet
    if be_quiet and not self.params.get('verbose'):
        return

    line_end = '' if skip_eol else '\n'
    text = self._bidi_workaround(message)
    self._write_string('%s%s' % (text, line_end), self._out_files['screen'])
816
def to_stderr(self, message, only_once=False):
    """Send message to the configured logger's error(), or write it to the 'error' output"""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    self._write_string(
        '%s\n' % self._bidi_workaround(message),
        self._out_files['error'],
        only_once=only_once)
824
def _send_console_code(self, code):
    """Write a control sequence to the console output; skipped on 'nt' or without a console."""
    if compat_os_name != 'nt' and self._out_files['console']:
        self._write_string(code, self._out_files['console'])
829
def to_console_title(self, message):
    """Set the console window title to message when the 'consoletitle' param is enabled."""
    if not self.params.get('consoletitle', False):
        return
    title = remove_terminal_sequences(message)
    if compat_os_name != 'nt':
        self._send_console_code(f'\033]0;{title}\007')
        return
    kernel32 = ctypes.windll.kernel32
    if kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `title` is
        # already of type unicode()
        kernel32.SetConsoleTitleW(ctypes.c_wchar_p(title))
841
def save_console_title(self):
    """Push the current console title on the terminal's stack (needs 'consoletitle', skipped when simulating)."""
    if self.params.get('consoletitle') and not self.params.get('simulate'):
        self._send_console_code('\033[22;0t')  # Save the title on stack
846
def restore_console_title(self):
    """Pop the console title from the terminal's stack (needs 'consoletitle', skipped when simulating)."""
    if self.params.get('consoletitle') and not self.params.get('simulate'):
        self._send_console_code('\033[23;0t')  # Restore the title from stack
851
852 def __enter__(self):
853 self.save_console_title()
854 return self
855
856 def __exit__(self, *args):
857 self.restore_console_title()
858
859 if self.params.get('cookiefile') is not None:
860 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
861
def trouble(self, message=None, tb=None, is_error=True):
    """Decide what to do when a download problem appears.

    The message (if any) is printed first. In verbose mode a traceback is
    printed as well. Then, unless the 'ignoreerrors' param is set, a
    DownloadError is raised; otherwise the return code is set to 1.

    @param tb          If given, is additional traceback information
    @param is_error    Whether to raise error according to ignorerrors
    """
    def nested_exc_info():
        # exc_info carried by the exception currently being handled, else None
        active = sys.exc_info()[1]
        if hasattr(active, 'exc_info') and active.exc_info[0]:
            return active.exc_info
        return None

    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                pieces = []
                inner = nested_exc_info()
                if inner:
                    pieces.append(''.join(traceback.format_exception(*inner)))
                pieces.append(encode_compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                # Not handling an exception: show where we were called from
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if not is_error:
        return
    if self.params.get('ignoreerrors'):
        self._download_retcode = 1
        return
    raise DownloadError(message, nested_exc_info() or sys.exc_info())
895
class Styles(Enum):
    """Named text styles used when formatting output.

    _format_text unwraps a member to its value (the style string)
    before handing it to the text formatter.
    """
    HEADERS = 'yellow'
    EMPHASIS = 'light blue'
    ID = 'green'
    DELIM = 'blue'
    ERROR = 'red'
    # NOTE: same value as HEADERS, so Enum treats WARNING as an alias of HEADERS
    WARNING = 'yellow'
    SUPPRESS = 'light black'
904
905 def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
906 if test_encoding:
907 original_text = text
908 # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
909 encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
910 text = text.encode(encoding, 'ignore').decode(encoding)
911 if fallback is not None and text != original_text:
912 text = fallback
913 if isinstance(f, self.Styles):
914 f = f.value
915 return format_text(text, f) if allow_colors else text if fallback is None else fallback
916
917 def _format_screen(self, *args, **kwargs):
918 return self._format_text(
919 self._out_files['screen'], self._allow_colors['screen'], *args, **kwargs)
920
921 def _format_err(self, *args, **kwargs):
922 return self._format_text(
923 self._out_files['error'], self._allow_colors['error'], *args, **kwargs)
924
def report_warning(self, message, only_once=False):
    '''
    Emit a warning: through the configured logger if there is one,
    otherwise on stderr prefixed with a (possibly colored) 'WARNING:'.
    The stderr path is silenced by the 'no_warnings' param.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    prefix = self._format_err("WARNING:", self.Styles.WARNING)
    self.to_stderr(f'{prefix} {message}', only_once)
936
def deprecation_warning(self, message):
    """Report use of a deprecated feature.

    With a configured logger the text goes to logger.warning(); otherwise it
    is written to stderr once per distinct message, prefixed with a
    (possibly colored) 'DeprecationWarning:'.
    """
    logger = self.params.get('logger')
    if logger is not None:
        # Must be an f-string so the actual message is interpolated
        logger.warning(f'DeprecationWarning: {message}')
    else:
        self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
942
def report_error(self, message, *args, **kwargs):
    '''
    Prefix message with a (possibly colored) 'ERROR:' and hand it,
    together with any extra arguments, to trouble().
    '''
    prefix = self._format_err("ERROR:", self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', *args, **kwargs)
949
def write_debug(self, message, only_once=False):
    '''In verbose mode, send "[debug] <message>" to the logger if configured, else to stderr'''
    if not self.params.get('verbose', False):
        return
    line = '[debug] %s' % message
    logger = self.params.get('logger')
    if logger:
        logger.debug(line)
        return
    self.to_stderr(line, only_once)
959
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name is already fully downloaded.

    Falls back to a message without the name if printing it raises
    UnicodeEncodeError.
    """
    try:
        announcement = '[download] %s has already been downloaded' % file_name
        self.to_screen(announcement)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
966
def report_file_delete(self, file_name):
    """Tell the user that the existing file_name is about to be deleted.

    Falls back to a message without the name if printing it raises
    UnicodeEncodeError.
    """
    try:
        announcement = 'Deleting existing file %s' % file_name
        self.to_screen(announcement)
    except UnicodeEncodeError:
        self.to_screen('Deleting existing file')
973
def raise_no_formats(self, info, forced=False, *, msg=None):
    """Raise ExtractorError for a video without formats, or only warn.

    Only a warning is issued when the 'ignore_no_formats_error' param is set
    and forced is false. Without an explicit msg, the text depends on
    whether info is flagged with '__has_drm'.
    """
    has_drm = info.get('__has_drm')
    ignored = self.params.get('ignore_no_formats_error')
    expected = bool(msg)
    if not msg:
        msg = 'This video is DRM protected' if has_drm else 'No video formats found!'

    if ignored and not forced:
        self.report_warning(msg)
        return
    raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                         expected=has_drm or ignored or expected)
983
def parse_outtmpl(self):
    """Return the 'outtmpl' param as a dict with all default templates filled in.

    A non-dict value becomes the 'default' template. Entries missing (or
    None) are taken from DEFAULT_OUTTMPL; the dict is updated in place.
    A warning is issued for every template given as bytes.
    """
    templates = self.params.get('outtmpl', {})
    if not isinstance(templates, dict):
        templates = {'default': templates}

    restrict = self.params.get('restrictfilenames')

    def sanitize(template):
        # Remove spaces in the default template
        if restrict:
            return template.replace(' - ', ' ').replace(' ', '-')
        return template

    missing = {
        name: sanitize(default)
        for name, default in DEFAULT_OUTTMPL.items()
        if templates.get(name) is None}
    templates.update(missing)

    for template in templates.values():
        if isinstance(template, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
    return templates
1002
def get_output_path(self, dir_type='', filename=None):
    """Build the output path for filename from the 'paths' param.

    The result joins the expanded 'home' path, the expanded path registered
    for dir_type (if dir_type is given) and filename, and is then passed
    through sanitize_path (forced when the 'windowsfilenames' param is set).

    @param dir_type    Key into the 'paths' dict; '' uses only 'home'
    @param filename    File name to append; None/'' appends nothing
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    path = os.path.join(
        expand_path(paths.get('home', '').strip()),
        expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
        filename or '')

    # The former Python 2 @ Windows re-encoding workaround (#4787) was removed:
    # this file is Python 3 only, so that branch could never run.
    return sanitize_path(path, force=self.params.get('windowsfilenames'))
1017
@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Run expand_path on an output template while keeping '%%' and '$$' intact."""
    # expand_path translates '%%' into '%' and '$$' into '$'
    # correspondingly that is not what we want since we need to keep
    # '%%' intact for template dict substitution step. Working around
    # with boundary-alike separator hack: a random marker is wedged into
    # each pair and stripped again afterwards.
    marker = ''.join(random.choice(ascii_letters) for _ in range(32))
    protected = outtmpl.replace('%%', f'%{marker}%').replace('$$', f'${marker}$')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    return expand_path(protected).replace(marker, '')
1032
@staticmethod
def escape_outtmpl(outtmpl):
    ''' Escape any remaining strings like %s, %abc% etc. '''
    leftover_re = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')

    def add_percent(mobj):
        # Matches that carry a key are kept as-is; others get an extra '%'
        prefix = '' if mobj.group('has_key') else '%'
        return prefix + mobj.group(0)

    return re.sub(leftover_re, add_percent, outtmpl)
1040
@classmethod
def validate_outtmpl(cls, outtmpl):
    ''' @return None or Exception object '''
    # Rewrite the custom conversion types to plain 's' so that
    # ordinary %-formatting can be used as the validity check
    custom_types_re = STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]')
    normalized = re.sub(
        custom_types_re,
        lambda mobj: f'{mobj.group(0)[:-1]}s',
        cls._outtmpl_expandpath(outtmpl))
    try:
        cls.escape_outtmpl(normalized) % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None
1053
1054 @staticmethod
1055 def _copy_infodict(info_dict):
1056 info_dict = dict(info_dict)
1057 info_dict.pop('__postprocessors', None)
1058 return info_dict
1059
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
    @param sanitize    Whether to sanitize the output as a filename.
                       For backward compatibility, a function can also be passed
    @return            Tuple (rewritten template, dict of values to substitute into it)
    """

    # NOTE: this writes 'epoch' into the caller's dict on purpose
    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    # Everything below works on a copy
    info_dict = self._copy_infodict(info_dict)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
    info_dict['video_autonumber'] = self._num_videos
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
        'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    # Values computed for each template key; returned alongside the rewritten template
    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
    # The decimal point must be escaped; a bare '.' would accept any character there
    MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:\.\d+)?')
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{field})
        (?P<maths>(?:{math_op}{math_field})*)
        (?:>(?P<strf_format>.+?))?
        (?P<remaining>
            (?P<alternate>(?<!\\),[^|&)]+)?
            (?:&(?P<replacement>.*?))?
            (?:\|(?P<default>.*?))?
        )$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

    def _traverse_infodict(k):
        # Look up a dotted key path in info_dict; a leading empty key is dropped
        k = k.split('.')
        if k[0] == '':
            k.pop(0)
        return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Evaluate one parsed field expression (a groupdict of INTERNAL_FORMAT_RE)
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            operator = None
            # Consume the maths string alternately as operator, operand, operator, ...
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if operator is None:
                    operator = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                # The operand is either a literal number or another field
                offset = float_or_none(item)
                if offset is None:
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = operator(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                operator = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        return value

    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
        return sanitize_filename(str(value), restricted=restricted,
                                 is_id=re.search(r'(^|[_.])id(\.|$)', key))

    # `sanitize` may be a custom function (backward compatibility); after this
    # it is only used as a flag
    sanitizer = sanitize if callable(sanitize) else filename_sanitizer
    sanitize = bool(sanitize)

    def _dumpjson_default(obj):
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        return repr(obj)

    def create_key(outer_mobj):
        # Replacement callback for EXTERNAL_FORMAT_RE: computes the value of one
        # template field, stores it in TMPL_DICT and returns the rewritten field
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        initial_field = mobj.group('fields') if mobj else ''
        value, replacement, default = None, None, na
        # Walk through the comma-separated alternates until one yields a value
        while mobj:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else default
            value = get_value(mobj)
            replacement = mobj['replacement']
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
            else:
                break

        fmt = outer_mobj.group('format')
        if fmt == 's' and value is not None and key in field_size_compat_map.keys():
            fmt = '0{:d}d'.format(field_size_compat_map[key])

        value = default if value is None else value if replacement is None else replacement

        flags = outer_mobj.group('conversion') or ''
        str_fmt = f'{fmt[:-1]}s'
        if fmt[-1] == 'l':  # list
            delim = '\n' if '#' in flags else ', '
            value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
        elif fmt[-1] == 'j':  # json
            value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
        elif fmt[-1] == 'q':  # quoted
            value = map(str, variadic(value) if '#' in flags else [value])
            value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
        elif fmt[-1] == 'B':  # bytes
            value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif fmt[-1] == 'U':  # unicode normalized
            value, fmt = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                value), str_fmt
        elif fmt[-1] == 'D':  # decimal suffix
            num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
            value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                          factor=1024 if '#' in flags else 1000)
        elif fmt[-1] == 'S':  # filename sanitization
            value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
        elif fmt[-1] == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif fmt[-1] not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), str_fmt
            if fmt[-1] in 'csr':
                value = sanitizer(initial_field, value)

        # Make the dict key unique per (expression, format) pair
        key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1234
def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Prepare outtmpl for info_dict (extra arguments go to prepare_outtmpl) and return the substituted string."""
    prepared, values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    escaped = self.escape_outtmpl(prepared)
    return escaped % values
1238
1239 def _prepare_filename(self, info_dict, tmpl_type='default'):
1240 try:
1241 outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
1242 filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
1243 if not filename:
1244 return None
1245
1246 if tmpl_type in ('default', 'temp'):
1247 final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
1248 if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
1249 filename = replace_extension(filename, ext, final_ext)
1250 else:
1251 force_ext = OUTTMPL_TYPES[tmpl_type]
1252 if force_ext:
1253 filename = replace_extension(filename, force_ext, info_dict.get('ext'))
1254
1255 # https://github.com/blackjack4494/youtube-dlc/issues/85
1256 trim_file_name = self.params.get('trim_file_name', False)
1257 if trim_file_name:
1258 no_ext, *ext = filename.rsplit('.', 2)
1259 filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
1260
1261 return filename
1262 except ValueError as err:
1263 self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
1264 return None
1265
def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename.

    @param dir_type    Template/path type; '' means the default template
    @param warn        Whether to warn when the 'paths' param has no effect
    @return            The full output path; '-' for stdout; '' when a
                       non-default, non-temp template yields no name;
                       otherwise the falsy result of the template as-is
    """

    filename = self._prepare_filename(info_dict, dir_type or 'default')
    if not filename and dir_type not in ('', 'temp'):
        return ''

    if warn:
        if not self.params.get('paths'):
            pass
        elif filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        # filename may be None here; os.path.isabs(None) would raise TypeError
        elif filename and os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
    if filename == '-' or not filename:
        return filename

    return self.get_output_path(dir_type, filename)
1284
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """ Returns None if the file should be downloaded

    Otherwise returns the reason (a string, or whatever the user's
    match_filter returned) why the entry is skipped. The reason is printed
    unless silent is set. If the matching 'break_on_existing' or
    'break_on_reject' param is enabled, the corresponding exception is
    raised instead of returning.

    @param incomplete  Passed on to the 'match_filter' callable
    @param silent      Do not print the skip reason
    """

    # Used in messages; falls back to the id, then to the literal 'video'
    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def check_filter():
        # Runs the filters in order and returns the first rejection reason, else None
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        match_filter = self.params.get('match_filter')
        if match_filter is not None:
            try:
                ret = match_filter(info_dict, incomplete=incomplete)
            except TypeError:
                # For backward compatibility
                # NOTE(review): this also catches a TypeError raised inside a
                # filter that does accept `incomplete` - confirm this is acceptable
                ret = None if incomplete else match_filter(info_dict)
            if ret is not None:
                return ret
        return None

    # The archive check takes precedence; filters only run for entries not in the archive
    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached
    if reason is not None:
        if not silent:
            self.to_screen('[download] ' + reason)
        if self.params.get(break_opt, False):
            raise break_err()
    return reason
1341
1342 @staticmethod
1343 def add_extra_info(info_dict, extra_info):
1344 '''Set the keys from extra_info in info dict if they are missing'''
1345 for key, value in extra_info.items():
1346 info_dict.setdefault(key, value)
1347
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract information for url using the first suitable extractor.

    Returns the result of the extraction, or None when the URL is already
    recorded in the download archive or no extractor is suitable (the
    latter is reported as an error).

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
    process -- whether to resolve all unresolved references (URLs, playlist items),
               must be True for download to work.
    force_generic_extractor -- force using the generic extractor
    """

    extra_info = {} if extra_info is None else extra_info

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    # With a key hint only that extractor is tried, otherwise all registered ones
    candidates = {ie_key: self._get_info_extractor_class(ie_key)} if ie_key else self._ies

    for key, extractor in candidates.items():
        if not extractor.suitable(url):
            continue

        if not extractor.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = extractor.get_temp_id(url)
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
            self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
            if self.params.get('break_on_existing', False):
                raise ExistingVideoReached()
            break
        return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
    else:
        # Only reached when the loop ran out of candidates without a match
        self.report_error('no suitable InfoExtractor for URL %s' % url)
1393
def __handle_extraction_exceptions(func):
    """Decorator for extraction methods: report errors instead of propagating them.

    The wrapped method is retried whenever it raises ReExtractInfo. Control
    flow exceptions (DownloadCancelled and the lazy/paged list IndexErrors)
    always propagate. Other errors are passed to report_error, after which
    the wrapper returns None (unless report_error itself raises).
    """
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        while True:
            try:
                return func(self, *args, **kwargs)
            except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                raise
            except ReExtractInfo as e:
                if e.expected:
                    self.to_screen(f'{e}; Re-extracting data')
                else:
                    self.to_stderr('\r')
                    self.report_warning(f'{e}; Re-extracting data')
                # Run the wrapped function again
                continue
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(str(e), e.format_traceback())
            except Exception as e:
                # Unexpected errors are only swallowed when 'ignoreerrors' is set
                if self.params.get('ignoreerrors'):
                    self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
            # An error was reported without raising: stop retrying
            break
    return wrapper
1425
def _wait_for_video(self, ie_result):
    """Wait for a video that has no formats yet, then request re-extraction.

    Does nothing unless the 'wait_for_video' param is set and ie_result is
    a video without 'formats' and 'url'. Otherwise it sleeps until the
    computed wait time is over (or the user interrupts) and then raises
    ReExtractInfo so that the caller extracts the data again.
    """
    if (not self.params.get('wait_for_video')
            or ie_result.get('_type', 'video') != 'video'
            or ie_result.get('formats') or ie_result.get('url')):
        return

    # Formats a duration in seconds with the '%02d:%02d:%02d' pattern
    format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
    last_msg = ''

    def progress(msg):
        # Overwrite the previous status line, padding with spaces to erase leftovers
        nonlocal last_msg
        self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
        last_msg = msg

    min_wait, max_wait = self.params.get('wait_for_video')
    # Seconds until release; None when 'release_timestamp' is unavailable
    diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
    if diff is None and ie_result.get('live_status') == 'is_upcoming':
        # Unknown release time: pick a random wait within the configured range
        diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
        self.report_warning('Release time of video is not known')
    elif (diff or 0) <= 0:
        self.report_warning('Video should already be available according to extracted info')
    # Clamp the wait into [min_wait, max_wait]; a missing bound does not limit
    diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
    self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

    wait_till = time.time() + diff
    try:
        while True:
            diff = wait_till - time.time()
            if diff <= 0:
                progress('')
                raise ReExtractInfo('[wait] Wait period ended', expected=True)
            progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
            time.sleep(1)
    except KeyboardInterrupt:
        progress('')
        raise ReExtractInfo('[wait] Interrupted by user', expected=True)
    except BaseException as e:
        # Finish the status line for anything other than the planned ReExtractInfo
        if not isinstance(e, ReExtractInfo):
            self.to_screen('')
        raise
1466
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """Run extractor ie on url and, if process is set, process the result.

    Returns None when the extractor returns nothing, the processed result
    when process is set, and the raw (defaults-augmented) result otherwise.
    """
    result = ie.extract(url)
    if result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return
    if isinstance(result, list):
        # Backwards compatibility: old IE result format
        result = {'_type': 'compat_list', 'entries': result}

    original_url = extra_info.get('original_url')
    if original_url:
        result.setdefault('original_url', original_url)
    self.add_default_extra_info(result, ie, url)

    if not process:
        return result
    self._wait_for_video(result)
    return self.process_ie_result(result, download, extra_info)
1486
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in URL- and extractor-related defaults of ie_result.

    Existing keys are never overwritten (see add_extra_info). Either of
    url and ie may be None, in which case the related fields are skipped.
    """
    if url is not None:
        self.add_extra_info(ie_result, {'webpage_url': url, 'original_url': url})

    page_url = ie_result.get('webpage_url')
    if page_url:
        derived = {
            'webpage_url_basename': url_basename(page_url),
            'webpage_url_domain': get_domain(page_url),
        }
        self.add_extra_info(ie_result, derived)

    if ie is None:
        return
    self.add_extra_info(ie_result, {
        'extractor': ie.IE_NAME,
        'extractor_key': ie.ie_key(),
    })
1504
1505 def process_ie_result(self, ie_result, download=True, extra_info=None):
1506 """
1507 Take the result of the ie(may be modified) and resolve all unresolved
1508 references (URLs, playlist items).
1509
1510 It will also download the videos if 'download'.
1511 Returns the resolved ie_result.
1512 """
1513 if extra_info is None:
1514 extra_info = {}
1515 result_type = ie_result.get('_type', 'video')
1516
1517 if result_type in ('url', 'url_transparent'):
1518 ie_result['url'] = sanitize_url(ie_result['url'])
1519 if ie_result.get('original_url'):
1520 extra_info.setdefault('original_url', ie_result['original_url'])
1521
1522 extract_flat = self.params.get('extract_flat', False)
1523 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1524 or extract_flat is True):
1525 info_copy = ie_result.copy()
1526 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1527 if ie and not ie_result.get('id'):
1528 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1529 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1530 self.add_extra_info(info_copy, extra_info)
1531 info_copy, _ = self.pre_process(info_copy)
1532 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1533 if self.params.get('force_write_download_archive', False):
1534 self.record_download_archive(info_copy)
1535 return ie_result
1536
1537 if result_type == 'video':
1538 self.add_extra_info(ie_result, extra_info)
1539 ie_result = self.process_video_result(ie_result, download=download)
1540 additional_urls = (ie_result or {}).get('additional_urls')
1541 if additional_urls:
1542 # TODO: Improve MetadataParserPP to allow setting a list
1543 if isinstance(additional_urls, compat_str):
1544 additional_urls = [additional_urls]
1545 self.to_screen(
1546 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1547 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1548 ie_result['additional_entries'] = [
1549 self.extract_info(
1550 url, download, extra_info=extra_info,
1551 force_generic_extractor=self.params.get('force_generic_extractor'))
1552 for url in additional_urls
1553 ]
1554 return ie_result
1555 elif result_type == 'url':
1556 # We have to add extra_info to the results because it may be
1557 # contained in a playlist
1558 return self.extract_info(
1559 ie_result['url'], download,
1560 ie_key=ie_result.get('ie_key'),
1561 extra_info=extra_info)
1562 elif result_type == 'url_transparent':
1563 # Use the information from the embedding page
1564 info = self.extract_info(
1565 ie_result['url'], ie_key=ie_result.get('ie_key'),
1566 extra_info=extra_info, download=False, process=False)
1567
1568 # extract_info may return None when ignoreerrors is enabled and
1569 # extraction failed with an error, don't crash and return early
1570 # in this case
1571 if not info:
1572 return info
1573
1574 force_properties = dict(
1575 (k, v) for k, v in ie_result.items() if v is not None)
1576 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1577 if f in force_properties:
1578 del force_properties[f]
1579 new_result = info.copy()
1580 new_result.update(force_properties)
1581
1582 # Extracted info may not be a video result (i.e.
1583 # info.get('_type', 'video') != video) but rather an url or
1584 # url_transparent. In such cases outer metadata (from ie_result)
1585 # should be propagated to inner one (info). For this to happen
1586 # _type of info should be overridden with url_transparent. This
1587 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1588 if new_result.get('_type') == 'url':
1589 new_result['_type'] = 'url_transparent'
1590
1591 return self.process_ie_result(
1592 new_result, download=download, extra_info=extra_info)
1593 elif result_type in ('playlist', 'multi_video'):
1594 # Protect from infinite recursion due to recursively nested playlists
1595 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1596 webpage_url = ie_result['webpage_url']
1597 if webpage_url in self._playlist_urls:
1598 self.to_screen(
1599 '[download] Skipping already downloaded playlist: %s'
1600 % ie_result.get('title') or ie_result.get('id'))
1601 return
1602
1603 self._playlist_level += 1
1604 self._playlist_urls.add(webpage_url)
1605 self._fill_common_fields(ie_result, False)
1606 self._sanitize_thumbnails(ie_result)
1607 try:
1608 return self.__process_playlist(ie_result, download)
1609 finally:
1610 self._playlist_level -= 1
1611 if not self._playlist_level:
1612 self._playlist_urls.clear()
1613 elif result_type == 'compat_list':
1614 self.report_warning(
1615 'Extractor %s returned a compat_list result. '
1616 'It needs to be updated.' % ie_result.get('extractor'))
1617
1618 def _fixup(r):
1619 self.add_extra_info(r, {
1620 'extractor': ie_result['extractor'],
1621 'webpage_url': ie_result['webpage_url'],
1622 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1623 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1624 'extractor_key': ie_result['extractor_key'],
1625 })
1626 return r
1627 ie_result['entries'] = [
1628 self.process_ie_result(_fixup(r), download, extra_info)
1629 for r in ie_result['entries']
1630 ]
1631 return ie_result
1632 else:
1633 raise Exception('Invalid result type: %s' % result_type)
1634
def _ensure_dir_exists(self, path):
    """Delegate to make_dir for `path` and return its result unchanged.

    self.report_error is handed to make_dir as its second argument
    (presumably the callback used to report a failure - confirm against
    utils.make_dir).
    """
    error_reporter = self.report_error
    return make_dir(path, error_reporter)
1637
1638 @staticmethod
1639 def _playlist_infodict(ie_result, **kwargs):
1640 return {
1641 **ie_result,
1642 'playlist': ie_result.get('title') or ie_result.get('id'),
1643 'playlist_id': ie_result.get('id'),
1644 'playlist_title': ie_result.get('title'),
1645 'playlist_uploader': ie_result.get('uploader'),
1646 'playlist_uploader_id': ie_result.get('uploader_id'),
1647 'playlist_index': 0,
1648 **kwargs,
1649 }
1650
def __process_playlist(self, ie_result, download):
    """Resolve, filter and process the entries of a playlist result.

    The entries to handle are chosen from the 'playlist_items' param
    (comma separated indices and "start-end" ranges) or, if that is not
    given, from 'playliststart'/'playlistend'. Each chosen entry is passed
    through process_ie_result and the results replace ie_result['entries'].

    @param ie_result  playlist info dict; must contain 'entries'. It is
                      modified in place ('entries', 'requested_entries'
                      and possibly 'playlist_count' are rewritten).
    @param download   forwarded to the processing of every entry
    @returns          the value of run_all_pps('playlist', ie_result), or
                      None when writing the playlist info-json or
                      description returned None
    @raises EntryNotInPlaylist  when 'entries' is missing, or when the
                      result carries 'requested_entries' and a requested
                      index cannot be found
    """
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if 'entries' not in ie_result:
        raise EntryNotInPlaylist('There are no entries')

    # Unique placeholder for positions that 'requested_entries' does not cover
    MissingEntry = object()
    incomplete_entries = bool(ie_result.get('requested_entries'))
    if incomplete_entries:
        def fill_missing_entries(entries, indices):
            # Spread the entries back to their 1-based playlist positions
            ret = [MissingEntry] * max(indices)
            for i, entry in zip(indices, entries):
                ret[i - 1] = entry
            return ret
        ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

    playlist_results = []

    playliststart = self.params.get('playliststart', 1)
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(spec):
            # "1,3-5" -> 1, 3, 4, 5
            for string_segment in spec.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    yield from range(int(start), int(end) + 1)
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']
    if isinstance(ie_entries, list):
        playlist_count = len(ie_entries)
        # The remaining %d is filled in with the number of selected entries later
        msg = f'Collected {playlist_count} videos; downloading %d of them'
        ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count

        def get_entry(i):
            return ie_entries[i - 1]
    else:
        msg = 'Downloading %d videos'
        if not isinstance(ie_entries, (PagedList, LazyList)):
            ie_entries = LazyList(ie_entries)
        elif isinstance(ie_entries, InAdvancePagedList):
            if ie_entries._pagesize == 1:
                # NOTE(review): this value is not read again within this method
                playlist_count = ie_entries._pagecount

        def get_entry(i):
            # Indexing a lazy container may trigger extraction, so it is run
            # through the same exception handling wrapper as other extraction
            return YoutubeDL.__handle_extraction_exceptions(
                lambda self, i: ie_entries[i - 1]
            )(self, i)

    entries, broken = [], False
    items = playlistitems if playlistitems is not None else itertools.count(playliststart)
    for i in items:
        if i == 0:
            continue
        if playlistitems is None and playlistend is not None and playlistend < i:
            break
        entry = None
        try:
            entry = get_entry(i)
            if entry is MissingEntry:
                raise EntryNotInPlaylist()
        except (IndexError, EntryNotInPlaylist):
            if incomplete_entries:
                raise EntryNotInPlaylist(f'Entry {i} cannot be found')
            elif not playlistitems:
                break
        # A None is kept for unavailable explicitly requested items so that
        # positions in `entries` stay aligned with `playlistitems`
        entries.append(entry)
        try:
            if entry is not None:
                self._match_entry(entry, incomplete=True, silent=True)
        except (ExistingVideoReached, RejectedVideoReached):
            broken = True
            break
    ie_result['entries'] = entries

    # Save playlist_index before re-ordering
    entries = [
        ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
        for i, entry in enumerate(entries, 1)
        if entry is not None]
    n_entries = len(entries)

    if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
        ie_result['playlist_count'] = n_entries

    if not playlistitems and (playliststart != 1 or playlistend):
        playlistitems = list(range(playliststart, playliststart + n_entries))
    ie_result['requested_entries'] = playlistitems

    _infojson_written = False
    write_playlist_files = self.params.get('allow_playlist_files', True)
    if write_playlist_files and self.params.get('list_thumbnails'):
        self.list_thumbnails(ie_result)
    if write_playlist_files and not self.params.get('simulate'):
        ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
        _infojson_written = self._write_info_json(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
        if _infojson_written is None:
            return
        if self._write_description('playlist', ie_result,
                                   self.prepare_filename(ie_copy, 'pl_description')) is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]
    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    # None of these change inside the loop below; computing max() once avoids
    # a quadratic cost when many playlist items are requested
    last_playlist_index = max(playlistitems) if playlistitems else (playlistend or n_entries)
    for i, entry_tuple in enumerate(entries, 1):
        playlist_index, entry = entry_tuple
        if 'playlist-index' in self.params.get('compat_opts', []):
            playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            '_last_playlist_index': last_playlist_index,
            'playlist_count': ie_result.get('playlist_count'),
            'playlist_index': playlist_index,
            'playlist_autonumber': i,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'webpage_url_domain': get_domain(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
            break
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results

    # Write the updated info to json
    if _infojson_written and self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
        return

    ie_result = self.run_all_pps('playlist', ie_result)
    self.to_screen(f'[download] Finished downloading playlist: {playlist}')
    return ie_result
1826
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Run a single playlist entry through process_ie_result.

    The call is wrapped by __handle_extraction_exceptions, so whatever
    that decorator does with exceptions also applies here.
    """
    processed = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return processed
1831
1832 def _build_format_filter(self, filter_spec):
1833 " Returns a function to filter the formats according to the filter_spec "
1834
1835 OPERATORS = {
1836 '<': operator.lt,
1837 '<=': operator.le,
1838 '>': operator.gt,
1839 '>=': operator.ge,
1840 '=': operator.eq,
1841 '!=': operator.ne,
1842 }
1843 operator_rex = re.compile(r'''(?x)\s*
1844 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1845 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1846 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1847 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1848 m = operator_rex.fullmatch(filter_spec)
1849 if m:
1850 try:
1851 comparison_value = int(m.group('value'))
1852 except ValueError:
1853 comparison_value = parse_filesize(m.group('value'))
1854 if comparison_value is None:
1855 comparison_value = parse_filesize(m.group('value') + 'B')
1856 if comparison_value is None:
1857 raise ValueError(
1858 'Invalid value %r in format specification %r' % (
1859 m.group('value'), filter_spec))
1860 op = OPERATORS[m.group('op')]
1861
1862 if not m:
1863 STR_OPERATORS = {
1864 '=': operator.eq,
1865 '^=': lambda attr, value: attr.startswith(value),
1866 '$=': lambda attr, value: attr.endswith(value),
1867 '*=': lambda attr, value: value in attr,
1868 '~=': lambda attr, value: value.search(attr) is not None
1869 }
1870 str_operator_rex = re.compile(r'''(?x)\s*
1871 (?P<key>[a-zA-Z0-9._-]+)\s*
1872 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1873 (?P<quote>["'])?
1874 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1875 (?(quote)(?P=quote))\s*
1876 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1877 m = str_operator_rex.fullmatch(filter_spec)
1878 if m:
1879 if m.group('op') == '~=':
1880 comparison_value = re.compile(m.group('value'))
1881 else:
1882 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1883 str_op = STR_OPERATORS[m.group('op')]
1884 if m.group('negation'):
1885 op = lambda attr, value: not str_op(attr, value)
1886 else:
1887 op = str_op
1888
1889 if not m:
1890 raise SyntaxError('Invalid filter specification %r' % filter_spec)
1891
1892 def _filter(f):
1893 actual_value = f.get(m.group('key'))
1894 if actual_value is None:
1895 return m.group('none_inclusive')
1896 return op(actual_value, comparison_value)
1897 return _filter
1898
1899 def _check_formats(self, formats):
1900 for f in formats:
1901 self.to_screen('[info] Testing format %s' % f['format_id'])
1902 path = self.get_output_path('temp')
1903 if not self._ensure_dir_exists(f'{path}/'):
1904 continue
1905 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1906 temp_file.close()
1907 try:
1908 success, _ = self.dl(temp_file.name, f, test=True)
1909 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1910 success = False
1911 finally:
1912 if os.path.exists(temp_file.name):
1913 try:
1914 os.remove(temp_file.name)
1915 except OSError:
1916 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1917 if success:
1918 yield f
1919 else:
1920 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1921
1922 def _default_format_spec(self, info_dict, download=True):
1923
1924 def can_merge():
1925 merger = FFmpegMergerPP(self)
1926 return merger.available and merger.can_merge()
1927
1928 prefer_best = (
1929 not self.params.get('simulate')
1930 and download
1931 and (
1932 not can_merge()
1933 or info_dict.get('is_live', False)
1934 or self.outtmpl_dict['default'] == '-'))
1935 compat = (
1936 prefer_best
1937 or self.params.get('allow_multiple_audio_streams', False)
1938 or 'format-spec' in self.params.get('compat_opts', []))
1939
1940 return (
1941 'best/bestvideo+bestaudio' if prefer_best
1942 else 'bestvideo*+bestaudio/best' if not compat
1943 else 'bestvideo+bestaudio/best')
1944
def build_format_selector(self, format_spec):
    """Compile a format specification string into a selector function.

    `format_spec` is tokenized with the Python tokenizer and parsed into a
    tree of FormatSelector tuples supporting "," (several selections),
    "/" (first alternative that yields anything), "+" (merge), "( )"
    (grouping) and "[...]" (filters, see _build_format_filter).

    @param format_spec  the format specification string
    @returns            a function taking a context dict with the keys
                        'formats' and 'incomplete_formats' and returning
                        an iterable of the selected format dicts
    @raises SyntaxError  the specification could not be parsed
    """
    def syntax_error(note, start):
        # start is a (row, column) token position; the caret marks the column
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    check_formats = self.params.get('check_formats') == 'selected'

    def _parse_filter(tokens):
        # Collect the raw text up to the closing ']'
        filter_parts = []
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
                        break
            elif type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive descent parser; returns a list of FormatSelector tuples.
        # The inside_* flags tell a nested call which operators belong to
        # its caller and therefore have to be handed back unconsumed.
        selectors = []
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        # Combine two (possibly already merged) formats into one format dict
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            # Keep only the first format providing each stream type that
            # must not occur more than once, and drop formats that have
            # neither audio nor video.
            # A new list is built instead of popping from formats_info while
            # enumerating it: removing an element mid-iteration shifts its
            # successor into the current index, so that successor would be
            # skipped without ever being checked.
            get_no_more = {'video': False, 'audio': False}
            kept_formats = []
            for fmt_info in formats_info:
                if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                    continue
                for aud_vid in ['audio', 'video']:
                    if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                        if get_no_more[aud_vid]:
                            break
                        get_no_more[aud_vid] = True
                else:
                    # No disallowed duplicate stream was found in this format
                    kept_formats.append(fmt_info)
            formats_info = kept_formats

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(filtered('format')),
            'format_id': '+'.join(filtered('format_id')),
            'ext': output_ext,
            'protocol': '+'.join(map(determine_protocol, formats_info)),
            'language': '+'.join(orderedSet(filtered('language'))) or None,
            'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
            'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
            'tbr': sum(filtered('tbr', 'vbr', 'abr')),
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'dynamic_range': the_only_video.get('dynamic_range'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
                'asr': the_only_audio.get('asr'),
            })

        return new_dict

    def _check_formats(formats):
        # Only test the formats when check_formats == 'selected'
        if not check_formats:
            yield from formats
            return
        yield from self._check_formats(formats)

    def _build_selector_function(selector):
        # Turn a parsed selector (or list of selectors) into a function of ctx
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    yield from f(ctx)
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                    yield _merge(pair)

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    yield from _check_formats(ctx['formats'][::-1])
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(_check_formats(ctx['formats']))
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, format_reverse, format_idx = False, True, 1
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    _filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else lambda f: True)  # b*, w*
                    filter_f = lambda f: _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                else:
                    if format_spec in self._format_selection_exts['audio']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                    elif format_spec in self._format_selection_exts['video']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                    elif format_spec in self._format_selection_exts['storyboards']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                    else:
                        filter_f = lambda f: f.get('format_id') == format_spec  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if format_fallback and ctx['incomplete_formats'] and not matches:
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        matches = formats
                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                    try:
                        yield matches[format_idx - 1]
                    except IndexError:
                        return

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply the "[...]" filters to a copy so that ctx stays untouched
            ctx_copy = dict(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Iterator over the token list that can step back by one token
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
2269
def _calc_headers(self, info_dict):
    """Return the HTTP headers to use for `info_dict`.

    The base is merge_headers() of the 'http_headers' param and the
    info dict's own 'http_headers'. A 'Cookie' header is set when
    _calc_cookies returns something truthy, and 'X-Forwarded-For' is
    filled from '__x_forwarded_for_ip' unless that header already exists.
    """
    own_headers = info_dict.get('http_headers') or {}
    headers = merge_headers(self.params['http_headers'], own_headers)

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip and 'X-Forwarded-For' not in headers:
        headers['X-Forwarded-For'] = forwarded_ip

    return headers
2283
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header the cookiejar produces for info_dict['url'].

    A throwaway request for the URL is created, self.cookiejar adds its
    cookie header to it, and that header is read back from the request.
    """
    probe_request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    return probe_request.get_header('Cookie')
2288
2289 def _sort_thumbnails(self, thumbnails):
2290 thumbnails.sort(key=lambda t: (
2291 t.get('preference') if t.get('preference') is not None else -1,
2292 t.get('width') if t.get('width') is not None else -1,
2293 t.get('height') if t.get('height') is not None else -1,
2294 t.get('id') if t.get('id') is not None else '',
2295 t.get('url')))
2296
2297 def _sanitize_thumbnails(self, info_dict):
2298 thumbnails = info_dict.get('thumbnails')
2299 if thumbnails is None:
2300 thumbnail = info_dict.get('thumbnail')
2301 if thumbnail:
2302 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2303 if not thumbnails:
2304 return
2305
2306 def check_thumbnails(thumbnails):
2307 for t in thumbnails:
2308 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2309 try:
2310 self.urlopen(HEADRequest(t['url']))
2311 except network_exceptions as err:
2312 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2313 continue
2314 yield t
2315
2316 self._sort_thumbnails(thumbnails)
2317 for i, t in enumerate(thumbnails):
2318 if t.get('id') is None:
2319 t['id'] = '%d' % i
2320 if t.get('width') and t.get('height'):
2321 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2322 t['url'] = sanitize_url(t['url'])
2323
2324 if self.params.get('check_formats') is True:
2325 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2326 else:
2327 info_dict['thumbnails'] = thumbnails
2328
2329 def _fill_common_fields(self, info_dict, is_video=True):
2330 # TODO: move sanitization here
2331 if is_video:
2332 # playlists are allowed to lack "title"
2333 info_dict['fulltitle'] = info_dict.get('title')
2334 if 'title' not in info_dict:
2335 raise ExtractorError('Missing "title" field in extractor result',
2336 video_id=info_dict['id'], ie=info_dict['extractor'])
2337 elif not info_dict.get('title'):
2338 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2339 info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
2340
2341 if info_dict.get('duration') is not None:
2342 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2343
2344 for ts_key, date_key in (
2345 ('timestamp', 'upload_date'),
2346 ('release_timestamp', 'release_date'),
2347 ('modified_timestamp', 'modified_date'),
2348 ):
2349 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2350 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2351 # see http://bugs.python.org/issue1646728)
2352 try:
2353 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2354 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2355 except (ValueError, OverflowError, OSError):
2356 pass
2357
2358 live_keys = ('is_live', 'was_live')
2359 live_status = info_dict.get('live_status')
2360 if live_status is None:
2361 for key in live_keys:
2362 if info_dict.get(key) is False:
2363 continue
2364 if info_dict.get(key):
2365 live_status = key
2366 break
2367 if all(info_dict.get(key) is False for key in live_keys):
2368 live_status = 'not_live'
2369 if live_status:
2370 info_dict['live_status'] = live_status
2371 for key in live_keys:
2372 if info_dict.get(key) is None:
2373 info_dict[key] = (live_status == key)
2374
2375 # Auto generate title fields corresponding to the *_number fields when missing
2376 # in order to always have clean titles. This is very common for TV series.
2377 for field in ('chapter', 'season', 'episode'):
2378 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2379 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2380
def process_video_result(self, info_dict, download=True):
    """Sanitize a single video result, select its formats and optionally download them

    The info dict is modified in-place. In order, this method:
      * checks that "id" is present and coerces string/numeric fields
      * fills in thumbnail, display_id and the common fields
      * sanitizes subtitle URLs and computes "requested_subtitles"
      * normalizes every format (unique format_id, ext, protocol, http_headers, ...)
      * runs the pre-processors and the match filter
      * handles the listing options (thumbnails/subtitles/formats)
      * runs the format selector and, when download is true, calls
        process_info() once per selected format

    Returns the info dict updated with the last selected format. It is
    returned early when _match_entry() gives a non-None result, and None is
    returned when only listing was requested.

    Raises ExtractorError when "id" is missing/empty or when the requested
    format is not available (unless 'ignore_no_formats_error' is set), and
    MaxDownloadsReached after the 'after_video' postprocessors have run if
    process_info() raised it.
    """
    assert info_dict.get('_type', 'video') == 'video'
    self._num_videos += 1

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
    elif not info_dict.get('id'):
        raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])

    def report_force_conversion(field, field_not, conversion):
        # Warn that an extractor supplied a value of the wrong type
        self.report_warning(
            '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
            % (field, field_not, conversion))

    def sanitize_string_field(info, string_field):
        # Coerce info[string_field] to a string in-place, warning when a conversion is needed
        field = info.get(string_field)
        if field is None or isinstance(field, compat_str):
            return
        report_force_conversion(string_field, 'a string', 'string')
        info[string_field] = compat_str(field)

    def sanitize_numeric_fields(info):
        # Coerce every field listed in self._NUMERIC_FIELDS with int_or_none, warning on conversion
        for numeric_field in self._NUMERIC_FIELDS:
            field = info.get(numeric_field)
            if field is None or isinstance(field, compat_numeric_types):
                continue
            report_force_conversion(numeric_field, 'numeric', 'int')
            info[numeric_field] = int_or_none(field)

    sanitize_string_field(info_dict, 'id')
    sanitize_numeric_fields(info_dict)
    # A missing, zero or negative duration is removed; only a negative one
    # (the only truthy value that pop() can return here) triggers the warning
    if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None):
        self.report_warning('"duration" field is negative, there is an error in extractor')

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    self._sanitize_thumbnails(info_dict)

    # Prefer the explicit thumbnail; otherwise fall back to the last entry of "thumbnails"
    thumbnail = info_dict.get('thumbnail')
    thumbnails = info_dict.get('thumbnails')
    if thumbnail:
        info_dict['thumbnail'] = sanitize_url(thumbnail)
    elif thumbnails:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if info_dict.get('display_id') is None and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    self._fill_common_fields(info_dict)

    # Sanitize the URL of every subtitle/caption format and derive "ext" when it is missing
    for cc_kind in ('subtitles', 'automatic_captions'):
        cc = info_dict.get(cc_kind)
        if cc:
            for _, subtitle in cc.items():
                for subtitle_format in subtitle:
                    if subtitle_format.get('url'):
                        subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                    if subtitle_format.get('ext') is None:
                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    automatic_captions = info_dict.get('automatic_captions')
    subtitles = info_dict.get('subtitles')

    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles, automatic_captions)

    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    # Remember whether any format had DRM before such formats are filtered out
    info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
    if not self.params.get('allow_unplayable_formats'):
        formats = [f for f in formats if not f.get('has_drm')]

    get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
    if not get_from_start:
        # Live video recorded from the current time: append a timestamp to the title
        info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
    if info_dict.get('is_live') and formats:
        formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
        if get_from_start and not formats:
            self.raise_no_formats(info_dict, msg='--live-from-start is passed, but there are no formats that can be downloaded from the start. '
                                                 'If you want to download from the current time, pass --no-live-from-start')

    if not formats:
        self.raise_no_formats(info_dict)

    def is_wellformed(f):
        # A format is usable only if it has a non-empty "url"; bytes URLs are coerced to str
        url = f.get('url')
        if not url:
            self.report_warning(
                '"url" field is missing or empty - skipping format, '
                'there is an error in extractor')
            return False
        if isinstance(url, bytes):
            sanitize_string_field(f, 'url')
        return True

    # Filter out malformed formats for better extraction robustness
    formats = list(filter(is_wellformed, formats))

    # Maps each format_id to the list of formats sharing it
    formats_dict = {}

    # We check that all the formats have the format and format_id fields
    for i, format in enumerate(formats):
        sanitize_string_field(format, 'format_id')
        sanitize_numeric_fields(format)
        format['url'] = sanitize_url(format['url'])
        if not format.get('format_id'):
            format['format_id'] = compat_str(i)
        else:
            # Sanitize format_id from characters used in format selector expression
            format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
        format_id = format['format_id']
        if format_id not in formats_dict:
            formats_dict[format_id] = []
        formats_dict[format_id].append(format)

    # Make sure all formats have unique format_id
    common_exts = set(itertools.chain(*self._format_selection_exts.values()))
    for format_id, ambiguous_formats in formats_dict.items():
        ambigious_id = len(ambiguous_formats) > 1
        for i, format in enumerate(ambiguous_formats):
            if ambigious_id:
                format['format_id'] = '%s-%d' % (format_id, i)
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Ensure there is no conflict between id and ext in format selection
            # See https://github.com/yt-dlp/yt-dlp/issues/1282
            if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
                format['format_id'] = 'f%s' % format['format_id']

    # Fill in the derived fields that are still missing from each format
    for i, format in enumerate(formats):
        if format.get('format') is None:
            format['format'] = '{id} - {res}{note}'.format(
                id=format['format_id'],
                res=self.format_resolution(format),
                note=format_field(format, 'format_note', ' (%s)'),
            )
        if format.get('protocol') is None:
            format['protocol'] = determine_protocol(format)
        if format.get('resolution') is None:
            format['resolution'] = self.format_resolution(format, default=None)
        if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
            format['dynamic_range'] = 'SDR'
        if (info_dict.get('duration') and format.get('tbr')
                and not format.get('filesize') and not format.get('filesize_approx')):
            format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)

        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(format)
        format['http_headers'] = self._calc_headers(full_format_info)
    # Remove private housekeeping stuff
    if '__x_forwarded_for_ip' in info_dict:
        del info_dict['__x_forwarded_for_ip']

    if self.params.get('check_formats') is True:
        formats = LazyList(self._check_formats(formats[::-1]), reverse=True)

    if not formats or formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats

    info_dict, _ = self.pre_process(info_dict)

    # A non-None result from _match_entry ends the processing of this entry here
    if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
        return info_dict

    self.post_extract(info_dict)
    info_dict, _ = self.pre_process(info_dict, 'after_filter')

    # The pre-processors may have modified the formats
    formats = info_dict.get('formats', [info_dict])

    # Listing options end the processing only when 'simulate' was left unset (None)
    list_only = self.params.get('simulate') is None and (
        self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
    # A format selector of '-' means that the selector is read from the user
    interactive_format_selection = not list_only and self.format_selector == '-'
    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
    if self.params.get('listsubtitles'):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(
                info_dict['id'], automatic_captions, 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
    if self.params.get('listformats') or interactive_format_selection:
        self.list_formats(info_dict)
    if list_only:
        # Without this printing, -F --print-json will not work
        self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
        return

    format_selector = self.format_selector
    if format_selector is None:
        req_format = self._default_format_spec(info_dict, download=download)
        self.write_debug('Default format spec: %s' % req_format)
        format_selector = self.build_format_selector(req_format)

    # Runs once, unless the selector is interactive: then the user is asked
    # again until the selector parses and matches at least one format
    while True:
        if interactive_format_selection:
            req_format = input(
                self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
            try:
                format_selector = self.build_format_selector(req_format)
            except SyntaxError as err:
                self.report_error(err, tb=False, is_error=False)
                continue

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if interactive_format_selection and not formats_to_download:
            self.report_error('Requested format is not available', tb=False, is_error=False)
            continue
        break

    if not formats_to_download:
        if not self.params.get('ignore_no_formats_error'):
            raise ExtractorError('Requested format is not available', expected=True,
                                 video_id=info_dict['id'], ie=info_dict['extractor'])
        self.report_warning('Requested format is not available')
        # Process what we can, even without any available formats.
        formats_to_download = [{}]

    best_format = formats_to_download[-1]
    if download:
        if best_format:
            self.to_screen(
                f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
                + ', '.join([f['format_id'] for f in formats_to_download]))
        max_downloads_reached = False
        for i, fmt in enumerate(formats_to_download):
            # Each selected format is processed as a copy of the info dict merged with that format
            formats_to_download[i] = new_info = self._copy_infodict(info_dict)
            new_info.update(fmt)
            try:
                self.process_info(new_info)
            except MaxDownloadsReached:
                # Deferred so that the bookkeeping below still runs; re-raised further down
                max_downloads_reached = True
            # Remove copied info
            for key, val in tuple(new_info.items()):
                if info_dict.get(key) == val:
                    new_info.pop(key)
            if max_downloads_reached:
                break

        # The archive is written only if at least one download asked for it and none left it False
        write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
        assert write_archive.issubset({True, False, 'ignore'})
        if True in write_archive and False not in write_archive:
            self.record_download_archive(info_dict)

        info_dict['requested_downloads'] = formats_to_download
        info_dict = self.run_all_pps('after_video', info_dict)
        if max_downloads_reached:
            raise MaxDownloadsReached()

    # We update the info dict with the selected best quality format (backwards compatibility)
    info_dict.update(best_format)
    return info_dict
2671
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    Normal subtitles are considered when the 'writesubtitles' param is set
    and automatic captions when 'writeautomaticsub' is set. For a language
    present in both, the normal subtitles are used.

    The languages come from 'allsubtitles' or 'subtitleslangs' (a list of
    regexes; "all" selects everything and a leading "-" discards). Without
    either, "en" is used if available, else the first available language.
    The format of each language is chosen according to 'subtitlesformat'.

    Returns None when no subtitles were requested or none are available,
    otherwise a dict mapping each selected language to its chosen format.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            available_subs.setdefault(lang, cap_info)

    # Entries are only added above when the matching option is enabled,
    # so an empty dict also covers "no subtitles were requested"
    if not available_subs:
        return None

    all_sub_langs = available_subs.keys()
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        # A list is used so that the order of languages will be the same as
        # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
        requested_langs = []
        for lang_re in self.params.get('subtitleslangs'):
            # startswith() rather than [0], so that an empty entry does not raise IndexError
            discard = lang_re.startswith('-')
            if discard:
                lang_re = lang_re[1:]
            if lang_re == 'all':
                if discard:
                    requested_langs = []
                else:
                    requested_langs.extend(all_sub_langs)
                continue
            current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
            if discard:
                unwanted = set(current_langs)
                requested_langs = [lang for lang in requested_langs if lang not in unwanted]
            else:
                requested_langs.extend(current_langs)
        requested_langs = orderedSet(requested_langs)
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [next(iter(all_sub_langs))]
    if requested_langs:
        self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty list offers nothing to pick from either; without this
        # check formats[-1] below would raise IndexError
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                f = formats[-1]
                break
            matches = [fmt for fmt in formats if fmt['ext'] == ext]
            if matches:
                f = matches[-1]
                break
        else:
            # Nothing in the preference list matched; fall back to the last format
            f = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, f['ext']))
        subs[lang] = f
    return subs
2742
def _forceprint(self, key, info_dict):
    """Print the 'forceprint' and 'print_to_file' templates registered under key

    Does nothing when info_dict is None. The templates are evaluated against
    a copy of info_dict that additionally holds the rendered formats,
    thumbnails, subtitles and automatic captions tables. The file name
    templates of 'print_to_file' are evaluated against info_dict itself.
    """
    if info_dict is None:
        return

    def as_template(spec):
        # "field" becomes "%(field)s" and "field=" becomes "field = %(field)r";
        # anything else is taken to be a complete template already
        matched = re.match(r'\w+(=?)$', spec)
        if matched is None:
            return spec
        if matched.group(1):
            field = spec[:-1]
            return f'{field} = %({field})r'
        return f'%({spec})s'

    video_id = info_dict.get('id')
    printable = info_dict.copy()
    printable.update({
        'formats_table': self.render_formats_table(info_dict),
        'thumbnails_table': self.render_thumbnails_table(info_dict),
        'subtitles_table': self.render_subtitles_table(video_id, info_dict.get('subtitles')),
        'automatic_captions_table': self.render_subtitles_table(
            video_id, info_dict.get('automatic_captions')),
    })

    for spec in self.params['forceprint'].get(key, []):
        self.to_stdout(self.evaluate_outtmpl(as_template(spec), printable))

    for spec, file_spec in self.params['print_to_file'].get(key, []):
        target = self.evaluate_outtmpl(file_spec, info_dict)
        line_tmpl = as_template(spec)
        self.to_screen(f'[info] Writing {line_tmpl!r} to: {target}')
        if not self._ensure_dir_exists(target):
            continue
        # Append, so that repeated calls accumulate lines in the same file
        with io.open(target, 'a', encoding='utf-8') as outfile:
            outfile.write(self.evaluate_outtmpl(line_tmpl, printable) + '\n')
2770
def __forced_printings(self, info_dict, filename, incomplete):
    """Write to stdout the fields requested through the 'force*' params

    Works on a copy of info_dict, to which "filename" (when not None) and
    "urls" are added. When incomplete is true, the mandatory fields (title,
    id, url, format) are skipped if absent instead of being looked up
    unconditionally.
    """
    info_dict = info_dict.copy()
    if filename is not None:
        info_dict['filename'] = filename

    requested_formats = info_dict.get('requested_formats')
    if requested_formats is not None:
        # For RTMP URLs, also include the playpath
        info_dict['urls'] = '\n'.join(
            fmt['url'] + fmt.get('play_path', '') for fmt in requested_formats)
    elif info_dict.get('url'):
        info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

    def is_forced(field):
        return self.params.get('force%s' % field, False)

    def print_mandatory(field, actual_field=None):
        lookup = field if actual_field is None else actual_field
        if not is_forced(field):
            return
        if incomplete and info_dict.get(lookup) is None:
            return
        self.to_stdout(info_dict[lookup])

    def print_optional(field):
        if is_forced(field) and info_dict.get(field) is not None:
            self.to_stdout(info_dict[field])

    needs_post_extract = (
        self.params.get('forcejson')
        or self.params['forceprint'].get('video')
        or self.params['print_to_file'].get('video'))
    if needs_post_extract:
        self.post_extract(info_dict)
    self._forceprint('video', info_dict)

    print_mandatory('title')
    print_mandatory('id')
    print_mandatory('url', 'urls')
    for optional_field in ('thumbnail', 'description', 'filename'):
        print_optional(optional_field)
    if self.params.get('forceduration') and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    print_mandatory('format')

    if self.params.get('forcejson'):
        self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2811
def dl(self, name, info, subtitle=False, test=False):
    """Download info to the file name with a suitable downloader

    raise_no_formats() is called first when info has no "url". With test
    set, a fixed set of quiet test params is used instead of self.params
    and no progress hooks are attached. The downloader gets a copy of info
    with "http_headers" filled in, and its download() result is returned.
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    params = self.params
    if test:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_cls(self, params)
    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)

        def loggable(url):
            # The payload of data: URLs is not worth logging
            if url.startswith('data:'):
                return url.split(',')[0] + ',<data>'
            return url

        sources = info.get('requested_formats', []) or [info]
        urls = '", "'.join(loggable(fmt['url']) for fmt in sources)
        self.write_debug('Invoking downloader on "%s"' % urls)

    # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
    # But it may contain objects that are not deep-copyable
    download_info = self._copy_infodict(info)
    if download_info.get('http_headers') is None:
        download_info['http_headers'] = self._calc_headers(download_info)
    return fd.download(name, download_info, subtitle)
2846
def existing_file(self, filepaths, *, default_overwrite=True):
    """Return the first of filepaths that already exists, unless overwriting

    When the 'overwrites' param (default_overwrite if the key is absent) is
    truthy, every existing file among filepaths is deleted instead and
    None is returned. None is also returned when none of them exists.
    """
    present = [path for path in orderedSet(filepaths) if os.path.exists(path)]
    keep_existing = not self.params.get('overwrites', default_overwrite)
    if present and keep_existing:
        return present[0]

    for stale in present:
        self.report_file_delete(stale)
        os.remove(stale)
    return None
2856
def process_info(self, info_dict):
    """Process a single resolved IE result. (Modifies it in-place)

    Writes the requested side files (description, subtitles, thumbnails,
    info json, annotations, internet shortcuts), runs the 'before_dl'
    pre-processors, downloads the video (merging multiple requested formats
    when possible), queues the applicable ffmpeg fixups and runs the
    postprocessors and post hooks.

    info_dict['__write_download_archive'] is set to tell the caller whether
    the video may be recorded in the download archive. Most failures are
    reported through report_error()/report_warning() and end the processing
    with a bare return.

    Raises UnavailableVideoError if an OSError occurs while downloading, and
    MaxDownloadsReached once the number of downloads reaches 'max_downloads'.
    """

    assert info_dict.get('_type', 'video') == 'video'
    original_infodict = info_dict

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    # This is mostly just for backward compatibility of process_info
    # As a side-effect, this allows for format-specific filters
    if self._match_entry(info_dict) is not None:
        info_dict['__write_download_archive'] = 'ignore'
        return

    # Does nothing under normal operation - for backward compatibility of process_info
    self.post_extract(info_dict)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    if self.params.get('simulate'):
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
        return

    if full_filename is None:
        return
    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    # Each of the writers below returns None on failure, which aborts the processing
    if self._write_description('video', info_dict,
                               self.prepare_filename(info_dict, 'description')) is None:
        return

    sub_files = self._write_subtitles(info_dict, temp_filename)
    if sub_files is None:
        return
    files_to_move.update(dict(sub_files))

    thumb_files = self._write_thumbnails(
        'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
    if thumb_files is None:
        return
    files_to_move.update(dict(thumb_files))

    infofn = self.prepare_filename(info_dict, 'infojson')
    _infojson_written = self._write_info_json('video', info_dict, infofn)
    if _infojson_written:
        info_dict['infojson_filename'] = infofn
        # For backward compatibility, even though it was a private field
        info_dict['__infojson_filename'] = infofn
    elif _infojson_written is None:
        return

    # Note: Annotations are deprecated
    annofn = None
    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
    if annofn:
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    # Write internet shortcut files
    def _write_link_file(link_type):
        # Returns False only on failures that must abort the processing
        url = try_get(info_dict['webpage_url'], iri_to_uri)
        if not url:
            self.report_warning(
                f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
            return True
        linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
        if not self._ensure_dir_exists(encodeFilename(linkfn)):
            return False
        # An existing shortcut is kept only when overwriting is disabled,
        # same as for the annotations above. (This check used to be inverted)
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
            return True
        try:
            self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
            with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                         newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                template_vars = {'url': url}
                if link_type == 'desktop':
                    # The file name without the ".desktop" extension
                    template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
        except (OSError, IOError):
            self.report_error(f'Cannot write internet shortcut {linkfn}')
            return False
        return True

    write_links = {
        'url': self.params.get('writeurllink'),
        'webloc': self.params.get('writewebloclink'),
        'desktop': self.params.get('writedesktoplink'),
    }
    if self.params.get('writelink'):
        # 'writelink' selects the shortcut type according to the platform
        link_type = ('webloc' if sys.platform == 'darwin'
                     else 'desktop' if sys.platform.startswith('linux')
                     else 'url')
        write_links[link_type] = True

    if any(should_write and not _write_link_file(link_type)
           for link_type, should_write in write_links.items()):
        return

    def replace_info_dict(new_info):
        # Copy the contents of new_info into the original dict object, so
        # that the caller's reference keeps seeing every change
        nonlocal info_dict
        if new_info == info_dict:
            return
        info_dict.clear()
        info_dict.update(new_info)

    try:
        new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
        replace_info_dict(new_info)
    except PostProcessingError as err:
        self.report_error('Preprocessing: %s' % str(err))
        return

    if self.params.get('skip_download'):
        # Nothing is downloaded, but the side files written above must still be moved
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_video_file(*filepaths):
                # Look for an already downloaded file, trying the 'final_ext'
                # variant of each path before the path itself
                ext = info_dict.get('ext')
                converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
                file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
                                          default_overwrite=False)
                if file:
                    info_dict['ext'] = os.path.splitext(file)[1][1:]
                return file

            success = True
            if info_dict.get('requested_formats') is not None:

                def compatible_formats(formats):
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(format.get('ext') for format in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv')
                    if (info_dict['ext'] == 'webm'
                            and info_dict.get('thumbnails')
                            # check with type instead of pp_key, __name__, or isinstance
                            # since we dont want any custom PPs to trigger this
                            and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    # Replace a trailing old/new extension with ext, or append ext otherwise
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return '%s.%s' % (filename_wo_ext, ext)

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_video_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                downloaded = []
                merger = FFmpegMergerPP(self)

                fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif fd:
                    # A single downloader handles all the requested formats at once
                    for f in requested_formats if fd != FFmpegFD else []:
                        f['filepath'] = fname = prepend_extension(
                            correct_ext(temp_filename, info_dict['ext']),
                            'f%s' % f['format_id'], info_dict['ext'])
                        downloaded.append(fname)
                    info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    # Each format is downloaded separately
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
                        if not self.params.get('ignoreerrors'):
                            self.report_error(f'{msg}. Aborting due to --abort-on-error')
                            return
                        self.report_warning(f'{msg}. The formats won\'t be merged')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                        fname = temp_filename
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            f['filepath'] = fname
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success

                if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
                    info_dict['__postprocessors'].append(merger)
                    info_dict['__files_to_merge'] = downloaded
                    # Even if there were no downloads, it is being merged only now
                    info_dict['__real_download'] = True
                else:
                    # Not merging; the separate files are moved as they are
                    for file in downloaded:
                        files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_video_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and full_filename != '-':

            def fixup():
                # Queue the ffmpeg fixup postprocessors that apply to this
                # download, or only warn, depending on the 'fixup' param
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = False
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    # When cndn holds, queue the postprocessor cls if fixing
                    # is enabled and it is available; otherwise only warn
                    if not cndn:
                        return
                    if not do_fixup:
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(
                    stretched_ratio not in (1, None),
                    f'Non-uniform pixel ratio {stretched_ratio}',
                    FFmpegFixupStretchedPP)

                ffmpeg_fixup(
                    (info_dict.get('requested_formats') is None
                     and info_dict.get('container') == 'm4a_dash'
                     and info_dict.get('ext') == 'm4a'),
                    'writing DASH m4a. Only some players support this container',
                    FFmpegFixupM4aPP)

                downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                downloader = downloader.__name__ if downloader else None

                if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
                    ffmpeg_fixup(downloader == 'HlsFD',
                                 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                 FFmpegFixupM3u8PP)
                    ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
                                 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            info_dict['__write_download_archive'] = True

    if self.params.get('force_write_download_archive'):
        info_dict['__write_download_archive'] = True

    # Make sure the info_dict was modified in-place
    assert info_dict is original_infodict

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None and self._num_downloads >= int(max_downloads):
        raise MaxDownloadsReached()
3231
def __download_wrapper(self, func):
    """Wrap ``func`` with the error handling shared by the download entry points.

    The returned wrapper forwards its arguments to ``func`` and then:
      * reports an ``UnavailableVideoError`` via ``report_error`` and swallows it;
      * prints a ``MaxDownloadsReached`` and re-raises it;
      * prints a ``DownloadCancelled`` and re-raises it unless the
        ``break_per_url`` param is set;
      * on success, when the ``dump_single_json`` param is set, runs
        ``post_extract`` on the result and writes it to stdout as JSON.

    The wrapper always returns None.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except UnavailableVideoError as err:
            self.report_error(err)
            return
        except MaxDownloadsReached as err:
            self.to_screen(f'[info] {err}')
            raise
        except DownloadCancelled as err:
            self.to_screen(f'[info] {err}')
            if self.params.get('break_per_url'):
                return
            raise

        # Success path: errors raised below are intentionally not handled here
        if not self.params.get('dump_single_json', False):
            return
        self.post_extract(result)
        self.to_stdout(json.dumps(self.sanitize_info(result)))

    return wrapper
3251
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one fixed
    output name (the default template is not '-' and has no '%' field),
    unless ``max_downloads`` is 1. Returns ``self._download_retcode``.
    """
    # Passing a single URL is a common mistake
    urls = variadic(url_list)
    template = self.outtmpl_dict['default']

    has_fixed_name = template != '-' and '%' not in template
    if len(urls) > 1 and has_fixed_name and self.params.get('max_downloads') != 1:
        raise SameFileError(template)

    for single_url in urls:
        extract = self.__download_wrapper(self.extract_info)
        extract(single_url, force_generic_extractor=self.params.get('force_generic_extractor', False))

    return self._download_retcode
3267
def download_with_info_file(self, info_filename):
    """Download using the info JSON stored in ``info_filename``.

    The file is read as UTF-8, parsed, sanitized and passed to
    ``process_ie_result``. If that fails with DownloadError,
    EntryNotInPlaylist or ReExtractInfo and the info has a ``webpage_url``,
    the download is retried from that URL; otherwise the error is re-raised.
    Returns ``self._download_retcode`` (or the result of the retry).
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r', openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        raw_info = json.loads('\n'.join(lines))
    info = self.sanitize_info(raw_info, self.params.get('clean_infojson', True))

    try:
        self.__download_wrapper(self.process_ie_result)(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as err:
        if not isinstance(err, EntryNotInPlaylist):
            self.to_stderr('\r')
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning(f'The info failed to download: {err}; trying with URL {fallback_url}')
        return self.download([fallback_url])
    return self._download_retcode
3286
@staticmethod
def sanitize_info(info_dict, remove_private_keys=False):
    """Sanitize the infodict for converting to json.

    ``info_dict`` gets ``epoch`` and ``_type`` defaults set in place; the
    returned structure is a new one made of dicts, lists and JSON-friendly
    scalars (anything else is replaced by its ``repr``). With
    ``remove_private_keys`` set, None values, keys starting with ``_``
    (except ``_type``) and a fixed set of internal keys are dropped.
    Returns None unchanged when ``info_dict`` is None.
    """
    if info_dict is None:
        return info_dict
    info_dict.setdefault('epoch', int(time.time()))
    info_dict.setdefault('_type', 'video')

    internal_keys = {
        'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
        'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
    }

    def is_rejected(key, value):
        if not remove_private_keys:
            return False
        if value is None:
            return True
        if key.startswith('_') and key != '_type':
            return True
        return key in internal_keys

    def clean(obj):
        if isinstance(obj, dict):
            return {key: clean(value) for key, value in obj.items() if not is_rejected(key, value)}
        if isinstance(obj, (list, tuple, set, LazyList)):
            return [clean(item) for item in obj]
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        return repr(obj)

    return clean(info_dict)
3314
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    """Alias of sanitize_info for backward compatibility.

    ``actually_filter`` is forwarded as ``remove_private_keys``.
    """
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)
3319
@staticmethod
def post_extract(info_dict):
    """Run the deferred ``__post_extractor`` callables and merge their results in place.

    For a ``playlist`` / ``multi_video`` info dict every entry is processed
    recursively (falsy entries are skipped). For any other info dict the
    ``__post_extractor`` key is popped and, if it held a callable, the dict
    it returns is merged into the info dict. A None ``info_dict`` is a no-op.
    """
    def actual_post_extract(info):
        if info.get('_type') in ('playlist', 'multi_video'):
            entries = info.get('entries')
            # The key may be present but None; iterating None would raise TypeError.
            # An explicit None check is used so that lazy entry containers are not
            # truth-tested (which could force their evaluation).
            if entries is None:
                entries = ()
            for entry in entries:
                actual_post_extract(entry or {})
            return

        post_extractor = info.pop('__post_extractor', None) or (lambda: {})
        info.update(post_extractor())

    actual_post_extract(info_dict or {})
3332
def run_pp(self, pp, infodict):
    """Run a single postprocessor and handle the files it asks to delete.

    ``pp.run`` must return ``(files_to_delete, infodict)``. A
    PostProcessingError is reported and swallowed only when the
    ``ignoreerrors`` param is exactly True; otherwise it propagates.
    With ``keepvideo`` set, the files are registered in
    ``infodict['__files_to_move']`` instead of being removed.
    Returns the (possibly replaced) info dict.
    """
    infodict.setdefault('__files_to_move', {})
    try:
        files_to_delete, infodict = pp.run(infodict)
    except PostProcessingError as err:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is not True:
            raise
        self.report_error(err)
        return infodict

    if not files_to_delete:
        return infodict

    if self.params.get('keepvideo', False):
        for path in files_to_delete:
            infodict['__files_to_move'].setdefault(path, '')
        return infodict

    for path in set(files_to_delete):
        self.to_screen('Deleting original file %s (pass -k to keep)' % path)
        try:
            os.remove(encodeFilename(path))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded original file')
        # A deleted file no longer needs to be moved
        infodict['__files_to_move'].pop(path, None)
    return infodict
3361
def run_all_pps(self, key, info, *, additional_pps=None):
    """Run ``additional_pps`` followed by the postprocessors registered under ``key``.

    ``_forceprint`` is invoked for ``key`` first. Each postprocessor receives
    the info returned by the previous one; the final info is returned.
    """
    self._forceprint(key, info)
    chain = (additional_pps or []) + self._pps[key]
    return functools.reduce(lambda current, pp: self.run_pp(pp, current), chain, info)
3367
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered under ``key`` on a copy of ``ie_info``.

    Returns a tuple ``(info, files_to_move)`` where ``files_to_move`` is the
    ``__files_to_move`` entry removed from the processed info (None if absent).
    """
    working = dict(ie_info)
    working.update({'__files_to_move': files_to_move or {}})
    processed = self.run_all_pps(key, working)
    pending_moves = processed.pop('__files_to_move', None)
    return processed, pending_moves
3373
def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Sets ``filepath`` and ``__files_to_move`` on ``info``, runs the
    ``post_process`` chain (preceded by ``info['__postprocessors']`` if any),
    then MoveFilesAfterDownloadPP, drops ``__files_to_move`` and finally
    returns the result of the ``after_move`` chain.
    """
    info.update({'filepath': filename, '__files_to_move': files_to_move or {}})
    extra_pps = info.get('__postprocessors')
    info = self.run_all_pps('post_process', info, additional_pps=extra_pps)
    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    del info['__files_to_move']
    return self.run_all_pps('after_move', info)
3382
3383 def _make_archive_id(self, info_dict):
3384 video_id = info_dict.get('id')
3385 if not video_id:
3386 return
3387 # Future-proof against any change in case
3388 # and backwards compatibility with prior versions
3389 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3390 if extractor is None:
3391 url = str_or_none(info_dict.get('url'))
3392 if not url:
3393 return
3394 # Try to find matching extractor for the URL and take its ie_key
3395 for ie_key, ie in self._ies.items():
3396 if ie.suitable(url):
3397 extractor = ie_key
3398 break
3399 else:
3400 return
3401 return '%s %s' % (extractor.lower(), video_id)
3402
def in_download_archive(self, info_dict):
    """Return True if the video described by ``info_dict`` is in the download archive.

    Always False when no ``download_archive`` param is configured or when
    no archive id can be built for the video.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    # A missing id means incomplete video information
    return bool(archive_id) and archive_id in self.archive
3413
def record_download_archive(self, info_dict):
    """Append the archive id of ``info_dict`` to the archive file and in-memory set.

    Does nothing when no ``download_archive`` param is configured.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return

    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    self.write_debug(f'Adding to archive: {archive_id}')
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(f'{archive_id}\n')
    self.archive.add(archive_id)
3424
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict.

    Order of precedence: ``'audio only'`` (vcodec is 'none' and acodec is
    not), the explicit ``resolution`` field, ``WxH``, ``Hp``, ``Wx?`` and
    finally ``default``.
    """
    if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
        return 'audio only'

    resolution = format.get('resolution')
    if resolution is not None:
        return resolution

    width, height = format.get('width'), format.get('height')
    if width and height:
        return '%dx%d' % (width, height)
    if height:
        return '%sp' % height
    if width:
        return '%dx?' % width
    return default
3438
3439 def _list_format_headers(self, *headers):
3440 if self.params.get('listformats_table', True) is not False:
3441 return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3442 return headers
3443
3444 def _format_note(self, fdict):
3445 res = ''
3446 if fdict.get('ext') in ['f4f', 'f4m']:
3447 res += '(unsupported)'
3448 if fdict.get('language'):
3449 if res:
3450 res += ' '
3451 res += '[%s]' % fdict['language']
3452 if fdict.get('format_note') is not None:
3453 if res:
3454 res += ' '
3455 res += fdict['format_note']
3456 if fdict.get('tbr') is not None:
3457 if res:
3458 res += ', '
3459 res += '%4dk' % fdict['tbr']
3460 if fdict.get('container') is not None:
3461 if res:
3462 res += ', '
3463 res += '%s container' % fdict['container']
3464 if (fdict.get('vcodec') is not None
3465 and fdict.get('vcodec') != 'none'):
3466 if res:
3467 res += ', '
3468 res += fdict['vcodec']
3469 if fdict.get('vbr') is not None:
3470 res += '@'
3471 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3472 res += 'video@'
3473 if fdict.get('vbr') is not None:
3474 res += '%4dk' % fdict['vbr']
3475 if fdict.get('fps') is not None:
3476 if res:
3477 res += ', '
3478 res += '%sfps' % fdict['fps']
3479 if fdict.get('acodec') is not None:
3480 if res:
3481 res += ', '
3482 if fdict['acodec'] == 'none':
3483 res += 'video only'
3484 else:
3485 res += '%-5s' % fdict['acodec']
3486 elif fdict.get('abr') is not None:
3487 if res:
3488 res += ', '
3489 res += 'audio'
3490 if fdict.get('abr') is not None:
3491 res += '@%3dk' % fdict['abr']
3492 if fdict.get('asr') is not None:
3493 res += ' (%5dHz)' % fdict['asr']
3494 if fdict.get('filesize') is not None:
3495 if res:
3496 res += ', '
3497 res += format_bytes(fdict['filesize'])
3498 elif fdict.get('filesize_approx') is not None:
3499 if res:
3500 res += ', '
3501 res += '~' + format_bytes(fdict['filesize_approx'])
3502 return res
3503
def render_formats_table(self, info_dict):
    """Build the printable table of formats for ``info_dict``.

    Returns None when the info dict has neither ``formats`` nor ``url``.
    When the ``listformats_table`` param is exactly False a plain
    four-column layout is rendered; otherwise the detailed, styled layout
    is used. Formats whose ``preference`` is below -1000 are left out.
    """
    if not (info_dict.get('formats') or info_dict.get('url')):
        return None

    def is_listed(fmt):
        preference = fmt.get('preference')
        return preference is None or preference >= -1000

    formats = info_dict.get('formats', [info_dict])

    if self.params.get('listformats_table', True) is False:
        plain_rows = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)
            ] for f in formats if is_listed(f)]
        return render_table(['format code', 'extension', 'resolution', 'note'], plain_rows, extra_gap=1)

    delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
    rows = [
        [
            self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d'),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            delim,
            format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
            format_field(f, 'tbr', '\t%dk'),
            shorten_protocol_name(f.get('protocol', '')),
            delim,
            format_field(f, 'vcodec', default='unknown').replace(
                'none', 'images' if f.get('acodec') == 'none'
                        else self._format_screen('audio only', self.Styles.SUPPRESS)),
            format_field(f, 'vbr', '\t%dk'),
            format_field(f, 'acodec', default='unknown').replace(
                'none', '' if f.get('vcodec') == 'none'
                        else self._format_screen('video only', self.Styles.SUPPRESS)),
            format_field(f, 'abr', '\t%dk'),
            format_field(f, 'asr', '\t%dHz'),
            join_nonempty(
                self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
                format_field(f, 'language', '[%s]'),
                join_nonempty(format_field(f, 'format_note'),
                              format_field(f, 'container', ignore=(None, f.get('ext'))),
                              delim=', '),
                delim=' '),
        ] for f in formats if is_listed(f)]
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
        delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    return render_table(
        header_line, rows, hide_empty=True,
        delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3556
def render_thumbnails_table(self, info_dict):
    """Build the printable table of thumbnails, or None if there are none.

    Each row holds the thumbnail's id, width, height ('unknown' when the
    key is absent) and URL.
    """
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        return None

    header = self._list_format_headers('ID', 'Width', 'Height', 'URL')
    rows = []
    for thumb in thumbnails:
        rows.append([
            thumb.get('id'),
            thumb.get('width', 'unknown'),
            thumb.get('height', 'unknown'),
            thumb['url'],
        ])
    return render_table(header, rows)
3564
def render_subtitles_table(self, video_id, subtitles):
    """Build the printable table of subtitles, or None if ``subtitles`` is empty.

    ``subtitles`` maps a language code to a list of format dicts (each with
    an ``ext`` and optionally a ``name``). Every language becomes one row of
    language, name(s) and extensions, with the formats listed in reverse
    order. When all formats share one name it is shown once, or not at all
    if it is the 'unknown' placeholder. A language with no formats yields a
    row with empty name and format columns.
    """
    def _row(lang, formats):
        # Build the columns separately instead of unpacking ``zip(*...)``,
        # which raises ValueError when a language has no formats at all
        ordered = list(reversed(formats or []))
        exts = [f['ext'] for f in ordered]
        names = [f.get('name') or 'unknown' for f in ordered]
        if len(set(names)) == 1:
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    if not subtitles:
        return None
    return render_table(
        self._list_format_headers('Language', 'Name', 'Formats'),
        [_row(lang, formats) for lang, formats in subtitles.items()],
        hide_empty=True)
3578
def __list_table(self, video_id, name, func, *args):
    """Render a table with ``func(*args)`` and print it, or report that there is none.

    ``name`` is the plural noun used in the messages (e.g. 'formats').
    """
    rendered = func(*args)
    if rendered:
        self.to_screen(f'[info] Available {name} for {video_id}:')
        self.to_stdout(rendered)
    else:
        self.to_screen(f'{video_id} has no {name}')
3586
def list_formats(self, info_dict):
    """Print the table of available formats for the video in ``info_dict``."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'formats', self.render_formats_table, info_dict)
3589
def list_thumbnails(self, info_dict):
    """Print the table of available thumbnails for the video in ``info_dict``."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'thumbnails', self.render_thumbnails_table, info_dict)
3592
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the table of ``subtitles`` for ``video_id``; ``name`` labels the listing."""
    render_args = (video_id, subtitles)
    self.__list_table(video_id, name, self.render_subtitles_table, *render_args)
3595
def urlopen(self, req):
    """Start an HTTP download.

    ``req`` may be a URL string (wrapped with ``sanitized_Request``) or an
    already built request object. The request is opened through
    ``self._opener`` using ``self._socket_timeout``.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
3601
def print_debug_header(self):
    """Write the verbose-mode debug header describing the runtime environment.

    Does nothing unless the ``verbose`` param is set. Otherwise it emits, via
    the configured ``logger`` or the debug stream, the encodings in use, the
    yt-dlp version and variant, lazy-loader and plugin status, compat
    options, the git HEAD (source checkouts only, best effort), the Python
    version, external executable versions, optional libraries and the
    proxy map.
    """
    if not self.params.get('verbose'):
        return

    def get_encoding(stream):
        ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
        if not supports_terminal_sequences(stream):
            from .compat import WINDOWS_VT_MODE
            ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
        return ret

    encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        get_encoding(self._out_files['screen']), get_encoding(self._out_files['error']),
        self.get_encoding())

    logger = self.params.get('logger')
    if logger:
        def write_debug(msg):
            logger.debug(f'[debug] {msg}')
        write_debug(encoding_str)
    else:
        # The encoding line itself is written with encoding=None
        write_string(f'[debug] {encoding_str}\n', encoding=None)

        def write_debug(msg):
            self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    write_debug(join_nonempty(
        'yt-dlp version', __version__,
        f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        delim=' '))
    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if plugin_extractors or plugin_postprocessors:
        write_debug('Plugins: %s' % [
            '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
    if self.params.get('compat_opts'):
        write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))

    if source == 'source':
        # Best effort only: any failure while querying git (missing binary,
        # not a checkout, undecodable output) must not break the debug header
        with contextlib.suppress(Exception):
            sp = Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, _ = sp.communicate_or_kill()
            out = out.decode().strip()
            if re.match(r'[0-9a-f]+', out):
                write_debug('Git HEAD: %s' % out)

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    write_debug('Python version %s (%s %s) - %s' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
    if ffmpeg_features:
        # Sorted so that the header is identical from run to run
        exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .downloader.websocket import has_websockets
    from .postprocessor.embedthumbnail import has_mutagen
    from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE

    lib_str = join_nonempty(
        compat_brotli and compat_brotli.__name__,
        has_certifi and 'certifi',
        compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
        SECRETSTORAGE_AVAILABLE and 'secretstorage',
        has_mutagen and 'mutagen',
        SQLITE_AVAILABLE and 'sqlite',
        has_websockets and 'websockets',
        delim=', ') or 'none'
    write_debug('Optional libraries: %s' % lib_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_debug(f'Proxy map: {proxy_map}')

    # Not implemented; deliberately kept disabled
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        write_debug('Public IP address: %s' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
3717
def _setup_opener(self):
    """Create the cookie jar and the URL opener used for all HTTP requests.

    Sets ``self._socket_timeout`` (20 unless the ``socket_timeout`` param is
    given), ``self.cookiejar`` and ``self._opener``. The ``proxy`` param
    selects the proxies: None uses the system proxies, '' disables proxying
    and any other value is used for both http and https.
    """
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = float(timeout_val) if timeout_val is not None else 20

    browser_spec = self.params.get('cookiesfrombrowser')
    cookie_file = self.params.get('cookiefile')
    proxy_opt = self.params.get('proxy')

    self.cookiejar = load_cookies(cookie_file, browser_spec, self)
    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if proxy_opt is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_opt == '':
        proxies = {}
    else:
        proxies = {'http': proxy_opt, 'https': proxy_opt}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    def reject_file_scheme(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')

    file_handler = compat_urllib_request.FileHandler()
    file_handler.file_open = reject_file_scheme

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
3765
def encode(self, s):
    """Encode ``s`` with ``self.get_encoding()``; bytes are returned unchanged.

    On UnicodeEncodeError the error's ``reason`` gets a hint about the
    system encoding / ``--encoding`` option appended before it is re-raised.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    target_encoding = self.get_encoding()
    try:
        return s.encode(target_encoding)
    except UnicodeEncodeError as err:
        err.reason += '. Check your system encoding configuration or use the --encoding option.'
        raise
3775
def get_encoding(self):
    """Return the ``encoding`` param, falling back to ``preferredencoding()`` when it is None."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
3781
3782 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3783 ''' Write infojson and returns True = written, False = skip, None = error '''
3784 if overwrite is None:
3785 overwrite = self.params.get('overwrites', True)
3786 if not self.params.get('writeinfojson'):
3787 return False
3788 elif not infofn:
3789 self.write_debug(f'Skipping writing {label} infojson')
3790 return False
3791 elif not self._ensure_dir_exists(infofn):
3792 return None
3793 elif not overwrite and os.path.exists(infofn):
3794 self.to_screen(f'[info] {label.title()} metadata is already present')
3795 else:
3796 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3797 try:
3798 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3799 except (OSError, IOError):
3800 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3801 return None
3802 return True
3803
3804 def _write_description(self, label, ie_result, descfn):
3805 ''' Write description and returns True = written, False = skip, None = error '''
3806 if not self.params.get('writedescription'):
3807 return False
3808 elif not descfn:
3809 self.write_debug(f'Skipping writing {label} description')
3810 return False
3811 elif not self._ensure_dir_exists(descfn):
3812 return None
3813 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3814 self.to_screen(f'[info] {label.title()} description is already present')
3815 elif ie_result.get('description') is None:
3816 self.report_warning(f'There\'s no {label} description to write')
3817 return False
3818 else:
3819 try:
3820 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3821 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3822 descfile.write(ie_result['description'])
3823 except (OSError, IOError):
3824 self.report_error(f'Cannot write {label} description file {descfn}')
3825 return None
3826 return True
3827
3828 def _write_subtitles(self, info_dict, filename):
3829 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3830 ret = []
3831 subtitles = info_dict.get('requested_subtitles')
3832 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3833 # subtitles download errors are already managed as troubles in relevant IE
3834 # that way it will silently go on when used with unsupporting IE
3835 return ret
3836
3837 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3838 if not sub_filename_base:
3839 self.to_screen('[info] Skipping writing video subtitles')
3840 return ret
3841 for sub_lang, sub_info in subtitles.items():
3842 sub_format = sub_info['ext']
3843 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3844 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3845 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3846 if existing_sub:
3847 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3848 sub_info['filepath'] = existing_sub
3849 ret.append((existing_sub, sub_filename_final))
3850 continue
3851
3852 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3853 if sub_info.get('data') is not None:
3854 try:
3855 # Use newline='' to prevent conversion of newline characters
3856 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3857 with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3858 subfile.write(sub_info['data'])
3859 sub_info['filepath'] = sub_filename
3860 ret.append((sub_filename, sub_filename_final))
3861 continue
3862 except (OSError, IOError):
3863 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3864 return None
3865
3866 try:
3867 sub_copy = sub_info.copy()
3868 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3869 self.dl(sub_filename, sub_copy, subtitle=True)
3870 sub_info['filepath'] = sub_filename
3871 ret.append((sub_filename, sub_filename_final))
3872 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3873 msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
3874 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
3875 if not self.params.get('ignoreerrors'):
3876 self.report_error(msg)
3877 raise DownloadError(msg)
3878 self.report_warning(msg)
3879 return ret
3880
3881 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3882 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3883 write_all = self.params.get('write_all_thumbnails', False)
3884 thumbnails, ret = [], []
3885 if write_all or self.params.get('writethumbnail', False):
3886 thumbnails = info_dict.get('thumbnails') or []
3887 multiple = write_all and len(thumbnails) > 1
3888
3889 if thumb_filename_base is None:
3890 thumb_filename_base = filename
3891 if thumbnails and not thumb_filename_base:
3892 self.write_debug(f'Skipping writing {label} thumbnail')
3893 return ret
3894
3895 for idx, t in list(enumerate(thumbnails))[::-1]:
3896 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3897 thumb_display_id = f'{label} thumbnail {t["id"]}'
3898 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3899 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3900
3901 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3902 if existing_thumb:
3903 self.to_screen('[info] %s is already present' % (
3904 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3905 t['filepath'] = existing_thumb
3906 ret.append((existing_thumb, thumb_filename_final))
3907 else:
3908 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3909 try:
3910 uf = self.urlopen(sanitized_Request(t['url'], headers=t.get('http_headers', {})))
3911 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3912 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3913 shutil.copyfileobj(uf, thumbf)
3914 ret.append((thumb_filename, thumb_filename_final))
3915 t['filepath'] = thumb_filename
3916 except network_exceptions as err:
3917 thumbnails.pop(idx)
3918 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3919 if ret and not write_all:
3920 break
3921 return ret