]> jfr.im git - yt-dlp.git/blob - yt_dlp/YoutubeDL.py
Tolerate failure to `--write-link` due to unknown URL
[yt-dlp.git] / yt_dlp / YoutubeDL.py
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import functools
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import sys
23 import tempfile
24 import time
25 import tokenize
26 import traceback
27 import random
28 import unicodedata
29
30 from enum import Enum
31 from string import ascii_letters
32
33 from .compat import (
34 compat_basestring,
35 compat_get_terminal_size,
36 compat_kwargs,
37 compat_numeric_types,
38 compat_os_name,
39 compat_pycrypto_AES,
40 compat_shlex_quote,
41 compat_str,
42 compat_tokenize_tokenize,
43 compat_urllib_error,
44 compat_urllib_request,
45 compat_urllib_request_DataHandler,
46 windows_enable_vt_mode,
47 )
48 from .cookies import load_cookies
49 from .utils import (
50 age_restricted,
51 args_to_str,
52 ContentTooShortError,
53 date_from_str,
54 DateRange,
55 DEFAULT_OUTTMPL,
56 determine_ext,
57 determine_protocol,
58 DownloadCancelled,
59 DownloadError,
60 encode_compat_str,
61 encodeFilename,
62 EntryNotInPlaylist,
63 error_to_compat_str,
64 ExistingVideoReached,
65 expand_path,
66 ExtractorError,
67 float_or_none,
68 format_bytes,
69 format_field,
70 format_decimal_suffix,
71 formatSeconds,
72 GeoRestrictedError,
73 get_domain,
74 HEADRequest,
75 InAdvancePagedList,
76 int_or_none,
77 iri_to_uri,
78 ISO3166Utils,
79 join_nonempty,
80 LazyList,
81 LINK_TEMPLATES,
82 locked_file,
83 make_dir,
84 make_HTTPS_handler,
85 MaxDownloadsReached,
86 network_exceptions,
87 number_of_digits,
88 orderedSet,
89 OUTTMPL_TYPES,
90 PagedList,
91 parse_filesize,
92 PerRequestProxyHandler,
93 platform_name,
94 Popen,
95 POSTPROCESS_WHEN,
96 PostProcessingError,
97 preferredencoding,
98 prepend_extension,
99 ReExtractInfo,
100 register_socks_protocols,
101 RejectedVideoReached,
102 remove_terminal_sequences,
103 render_table,
104 replace_extension,
105 SameFileError,
106 sanitize_filename,
107 sanitize_path,
108 sanitize_url,
109 sanitized_Request,
110 std_headers,
111 STR_FORMAT_RE_TMPL,
112 STR_FORMAT_TYPES,
113 str_or_none,
114 strftime_or_none,
115 subtitles_filename,
116 supports_terminal_sequences,
117 timetuple_from_msec,
118 to_high_limit_path,
119 traverse_obj,
120 try_get,
121 UnavailableVideoError,
122 url_basename,
123 variadic,
124 version_tuple,
125 write_json_file,
126 write_string,
127 YoutubeDLCookieProcessor,
128 YoutubeDLHandler,
129 YoutubeDLRedirectHandler,
130 )
131 from .cache import Cache
132 from .minicurses import format_text
133 from .extractor import (
134 gen_extractor_classes,
135 get_info_extractor,
136 _LAZY_LOADER,
137 _PLUGIN_CLASSES as plugin_extractors
138 )
139 from .extractor.openload import PhantomJSwrapper
140 from .downloader import (
141 FFmpegFD,
142 get_suitable_downloader,
143 shorten_protocol_name
144 )
145 from .downloader.rtmp import rtmpdump_version
146 from .postprocessor import (
147 get_postprocessor,
148 EmbedThumbnailPP,
149 FFmpegFixupDuplicateMoovPP,
150 FFmpegFixupDurationPP,
151 FFmpegFixupM3u8PP,
152 FFmpegFixupM4aPP,
153 FFmpegFixupStretchedPP,
154 FFmpegFixupTimestampPP,
155 FFmpegMergerPP,
156 FFmpegPostProcessor,
157 MoveFilesAfterDownloadPP,
158 _PLUGIN_CLASSES as plugin_postprocessors
159 )
160 from .update import detect_variant
161 from .version import __version__, RELEASE_GIT_HEAD
162
163 if compat_os_name == 'nt':
164 import ctypes
165
166
167 class YoutubeDL(object):
168 """YoutubeDL class.
169
170 YoutubeDL objects are the ones responsible for downloading the
171 actual video file and writing it to disk if the user has requested
172 it, among some other tasks. In most cases there should be one per
173 program. As, given a video URL, the downloader doesn't know how to
174 extract all the needed information, task that InfoExtractors do, it
175 has to pass the URL to one of them.
176
177 For this, YoutubeDL objects have a method that allows
178 InfoExtractors to be registered in a given order. When it is passed
179 a URL, the YoutubeDL object hands it to the first InfoExtractor it
180 finds that reports being able to handle it. The InfoExtractor extracts
181 all the information about the video or videos the URL refers to, and
182 YoutubeDL processes the extracted information, possibly using a File
183 Downloader to download the video.
184
185 YoutubeDL objects accept a lot of parameters. In order not to saturate
186 the object constructor with arguments, it receives a dictionary of
187 options instead. These options are available through the params
188 attribute for the InfoExtractors to use. The YoutubeDL also
189 registers itself as the downloader in charge for the InfoExtractors
190 that are added to it, so this is a "mutual registration".
191
192 Available options:
193
194 username: Username for authentication purposes.
195 password: Password for authentication purposes.
196 videopassword: Password for accessing a video.
197 ap_mso: Adobe Pass multiple-system operator identifier.
198 ap_username: Multiple-system operator account username.
199 ap_password: Multiple-system operator account password.
200 usenetrc: Use netrc for authentication instead.
201 verbose: Print additional info to stdout.
202 quiet: Do not print messages to stdout.
203 no_warnings: Do not print out anything for warnings.
204 forceprint: A dict with keys WHEN mapped to a list of templates to
205 print to stdout. The allowed keys are video or any of the
206 items in utils.POSTPROCESS_WHEN.
207 For compatibility, a single list is also accepted
208 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
209 a list of tuples with (template, filename)
210 forceurl: Force printing final URL. (Deprecated)
211 forcetitle: Force printing title. (Deprecated)
212 forceid: Force printing ID. (Deprecated)
213 forcethumbnail: Force printing thumbnail URL. (Deprecated)
214 forcedescription: Force printing description. (Deprecated)
215 forcefilename: Force printing final filename. (Deprecated)
216 forceduration: Force printing duration. (Deprecated)
217 forcejson: Force printing info_dict as JSON.
218 dump_single_json: Force printing the info_dict of the whole playlist
219 (or video) as a single JSON line.
220 force_write_download_archive: Force writing download archive regardless
221 of 'skip_download' or 'simulate'.
222 simulate: Do not download the video files. If unset (or None),
223 simulate only if listsubtitles, listformats or list_thumbnails is used
224 format: Video format code. see "FORMAT SELECTION" for more details.
225 You can also pass a function. The function takes 'ctx' as
226 argument and returns the formats to download.
227 See "build_format_selector" for an implementation
228 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
229 ignore_no_formats_error: Ignore "No video formats" error. Useful for
230 extracting metadata even if the video is not actually
231 available for download (experimental)
232 format_sort: A list of fields by which to sort the video formats.
233 See "Sorting Formats" for more details.
234 format_sort_force: Force the given format_sort. see "Sorting Formats"
235 for more details.
236 allow_multiple_video_streams: Allow multiple video streams to be merged
237 into a single file
238 allow_multiple_audio_streams: Allow multiple audio streams to be merged
239 into a single file
240 check_formats Whether to test if the formats are downloadable.
241 Can be True (check all), False (check none),
242 'selected' (check selected formats),
243 or None (check only if requested by extractor)
244 paths: Dictionary of output paths. The allowed keys are 'home'
245 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
246 outtmpl: Dictionary of templates for output names. Allowed keys
247 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
248 For compatibility with youtube-dl, a single string can also be used
249 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
250 restrictfilenames: Do not allow "&" and spaces in file names
251 trim_file_name: Limit length of filename (extension excluded)
252 windowsfilenames: Force the filenames to be windows compatible
253 ignoreerrors: Do not stop on download/postprocessing errors.
254 Can be 'only_download' to ignore only download errors.
255 Default is 'only_download' for CLI, but False for API
256 skip_playlist_after_errors: Number of allowed failures until the rest of
257 the playlist is skipped
258 force_generic_extractor: Force downloader to use the generic extractor
259 overwrites: Overwrite all video and metadata files if True,
260 overwrite only non-video files if None
261 and don't overwrite any file if False
262 For compatibility with youtube-dl,
263 "nooverwrites" may also be used instead
264 playliststart: Playlist item to start at.
265 playlistend: Playlist item to end at.
266 playlist_items: Specific indices of playlist to download.
267 playlistreverse: Download playlist items in reverse order.
268 playlistrandom: Download playlist items in random order.
269 matchtitle: Download only matching titles.
270 rejecttitle: Reject downloads for matching titles.
271 logger: Log messages to a logging.Logger instance.
272 logtostderr: Log messages to stderr instead of stdout.
273 consoletitle: Display progress in console window's titlebar.
274 writedescription: Write the video description to a .description file
275 writeinfojson: Write the video metadata to a .info.json file
276 clean_infojson: Remove private fields from the infojson
277 getcomments: Extract video comments. This will not be written to disk
278 unless writeinfojson is also given
279 writeannotations: Write the video annotations to a .annotations.xml file
280 writethumbnail: Write the thumbnail image to a file
281 allow_playlist_files: Whether to write playlists' description, infojson etc
282 also to disk when using the 'write*' options
283 write_all_thumbnails: Write all thumbnail formats to files
284 writelink: Write an internet shortcut file, depending on the
285 current platform (.url/.webloc/.desktop)
286 writeurllink: Write a Windows internet shortcut file (.url)
287 writewebloclink: Write a macOS internet shortcut file (.webloc)
288 writedesktoplink: Write a Linux internet shortcut file (.desktop)
289 writesubtitles: Write the video subtitles to a file
290 writeautomaticsub: Write the automatically generated subtitles to a file
291 allsubtitles: Deprecated - Use subtitleslangs = ['all']
292 Downloads all the subtitles of the video
293 (requires writesubtitles or writeautomaticsub)
294 listsubtitles: Lists all available subtitles for the video
295 subtitlesformat: The format code for subtitles
296 subtitleslangs: List of languages of the subtitles to download (can be regex).
297 The list may contain "all" to refer to all the available
298 subtitles. The language can be prefixed with a "-" to
299 exclude it from the requested languages. Eg: ['all', '-live_chat']
300 keepvideo: Keep the video file after post-processing
301 daterange: A DateRange object, download only if the upload_date is in the range.
302 skip_download: Skip the actual download of the video file
303 cachedir: Location of the cache files in the filesystem.
304 False to disable filesystem cache.
305 noplaylist: Download single video instead of a playlist if in doubt.
306 age_limit: An integer representing the user's age in years.
307 Unsuitable videos for the given age are skipped.
308 min_views: An integer representing the minimum view count the video
309 must have in order to not be skipped.
310 Videos without view count information are always
311 downloaded. None for no limit.
312 max_views: An integer representing the maximum view count.
313 Videos that are more popular than that are not
314 downloaded.
315 Videos without view count information are always
316 downloaded. None for no limit.
317 download_archive: File name of a file where all downloads are recorded.
318 Videos already present in the file are not downloaded
319 again.
320 break_on_existing: Stop the download process after attempting to download a
321 file that is in the archive.
322 break_on_reject: Stop the download process when encountering a video that
323 has been filtered out.
324 break_per_url: Whether break_on_reject and break_on_existing
325 should act on each input URL as opposed to for the entire queue
326 cookiefile: File name where cookies should be read from and dumped to
327 cookiesfrombrowser: A tuple containing the name of the browser, the profile
328 name/path from where cookies are loaded, and the name of the
329 keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
330 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
331 support RFC 5746 secure renegotiation
332 nocheckcertificate: Do not verify SSL certificates
333 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
334 At the moment, this is only supported by YouTube.
335 proxy: URL of the proxy server to use
336 geo_verification_proxy: URL of the proxy to use for IP address verification
337 on geo-restricted sites.
338 socket_timeout: Time to wait for unresponsive hosts, in seconds
339 bidi_workaround: Work around buggy terminals without bidirectional text
340 support, using fribidi
341 debug_printtraffic:Print out sent and received HTTP traffic
342 include_ads: Download ads as well (deprecated)
343 default_search: Prepend this string if an input url is not valid.
344 'auto' for elaborate guessing
345 encoding: Use this encoding instead of the system-specified.
346 extract_flat: Do not resolve URLs, return the immediate result.
347 Pass in 'in_playlist' to only show this behavior for
348 playlist items.
349 wait_for_video: If given, wait for scheduled streams to become available.
350 The value should be a tuple containing the range
351 (min_secs, max_secs) to wait between retries
352 postprocessors: A list of dictionaries, each with an entry
353 * key: The name of the postprocessor. See
354 yt_dlp/postprocessor/__init__.py for a list.
355 * when: When to run the postprocessor. Allowed values are
356 the entries of utils.POSTPROCESS_WHEN
357 Assumed to be 'post_process' if not given
358 post_hooks: Deprecated - Register a custom postprocessor instead
359 A list of functions that get called as the final step
360 for each video file, after all postprocessors have been
361 called. The filename will be passed as the only argument.
362 progress_hooks: A list of functions that get called on download
363 progress, with a dictionary with the entries
364 * status: One of "downloading", "error", or "finished".
365 Check this first and ignore unknown values.
366 * info_dict: The extracted info_dict
367
368 If status is one of "downloading", or "finished", the
369 following properties may also be present:
370 * filename: The final filename (always present)
371 * tmpfilename: The filename we're currently writing to
372 * downloaded_bytes: Bytes on disk
373 * total_bytes: Size of the whole file, None if unknown
374 * total_bytes_estimate: Guess of the eventual file size,
375 None if unavailable.
376 * elapsed: The number of seconds since download started.
377 * eta: The estimated time in seconds, None if unknown
378 * speed: The download speed in bytes/second, None if
379 unknown
380 * fragment_index: The counter of the currently
381 downloaded video fragment.
382 * fragment_count: The number of fragments (= individual
383 files that will be merged)
384
385 Progress hooks are guaranteed to be called at least once
386 (with status "finished") if the download is successful.
387 postprocessor_hooks: A list of functions that get called on postprocessing
388 progress, with a dictionary with the entries
389 * status: One of "started", "processing", or "finished".
390 Check this first and ignore unknown values.
391 * postprocessor: Name of the postprocessor
392 * info_dict: The extracted info_dict
393
394 Progress hooks are guaranteed to be called at least twice
395 (with status "started" and "finished") if the processing is successful.
396 merge_output_format: Extension to use when merging formats.
397 final_ext: Expected final extension; used to detect when the file was
398 already downloaded and converted
399 fixup: Automatically correct known faults of the file.
400 One of:
401 - "never": do nothing
402 - "warn": only emit a warning
403 - "detect_or_warn": check whether we can do anything
404 about it, warn otherwise (default)
405 source_address: Client-side IP address to bind to.
406 call_home: Boolean, true iff we are allowed to contact the
407 yt-dlp servers for debugging. (BROKEN)
408 sleep_interval_requests: Number of seconds to sleep between requests
409 during extraction
410 sleep_interval: Number of seconds to sleep before each download when
411 used alone or a lower bound of a range for randomized
412 sleep before each download (minimum possible number
413 of seconds to sleep) when used along with
414 max_sleep_interval.
415 max_sleep_interval:Upper bound of a range for randomized sleep before each
416 download (maximum possible number of seconds to sleep).
417 Must only be used along with sleep_interval.
418 Actual sleep time will be a random float from range
419 [sleep_interval; max_sleep_interval].
420 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
421 listformats: Print an overview of available video formats and exit.
422 list_thumbnails: Print a table of all thumbnails and exit.
423 match_filter: A function that gets called with the info_dict of
424 every video.
425 If it returns a message, the video is ignored.
426 If it returns None, the video is downloaded.
427 match_filter_func in utils.py is one example for this.
428 no_color: Do not emit color codes in output.
429 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
430 HTTP header
431 geo_bypass_country:
432 Two-letter ISO 3166-2 country code that will be used for
433 explicit geographic restriction bypassing via faking
434 X-Forwarded-For HTTP header
435 geo_bypass_ip_block:
436 IP range in CIDR notation that will be used similarly to
437 geo_bypass_country
438
439 The following options determine which downloader is picked:
440 external_downloader: A dictionary of protocol keys and the executable of the
441 external downloader to use for it. The allowed protocols
442 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
443 Set the value to 'native' to use the native downloader
444 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
445 or {'m3u8': 'ffmpeg'} instead.
446 Use the native HLS downloader instead of ffmpeg/avconv
447 if True, otherwise use ffmpeg/avconv if False, otherwise
448 use downloader suggested by extractor if None.
449 compat_opts: Compatibility options. See "Differences in default behavior".
450 The following options do not work when used through the API:
451 filename, abort-on-error, multistreams, no-live-chat, format-sort
452 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
453 Refer __init__.py for their implementation
454 progress_template: Dictionary of templates for progress outputs.
455 Allowed keys are 'download', 'postprocess',
456 'download-title' (console title) and 'postprocess-title'.
457 The template is mapped on a dictionary with keys 'progress' and 'info'
458
459 The following parameters are not used by YoutubeDL itself, they are used by
460 the downloader (see yt_dlp/downloader/common.py):
461 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
462 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
463 continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
464 external_downloader_args, concurrent_fragment_downloads.
465
466 The following options are used by the post processors:
467 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
468 otherwise prefer ffmpeg. (avconv support is deprecated)
469 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
470 to the binary or its containing directory.
471 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
472 and a list of additional command-line arguments for the
473 postprocessor/executable. The dict can also have "PP+EXE" keys
474 which are used when the given exe is used by the given PP.
475 Use 'default' as the name for arguments to be passed to all PP
476 For compatibility with youtube-dl, a single list of args
477 can also be used
478
479 The following options are used by the extractors:
480 extractor_retries: Number of times to retry for known errors
481 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
482 hls_split_discontinuity: Split HLS playlists to different formats at
483 discontinuities such as ad breaks (default: False)
484 extractor_args: A dictionary of arguments to be passed to the extractors.
485 See "EXTRACTOR ARGUMENTS" for details.
486 Eg: {'youtube': {'skip': ['dash', 'hls']}}
487 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
488 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
489 If True (default), DASH manifests and related
490 data will be downloaded and processed by extractor.
491 You can reduce network I/O by disabling it if you don't
492 care about DASH. (only for youtube)
493 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
494 If True (default), HLS manifests and related
495 data will be downloaded and processed by extractor.
496 You can reduce network I/O by disabling it if you don't
497 care about HLS. (only for youtube)
498 """
499
# Names of the info fields regarded as numeric (their use is outside this view)
_NUMERIC_FIELDS = {
    'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
    'timestamp', 'release_timestamp',
    'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
    'average_rating', 'comment_count', 'age_limit',
    'start_time', 'end_time',
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number', 'release_year',
}

# File extensions grouped by the kind of format they belong to
_format_selection_exts = {
    'audio': {'m4a', 'mp3', 'ogg', 'aac'},
    'video': {'mp4', 'flv', 'webm', '3gp'},
    'storyboards': {'mhtml'},
}

# Class-level defaults. __init__ rebinds params, _ies, _pps, _printed_messages,
# _first_webpage_request, _download_retcode, _num_downloads and _screen_file
# on every instance; _playlist_level and _playlist_urls are not reset there.
params = None
_ies = {}
_pps = {k: [] for k in POSTPROCESS_WHEN}
_printed_messages = set()
_first_webpage_request = True
_download_retcode = None
_num_downloads = None
_playlist_level = 0
_playlist_urls = set()
_screen_file = None
526
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    @param params       Dictionary of options (see the class docstring).
                        An empty dict is used when None is given. The dict is
                        stored as self.params and is modified in place while
                        compatibility options are normalised.
    @param auto_init    Whether to load the default extractors and print header (if verbose).
                        Set to 'no_verbose_header' to not print the header
    """
    if params is None:
        params = {}
    # Per-instance state; these replace the class-level defaults of the same name
    self._ies = {}
    self._ies_instances = {}
    self._pps = {k: [] for k in POSTPROCESS_WHEN}
    self._printed_messages = set()
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._postprocessor_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._num_videos = 0
    # The option value is used as a list index: falsy -> stdout, truthy -> stderr.
    # NOTE(review): an explicit None would raise TypeError here - confirm callers never pass it
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    self._err_file = sys.stderr
    self.params = params
    self.cache = Cache(self)

    windows_enable_vt_mode()
    # Colors are used per stream only if not disabled and the stream supports terminal sequences
    self._allow_colors = {
        'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
        'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
    }

    if sys.version_info < (3, 6):
        self.report_warning(
            'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

    if self.params.get('allow_unplayable_formats'):
        self.report_warning(
            f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
            'This is a developer option intended for debugging. \n'
            ' If you experience any issues while using this option, '
            f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

    def check_deprecated(param, option, suggestion):
        # Warn if the deprecated `param` is set; the return value tells whether it was
        if self.params.get(param) is not None:
            self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
            return True
        return False

    # The old proxy option only fills in the new one when the new one is unset
    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    # Emit the warnings that were passed in through the params
    for msg in self.params.get('_warnings', []):
        self.report_warning(msg)
    for msg in self.params.get('_deprecation_warnings', []):
        self.deprecation_warning(msg)

    if 'list-formats' in self.params.get('compat_opts', []):
        self.params['listformats_table'] = False

    if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
        # nooverwrites was unnecessarily changed to overwrites
        # in 0c3d0f51778b153f65c21906031c2e091fcfb641
        # This ensures compatibility with both keys
        self.params['overwrites'] = not self.params['nooverwrites']
    elif self.params.get('overwrites') is None:
        # Drop a missing/None 'overwrites' key entirely
        self.params.pop('overwrites', None)
    else:
        # Keep the legacy key in sync with the new one
        self.params['nooverwrites'] = not self.params['overwrites']

    self.params.setdefault('forceprint', {})
    self.params.setdefault('print_to_file', {})

    # Compatibility with older syntax
    if not isinstance(params['forceprint'], dict):
        self.params['forceprint'] = {'video': params['forceprint']}

    if self.params.get('bidi_workaround', False):
        try:
            # Messages are piped through an external program whose output
            # is read back from the master end of a pseudo-terminal
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            try:
                self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
            except OSError:
                # bidiv could not be started; fall back to fribidi
                self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # A missing executable only disables the workaround; anything else propagates
            if ose.errno == errno.ENOENT:
                self.report_warning(
                    'Could not find fribidi executable, ignoring --bidi-workaround. '
                    'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    # Creating format selector here allows us to catch syntax errors before the extraction
    # None and '-' are stored as-is, a callable is used directly,
    # and any other value is handed to build_format_selector
    self.format_selector = (
        self.params.get('format') if self.params.get('format') in (None, '-')
        else self.params['format'] if callable(self.params['format'])
        else self.build_format_selector(self.params['format']))

    self._setup_opener()

    if auto_init:
        if auto_init != 'no_verbose_header':
            self.print_debug_header()
        self.add_default_info_extractors()

    # Register the hooks given in params through the matching add_* method
    hooks = {
        'post_hooks': self.add_post_hook,
        'progress_hooks': self.add_progress_hook,
        'postprocessor_hooks': self.add_postprocessor_hook,
    }
    for opt, fn in hooks.items():
        for ph in self.params.get(opt, []):
            fn(ph)

    for pp_def_raw in self.params.get('postprocessors', []):
        # Work on a copy so that popping 'when'/'key' leaves the caller's dict intact
        pp_def = dict(pp_def_raw)
        when = pp_def.pop('when', 'post_process')
        self.add_post_processor(
            get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
            when=when)

    register_socks_protocols()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified"""
        if fn is None:
            return False
        self.write_debug(f'Loading archive file {fn!r}')
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A non-existent archive file is not an error; other I/O errors are re-raised
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    # self.archive must exist before the helper runs since the helper adds to it
    self.archive = set()
    preload_download_archive(self.params.get('download_archive'))
692
def warn_if_short_id(self, argv):
    """Warn if any argument looks like a short YouTube ID starting with a dash.

    An argument is flagged when it is a dash followed by exactly 10
    characters out of [0-9A-Za-z_-]. The warning shows the command line
    rewritten so that the flagged arguments come after a `--` separator.
    """
    flagged, remaining = [], []
    for arg in argv:
        bucket = flagged if re.match(r'^-[0-9A-Za-z_-]{10}$', arg) else remaining
        bucket.append(arg)
    if not flagged:
        return
    suggestion = ['yt-dlp'] + remaining + ['--'] + flagged
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s' %
        args_to_str(suggestion))
708
def add_info_extractor(self, ie):
    """Register an InfoExtractor (class or instance) under its ie_key().

    Every `ie` is stored in self._ies. If `ie` is an instance rather than a
    class, it is also stored in self._ies_instances and this object is set
    as its downloader.
    """
    key = ie.ie_key()
    self._ies[key] = ie
    if isinstance(ie, type):
        # Classes are only recorded; there is no instance to bind yet
        return
    self._ies_instances[key] = ie
    ie.set_downloader(self)
716
717 def _get_info_extractor_class(self, ie_key):
718 ie = self._ies.get(ie_key)
719 if ie is None:
720 ie = get_info_extractor(ie_key)
721 self.add_info_extractor(ie)
722 return ie
723
def get_info_extractor(self, ie_key):
    """
    Return an instance of the IE named ie_key.

    An instance already present in self._ies_instances is reused. Otherwise
    the result of the module-level get_info_extractor() is instantiated,
    registered through add_info_extractor() and returned.
    """
    instance = self._ies_instances.get(ie_key)
    if instance is not None:
        return instance
    instance = get_info_extractor(ie_key)()
    self.add_info_extractor(instance)
    return instance
735
def add_default_info_extractors(self):
    """
    Register every InfoExtractor produced by gen_extractor_classes(),
    in the order they are produced
    """
    for extractor_class in gen_extractor_classes():
        self.add_info_extractor(extractor_class)
742
def add_post_processor(self, pp, when='post_process'):
    """Append a PostProcessor to the chain for stage `when` and set this object as its downloader.

    `when` must be an existing key of self._pps.
    """
    stage_chain = self._pps[when]
    stage_chain.append(pp)
    pp.set_downloader(self)
747
def add_post_hook(self, ph):
    """Append `ph` to the list of registered post hooks"""
    registered = self._post_hooks
    registered.append(ph)
751
def add_progress_hook(self, ph):
    """Append `ph` to the list of registered download progress hooks"""
    registered = self._progress_hooks
    registered.append(ph)
755
def add_postprocessor_hook(self, ph):
    """Register a postprocessing progress hook.

    The hook is recorded in self._postprocessor_hooks and also added to
    every postprocessor currently held in self._pps.
    """
    self._postprocessor_hooks.append(ph)
    for registered_pp in itertools.chain.from_iterable(self._pps.values()):
        registered_pp.add_progress_hook(ph)
762
763 def _bidi_workaround(self, message):
764 if not hasattr(self, '_output_channel'):
765 return message
766
767 assert hasattr(self, '_output_process')
768 assert isinstance(message, compat_str)
769 line_count = message.count('\n') + 1
770 self._output_process.stdin.write((message + '\n').encode('utf-8'))
771 self._output_process.stdin.flush()
772 res = ''.join(self._output_channel.readline().decode('utf-8')
773 for _ in range(line_count))
774 return res[:-len('\n')]
775
776 def _write_string(self, message, out=None, only_once=False):
777 if only_once:
778 if message in self._printed_messages:
779 return
780 self._printed_messages.add(message)
781 write_string(message, out=out, encoding=self.params.get('encoding'))
782
def to_stdout(self, message, skip_eol=False, quiet=False):
    """Print message to stdout

    A configured 'logger' receives the message via debug() instead.
    A quiet message is dropped unless 'verbose' is set, in which case it
    goes to the error stream rather than the screen stream.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if quiet and not self.params.get('verbose'):
        return
    line_end = '' if skip_eol else '\n'
    text = '%s%s' % (self._bidi_workaround(message), line_end)
    if quiet:
        self._write_string(text, self._err_file)
    else:
        self._write_string(text, self._screen_file)
791
def to_stderr(self, message, only_once=False):
    """
    Print message to stderr.

    A configured 'logger' receives the message via error() instead;
    otherwise the message plus a newline is written to self._err_file,
    optionally de-duplicated through only_once.
    """
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if not logger:
        line = '%s\n' % self._bidi_workaround(message)
        self._write_string(line, self._err_file, only_once=only_once)
        return
    logger.error(message)
799
def to_console_title(self, message):
    """
    Set the console title to message when the 'consoletitle' param is on.

    On 'nt' the title is set through kernel32.SetConsoleTitleW, provided
    a console window exists; elsewhere an escape sequence is written to
    the screen stream when TERM is present in the environment.
    """
    if not self.params.get('consoletitle', False):
        return
    title = remove_terminal_sequences(message)
    if compat_os_name != 'nt':
        if 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % title, self._screen_file)
        return
    kernel32 = ctypes.windll.kernel32
    if kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `title` is
        # already of type unicode()
        kernel32.SetConsoleTitleW(ctypes.c_wchar_p(title))
811
def save_console_title(self):
    """
    Push the current console title onto the terminal's title stack.

    Nothing happens unless 'consoletitle' is enabled, 'simulate' is off,
    the OS is not 'nt' and TERM is present in the environment.
    """
    wanted = self.params.get('consoletitle', False) and not self.params.get('simulate')
    if wanted and compat_os_name != 'nt' and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
820
def restore_console_title(self):
    """
    Pop the console title back from the terminal's title stack.

    Nothing happens unless 'consoletitle' is enabled, 'simulate' is off,
    the OS is not 'nt' and TERM is present in the environment.
    """
    wanted = self.params.get('consoletitle', False) and not self.params.get('simulate')
    if wanted and compat_os_name != 'nt' and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
829
def __enter__(self):
    """Enter the context: save the console title, then return this object."""
    self.save_console_title()
    downloader = self
    return downloader
833
def __exit__(self, *args):
    """
    Leave the context: restore the console title and, when a 'cookiefile'
    param is set, save the cookie jar (keeping discarded and expired
    cookies). Exception details in args are ignored.
    """
    self.restore_console_title()

    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save(ignore_discard=True, ignore_expires=True)
839
def trouble(self, message=None, tb=None, is_error=True):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    The message (if any) is always sent to stderr. In verbose mode a
    traceback is printed as well: the one given in tb, or one built from
    the exception currently being handled, or the current call stack.

    @param message     Text to print; may be None to print nothing
    @param tb          If given, is additional traceback information
    @param is_error    Whether to raise error according to ignoreerrors
    @raises DownloadError  when is_error is true and 'ignoreerrors' is unset
    """
    if message is not None:
        self.to_stderr(message)
    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                tb = ''
                # Exceptions carrying their own `exc_info` attribute wrap an
                # earlier error; print that inner traceback first
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += encode_compat_str(traceback.format_exc())
            else:
                # Not inside an except block: show where trouble() was called from
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = ''.join(tb_data)
        if tb:
            self.to_stderr(tb)
    if not is_error:
        return
    if not self.params.get('ignoreerrors'):
        # Prefer the wrapped exception's exc_info when the active exception has one
        if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
            exc_info = sys.exc_info()[1].exc_info
        else:
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    # Errors are being ignored: remember the failure in the return code instead
    self._download_retcode = 1
873
def to_screen(self, message, skip_eol=False):
    """Forward message to to_stdout(), marking it quiet per the 'quiet' param."""
    be_quiet = self.params.get('quiet', False)
    self.to_stdout(message, skip_eol, quiet=be_quiet)
878
class Styles(Enum):
    # Named text styles; each value is the style string that _format_text()
    # unwraps (via `.value`) before handing it to format_text().
    # NOTE: HEADERS and WARNING share the value 'yellow', so under Enum
    # semantics WARNING is an alias of HEADERS rather than a distinct member.
    HEADERS = 'yellow'
    EMPHASIS = 'light blue'
    ID = 'green'
    DELIM = 'blue'
    ERROR = 'red'
    WARNING = 'yellow'
    SUPPRESS = 'light black'
887
def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
    """
    Style text with f when colors are allowed, else return plain text.

    @param handle         Stream whose `encoding` is consulted for the encoding test
    @param allow_colors   Whether format_text() may be applied
    @param f              A style string or a Styles member
    @param fallback       Returned in place of the text when colors are off,
                          or substituted when the encoding test loses characters
    @param test_encoding  Round-trip text through the output encoding first
    """
    if test_encoding:
        # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
        encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
        roundtripped = text.encode(encoding, 'ignore').decode(encoding)
        lossy = roundtripped != text
        text = fallback if (fallback is not None and lossy) else roundtripped
    style = f.value if isinstance(f, self.Styles) else f
    if allow_colors:
        return format_text(text, style)
    if fallback is None:
        return text
    return fallback
899
def _format_screen(self, *args, **kwargs):
    """Call _format_text() bound to the screen stream and its color setting."""
    handle, colors = self._screen_file, self._allow_colors['screen']
    return self._format_text(handle, colors, *args, **kwargs)
903
def _format_err(self, *args, **kwargs):
    """Call _format_text() bound to the error stream and its color setting."""
    handle, colors = self._err_file, self._allow_colors['err']
    return self._format_text(handle, colors, *args, **kwargs)
907
def report_warning(self, message, only_once=False):
    '''
    Emit a warning.

    With a 'logger' param the message goes to logger.warning() unchanged.
    Otherwise, unless 'no_warnings' is set, it is printed to stderr
    prefixed with 'WARNING:' (styled through _format_err).
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    prefix = self._format_err("WARNING:", self.Styles.WARNING)
    self.to_stderr(f'{prefix} {message}', only_once)
919
def deprecation_warning(self, message):
    '''
    Emit a deprecation warning.

    With a 'logger' param the text 'DeprecationWarning: <message>' goes to
    logger.warning(). Otherwise it is printed to stderr once per distinct
    message, with the 'DeprecationWarning:' prefix styled through _format_err.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        # The message must be interpolated; a plain string here would log
        # the literal text '{message}'
        logger.warning(f'DeprecationWarning: {message}')
    else:
        self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
925
def report_error(self, message, *args, **kwargs):
    '''
    Prefix message with 'ERROR:' (styled through _format_err) and pass it,
    together with any extra arguments, to trouble().
    '''
    prefix = self._format_err("ERROR:", self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', *args, **kwargs)
932
def write_debug(self, message, only_once=False):
    '''
    In verbose mode, emit '[debug] <message>' through the configured
    logger's debug() or, without a logger, to stderr. Otherwise do nothing.
    '''
    params = self.params
    if params.get('verbose', False):
        line = '[debug] %s' % message
        logger = params.get('logger')
        if logger:
            logger.debug(line)
        else:
            self.to_stderr(line, only_once)
942
def report_file_already_downloaded(self, file_name):
    """
    Tell the user that file_name is already fully downloaded; falls back
    to a message without the name if printing it raises UnicodeEncodeError.
    """
    try:
        named_msg = '[download] %s has already been downloaded' % file_name
        self.to_screen(named_msg)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
949
def report_file_delete(self, file_name):
    """
    Tell the user that the existing file_name will be deleted; falls back
    to a message without the name if printing it raises UnicodeEncodeError.
    """
    try:
        named_msg = 'Deleting existing file %s' % file_name
        self.to_screen(named_msg)
    except UnicodeEncodeError:
        self.to_screen('Deleting existing file')
956
def raise_no_formats(self, info, forced=False):
    """
    Signal that info has no usable formats.

    Only warns when 'ignore_no_formats_error' is set and forced is false;
    otherwise raises ExtractorError, marked as expected when the info is
    flagged '__has_drm' or the ignore option is set.
    """
    has_drm = info.get('__has_drm')
    msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
    expected = self.params.get('ignore_no_formats_error')
    if expected and not forced:
        self.report_warning(msg)
        return
    raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                         expected=has_drm or expected)
966
def parse_outtmpl(self):
    """
    Return the 'outtmpl' param as a dict of templates keyed by type.

    A non-dict value becomes the 'default' entry. Every key of
    DEFAULT_OUTTMPL that is missing or None is filled in from the defaults
    (with spaces removed when 'restrictfilenames' is set). A dict passed in
    the params is updated in place. A warning is reported for each bytes value.
    """
    templates = self.params.get('outtmpl', {})
    if not isinstance(templates, dict):
        templates = {'default': templates}
    # Remove spaces in the default template
    restrict = self.params.get('restrictfilenames')
    for tmpl_type, default_tmpl in DEFAULT_OUTTMPL.items():
        if templates.get(tmpl_type) is not None:
            continue
        if restrict:
            default_tmpl = default_tmpl.replace(' - ', ' ').replace(' ', '-')
        templates[tmpl_type] = default_tmpl
    for tmpl in templates.values():
        if isinstance(tmpl, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
    return templates
985
def get_output_path(self, dir_type='', filename=None):
    """
    Join the 'home' path, the path configured for dir_type (if any) and
    filename, then pass the result through sanitize_path().
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    path = os.path.join(home_dir, type_dir, filename or '')

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    on_py2_windows = sys.version_info < (3, 0) and sys.platform == 'win32'
    if on_py2_windows:
        path = encodeFilename(path, True).decode(preferredencoding())
    return sanitize_path(path, force=self.params.get('windowsfilenames'))
1000
@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Run expand_path() on outtmpl while keeping '%%' and '$$' intact."""
    # expand_path translates '%%' into '%' and '$$' into '$'
    # correspondingly that is not what we want since we need to keep
    # '%%' intact for template dict substitution step. Working around
    # with boundary-alike separator hack: a random 32-letter marker is
    # wedged between the doubled characters and stripped afterwards.
    sep = ''.join(random.choice(ascii_letters) for _ in range(32))
    guarded = outtmpl.replace('%%', f'%{sep}%')
    guarded = guarded.replace('$$', f'${sep}$')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    expanded = expand_path(guarded)
    return expanded.replace(sep, '')
1015
@staticmethod
def escape_outtmpl(outtmpl):
    ''' Escape any remaining strings like %s, %abc% etc. by doubling the
    leading '%' of every match that carries no key '''

    def _escape(mobj):
        prefix = '' if mobj.group('has_key') else '%'
        return prefix + mobj.group(0)

    pattern = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')
    return re.sub(pattern, _escape, outtmpl)
1023
@classmethod
def validate_outtmpl(cls, outtmpl):
    ''' Check that outtmpl can be %-formatted
    @return None or Exception object (the ValueError raised by formatting) '''
    # Rewrite the custom conversion types (l, j, q, B, U, D, S) to plain
    # 's' so that the standard %-formatting below accepts them
    pattern = STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]')
    expanded = cls._outtmpl_expandpath(outtmpl)
    outtmpl = re.sub(pattern, lambda mobj: f'{mobj.group(0)[:-1]}s', expanded)
    try:
        cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
    except ValueError as err:
        return err
    else:
        return None
1036
@staticmethod
def _copy_infodict(info_dict):
    """Return a shallow copy of info_dict without the internal bookkeeping keys."""
    internal_keys = ('__original_infodict', '__postprocessors')
    return {
        key: value
        for key, value in dict(info_dict).items()
        if key not in internal_keys
    }
1043
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

    Every %(...)X field of outtmpl is evaluated against a copy of info_dict
    and replaced by a field with a generated key; the evaluated values are
    collected under those keys.

    @param outtmpl     The output template string
    @param info_dict   The info dict; 'epoch' is set on it if missing,
                       all other changes are made on a copy
    @param sanitize    Whether to sanitize the output as a filename.
                       For backward compatibility, a function can also be passed
    @return            Tuple (rewritten template, dict of generated key -> value)
    """

    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    info_dict = self._copy_infodict(info_dict)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
    info_dict['video_autonumber'] = self._num_videos
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
        'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
    # The decimal point is escaped so that only real numbers such as "1.5"
    # match the number alternative (an unescaped "." would accept "1x5")
    MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:\.\d+)?')
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{field})
        (?P<maths>(?:{math_op}{math_field})*)
        (?:>(?P<strf_format>.+?))?
        (?P<alternate>(?<!\\),[^|&)]+)?
        (?:&(?P<replacement>.*?))?
        (?:\|(?P<default>.*?))?
        $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

    def _traverse_infodict(k):
        # A leading '.' yields an empty first component, which is dropped
        k = k.split('.')
        if k[0] == '':
            k.pop(0)
        return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            operator = None
            # Alternately consume an operator and an operand from the front
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if operator is None:
                    operator = MATH_FUNCTIONS[item]
                    continue
                # NOTE(review): MATH_FIELD_RE can match the empty string, in which
                # case item[0] raises IndexError - confirm whether an operand such
                # as "-1" directly after an operator can reach this line
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                offset = float_or_none(item)
                if offset is None:
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = operator(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                operator = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        return value

    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
        return sanitize_filename(str(value), restricted=restricted,
                                 is_id=re.search(r'(^|[_.])id(\.|$)', key))

    sanitizer = sanitize if callable(sanitize) else filename_sanitizer
    sanitize = bool(sanitize)

    def _dumpjson_default(obj):
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        return repr(obj)

    def create_key(outer_mobj):
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        initial_field = mobj.group('fields') if mobj else ''
        value, replacement, default = None, None, na
        # Walk through the comma-separated alternates until one has a value
        while mobj:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else default
            value = get_value(mobj)
            replacement = mobj['replacement']
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
            else:
                break

        fmt = outer_mobj.group('format')
        if fmt == 's' and value is not None and key in field_size_compat_map.keys():
            fmt = '0{:d}d'.format(field_size_compat_map[key])

        value = default if value is None else value if replacement is None else replacement

        flags = outer_mobj.group('conversion') or ''
        str_fmt = f'{fmt[:-1]}s'
        if fmt[-1] == 'l':  # list
            delim = '\n' if '#' in flags else ', '
            value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
        elif fmt[-1] == 'j':  # json
            value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
        elif fmt[-1] == 'q':  # quoted
            value = map(str, variadic(value) if '#' in flags else [value])
            value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
        elif fmt[-1] == 'B':  # bytes
            value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif fmt[-1] == 'U':  # unicode normalized
            value, fmt = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                value), str_fmt
        elif fmt[-1] == 'D':  # decimal suffix
            num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
            value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                          factor=1024 if '#' in flags else 1000)
        elif fmt[-1] == 'S':  # filename sanitization
            value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
        elif fmt[-1] == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif fmt[-1] not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), str_fmt
            if fmt[-1] in 'csr':
                value = sanitizer(initial_field, value)

        # Build a key for the rewritten template from the original key and format
        key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1217
def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """
    Return outtmpl filled in from info_dict: the template is prepared with
    prepare_outtmpl() (extra arguments are forwarded), escaped with
    escape_outtmpl() and %-formatted with the prepared values.
    """
    prepared_tmpl, tmpl_values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    escaped_tmpl = self.escape_outtmpl(prepared_tmpl)
    return escaped_tmpl % tmpl_values
1221
def _prepare_filename(self, info_dict, tmpl_type='default'):
    """
    Evaluate the output template of type tmpl_type against info_dict.

    Falls back to the 'default' template when tmpl_type has no entry in
    self.outtmpl_dict. Returns the resulting filename, or None when the
    template evaluates to an empty string or a ValueError is raised
    (which is reported through report_error).
    """
    try:
        outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
        filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
        if not filename:
            return None

        if tmpl_type in ('default', 'temp'):
            # If the name was generated with the 'final_ext' param as its
            # extension, put the extension from info_dict in its place
            final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
            if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
                filename = replace_extension(filename, ext, final_ext)
        else:
            # Other template types may have a fixed extension in OUTTMPL_TYPES
            force_ext = OUTTMPL_TYPES[tmpl_type]
            if force_ext:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        trim_file_name = self.params.get('trim_file_name', False)
        if trim_file_name:
            # Trim only the part before the last (up to) two dot-separated suffixes
            no_ext, *ext = filename.rsplit('.', 2)
            filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
1248
def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename.

    @param info_dict   The info dict the template is evaluated against
    @param dir_type    Template/path type; '' selects the 'default' template
    @param warn        Whether to warn when the 'paths' param will be ignored
    @return            The full output path; '-' for stdout; '' when a
                       non-default, non-temp template produced no name;
                       otherwise the falsy value returned by _prepare_filename
    """

    filename = self._prepare_filename(info_dict, dir_type or 'default')
    if not filename and dir_type not in ('', 'temp'):
        return ''

    if warn:
        if not self.params.get('paths'):
            pass
        elif filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        # filename may be None or empty here (e.g. template error);
        # os.path.isabs() would raise TypeError on None
        elif filename and os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
    if filename == '-' or not filename:
        return filename

    return self.get_output_path(dir_type, filename)
1267
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """ Returns None if the file should be downloaded

    Otherwise returns a string with the reason for skipping it. Unless
    silent, the reason is printed. If the matching 'break_on_existing' /
    'break_on_reject' param is set, ExistingVideoReached /
    RejectedVideoReached is raised instead of returning.

    @param incomplete  Forwarded to the 'match_filter' callable
    @param silent      Do not print the reason to the screen
    """

    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def check_filter():
        # Returns the first reason for rejection, or None
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        match_filter = self.params.get('match_filter')
        if match_filter is not None:
            try:
                ret = match_filter(info_dict, incomplete=incomplete)
            except TypeError:
                # For backward compatibility
                # (filters that do not accept the `incomplete` keyword)
                ret = None if incomplete else match_filter(info_dict)
            if ret is not None:
                return ret
        return None

    # The archive check takes precedence; the filters are not evaluated
    # for entries that are already recorded in the archive
    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached
    if reason is not None:
        if not silent:
            self.to_screen('[download] ' + reason)
        if self.params.get(break_opt, False):
            raise break_err()
    return reason
1324
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every key of extra_info that info_dict lacks'''
    for key in extra_info:
        if key not in info_dict:
            info_dict[key] = extra_info[key]
1330
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract information for url with the first suitable extractor.

    Returns the result of the internal extraction step for that
    extractor. Returns None when the URL's temporary id is already in
    the download archive, or when no extractor is suitable (after the
    error has been reported through report_error).

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
    process -- whether to resolve all unresolved references (URLs, playlist items),
        must be True for download to work.
    force_generic_extractor -- force using the generic extractor

    Raises ExistingVideoReached when the URL is in the archive and the
    'break_on_existing' param is set.
    """

    if extra_info is None:
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    # With a key hint only that extractor is considered
    if ie_key:
        ies = {ie_key: self._get_info_extractor_class(ie_key)}
    else:
        ies = self._ies

    for ie_key, ie in ies.items():
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = ie.get_temp_id(url)
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
            if self.params.get('break_on_existing', False):
                raise ExistingVideoReached()
            # Leaving via break skips the for-else below, so None is returned
            break
        return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
    else:
        # Reached only when no extractor accepted the URL
        self.report_error('no suitable InfoExtractor for URL %s' % url)
1376
def __handle_extraction_exceptions(func):
    # Decorator for extraction methods: runs func in a loop so that it can
    # be retried on ReExtractInfo, and routes the other extraction errors
    # through report_error. When an error is reported without report_error
    # raising, the wrapper falls out of the loop and returns None.
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        while True:
            try:
                return func(self, *args, **kwargs)
            except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                # These are passed through to the caller unchanged
                raise
            except ReExtractInfo as e:
                if e.expected:
                    self.to_screen(f'{e}; Re-extracting data')
                else:
                    self.to_stderr('\r')
                    self.report_warning(f'{e}; Re-extracting data')
                # Retry the whole call
                continue
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(str(e), e.format_traceback())
            except Exception as e:
                # Unexpected errors are only swallowed when 'ignoreerrors' is set
                if self.params.get('ignoreerrors'):
                    self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
            # An error was reported without raising: stop retrying
            break
    return wrapper
1408
def _wait_for_video(self, ie_result):
    """
    Wait for a video that has no formats yet, then request re-extraction.

    Does nothing unless the 'wait_for_video' param is set and ie_result
    is a video result without 'formats' and 'url'. Otherwise a countdown
    is shown and, once it ends or the user presses Ctrl+C, ReExtractInfo
    (marked as expected) is raised.

    The wait is derived from 'release_timestamp'; when that is unknown for
    an upcoming live video a value between the configured minimum and
    maximum is picked. The result is clamped to the configured range.
    """
    if (not self.params.get('wait_for_video')
            or ie_result.get('_type', 'video') != 'video'
            or ie_result.get('formats') or ie_result.get('url')):
        return

    format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
    last_msg = ''

    def progress(msg):
        # Pad with spaces so a shorter message fully overwrites the previous one
        nonlocal last_msg
        self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
        last_msg = msg

    min_wait, max_wait = self.params.get('wait_for_video')
    diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
    if diff is None and ie_result.get('live_status') == 'is_upcoming':
        if max_wait and min_wait:
            # random.uniform accepts equal and non-integer bounds, unlike
            # random.randrange which raises for an empty or float range
            diff = round(random.uniform(min_wait, max_wait))
        else:
            diff = max_wait or min_wait
        self.report_warning('Release time of video is not known')
    elif (diff or 0) <= 0:
        self.report_warning('Video should already be available according to extracted info')
    diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
    self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

    wait_till = time.time() + diff
    try:
        while True:
            diff = wait_till - time.time()
            if diff <= 0:
                progress('')
                raise ReExtractInfo('[wait] Wait period ended', expected=True)
            progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
            time.sleep(1)
    except KeyboardInterrupt:
        progress('')
        raise ReExtractInfo('[wait] Interrupted by user', expected=True)
    except BaseException as e:
        # Finish the progress line before any other exception propagates
        if not isinstance(e, ReExtractInfo):
            self.to_screen('')
        raise
1449
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """
    Run ie.extract(url) and post-process its result.

    Returns None when the extractor returns None. A list result is wrapped
    into a 'compat_list' dict. The result gets 'original_url' from
    extra_info (if missing) and the default extra info; it is then either
    returned as-is or, when process is true, passed through
    _wait_for_video() and process_ie_result().
    """
    extracted = ie.extract(url)
    if extracted is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return
    if isinstance(extracted, list):
        # Backwards compatibility: old IE result format
        extracted = {'_type': 'compat_list', 'entries': extracted}
    original_url = extra_info.get('original_url')
    if original_url:
        extracted.setdefault('original_url', original_url)
    self.add_default_extra_info(extracted, ie, url)
    if not process:
        return extracted
    self._wait_for_video(extracted)
    return self.process_ie_result(extracted, download, extra_info)
1469
def add_default_extra_info(self, ie_result, ie, url):
    """
    Fill in missing URL and extractor fields of ie_result.

    Existing keys are never overwritten (add_extra_info is used). url and
    ie may each be None, in which case the fields derived from them are
    skipped.
    """
    if url is not None:
        url_fields = {'webpage_url': url, 'original_url': url}
        self.add_extra_info(ie_result, url_fields)
    page_url = ie_result.get('webpage_url')
    if page_url:
        derived_fields = {
            'webpage_url_basename': url_basename(page_url),
            'webpage_url_domain': get_domain(page_url),
        }
        self.add_extra_info(ie_result, derived_fields)
    if ie is not None:
        extractor_fields = {'extractor': ie.IE_NAME, 'extractor_key': ie.ie_key()}
        self.add_extra_info(ie_result, extractor_fields)
1487
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    Handling depends on ie_result['_type'] (default 'video'):
    'video' is processed directly; 'url' is re-extracted; 'url_transparent'
    is re-extracted and merged with the outer metadata; 'playlist' and
    'multi_video' are processed entry by entry; 'compat_list' is the legacy
    list format. Any other type raises Exception.
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        if ie_result.get('original_url'):
            extra_info.setdefault('original_url', ie_result['original_url'])

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: do not resolve the URL; only run the forced
            # printings (and archive recording) on an enriched copy
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            if ie and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            info_copy, _ = self.pre_process(info_copy)
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, compat_str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # The parentheses matter: '%' binds tighter than 'or', so without
            # them the id fallback would never be used
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Forget the seen URLs once the outermost playlist is done
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Give each entry the extractor/URL fields of the enclosing result
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1616
def _ensure_dir_exists(self, path):
    """Delegate to make_dir for `path`, handing it self.report_error as second argument.

    The result of make_dir is returned unchanged; callers treat a falsy
    result as "directory not available".
    """
    outcome = make_dir(path, self.report_error)
    return outcome
1619
1620 @staticmethod
1621 def _playlist_infodict(ie_result, **kwargs):
1622 return {
1623 **ie_result,
1624 'playlist': ie_result.get('title') or ie_result.get('id'),
1625 'playlist_id': ie_result.get('id'),
1626 'playlist_title': ie_result.get('title'),
1627 'playlist_uploader': ie_result.get('uploader'),
1628 'playlist_uploader_id': ie_result.get('uploader_id'),
1629 'playlist_index': 0,
1630 **kwargs,
1631 }
1632
def __process_playlist(self, ie_result, download):
    """Select, process and collect the entries of a playlist result.

    Entries are chosen according to the 'playliststart', 'playlistend' and
    'playlist_items' params. Unless disabled through params, the playlist
    level info json, description and thumbnails are written. Every selected
    entry is then processed and ie_result['entries'] is replaced with the
    list of processed results.

    Returns the dict produced by run_all_pps('playlist', ...), or None if
    writing the playlist info json or description reported None.
    Raises EntryNotInPlaylist if ie_result has no 'entries' key, or if the
    playlist is marked incomplete ('requested_entries') and a selected entry
    cannot be found.
    """
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if 'entries' not in ie_result:
        raise EntryNotInPlaylist('There are no entries')

    # Sentinel marking positions of an incomplete playlist that have no entry
    MissingEntry = object()
    incomplete_entries = bool(ie_result.get('requested_entries'))
    if incomplete_entries:
        def fill_missing_entries(entries, indices):
            # Put each entry back at its original 1-based position
            ret = [MissingEntry] * max(indices)
            for i, entry in zip(indices, entries):
                ret[i - 1] = entry
            return ret
        ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

    playlist_results = []

    playliststart = self.params.get('playliststart', 1)
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(spec):
            # 'a-b' segments expand to the inclusive range, others are single items
            for string_segment in spec.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    for item in range(int(start), int(end) + 1):
                        yield int(item)
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']
    if isinstance(ie_entries, list):
        playlist_count = len(ie_entries)
        msg = f'Collected {playlist_count} videos; downloading %d of them'
        ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count

        def get_entry(i):
            return ie_entries[i - 1]
    else:
        msg = 'Downloading %d videos'
        if not isinstance(ie_entries, (PagedList, LazyList)):
            ie_entries = LazyList(ie_entries)
        elif isinstance(ie_entries, InAdvancePagedList):
            if ie_entries._pagesize == 1:
                # NOTE: this value is not read again in this branch
                playlist_count = ie_entries._pagecount

        def get_entry(i):
            return YoutubeDL.__handle_extraction_exceptions(
                lambda self, i: ie_entries[i - 1]
            )(self, i)

    # entry_indices[k] is the 1-based playlist index that entries[k] was
    # fetched with. It is recorded here because skipped items (index 0) would
    # otherwise shift the index of every following entry.
    entries, entry_indices, broken = [], [], False
    items = playlistitems if playlistitems is not None else itertools.count(playliststart)
    for i in items:
        if i == 0:
            continue
        if playlistitems is None and playlistend is not None and playlistend < i:
            break
        entry = None
        try:
            entry = get_entry(i)
            if entry is MissingEntry:
                raise EntryNotInPlaylist()
        except (IndexError, EntryNotInPlaylist):
            if incomplete_entries:
                raise EntryNotInPlaylist(f'Entry {i} cannot be found')
            elif not playlistitems:
                break
        entries.append(entry)
        entry_indices.append(i)
        try:
            if entry is not None:
                self._match_entry(entry, incomplete=True, silent=True)
        except (ExistingVideoReached, RejectedVideoReached):
            broken = True
            break
    ie_result['entries'] = entries

    # Save playlist_index before re-ordering
    entries = [
        (playlist_index, entry)
        for playlist_index, entry in zip(entry_indices, entries)
        if entry is not None]
    n_entries = len(entries)

    if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
        ie_result['playlist_count'] = n_entries

    if not playlistitems and (playliststart != 1 or playlistend):
        playlistitems = list(range(playliststart, playliststart + n_entries))
    ie_result['requested_entries'] = playlistitems

    _infojson_written = False
    write_playlist_files = self.params.get('allow_playlist_files', True)
    if write_playlist_files and self.params.get('list_thumbnails'):
        self.list_thumbnails(ie_result)
    if write_playlist_files and not self.params.get('simulate'):
        ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
        _infojson_written = self._write_info_json(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
        if _infojson_written is None:
            return
        if self._write_description('playlist', ie_result,
                                   self.prepare_filename(ie_copy, 'pl_description')) is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]
    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, entry_tuple in enumerate(entries, 1):
        playlist_index, entry = entry_tuple
        if 'playlist-index' in self.params.get('compat_opts', []):
            # Compat behaviour: index follows the (re-ordered) download order
            playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
            'playlist_count': ie_result.get('playlist_count'),
            'playlist_index': playlist_index,
            'playlist_autonumber': i,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'webpage_url_domain': get_domain(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        # A non-None result from _match_entry means the entry is to be skipped
        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
            break
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results

    # Write the updated info to json
    if _infojson_written and self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
        return

    ie_result = self.run_all_pps('playlist', ie_result)
    self.to_screen(f'[download] Finished downloading playlist: {playlist}')
    return ie_result
1808
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Run process_ie_result on a single playlist entry and return its result."""
    processed = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return processed
1813
1814 def _build_format_filter(self, filter_spec):
1815 " Returns a function to filter the formats according to the filter_spec "
1816
1817 OPERATORS = {
1818 '<': operator.lt,
1819 '<=': operator.le,
1820 '>': operator.gt,
1821 '>=': operator.ge,
1822 '=': operator.eq,
1823 '!=': operator.ne,
1824 }
1825 operator_rex = re.compile(r'''(?x)\s*
1826 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1827 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1828 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1829 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1830 m = operator_rex.fullmatch(filter_spec)
1831 if m:
1832 try:
1833 comparison_value = int(m.group('value'))
1834 except ValueError:
1835 comparison_value = parse_filesize(m.group('value'))
1836 if comparison_value is None:
1837 comparison_value = parse_filesize(m.group('value') + 'B')
1838 if comparison_value is None:
1839 raise ValueError(
1840 'Invalid value %r in format specification %r' % (
1841 m.group('value'), filter_spec))
1842 op = OPERATORS[m.group('op')]
1843
1844 if not m:
1845 STR_OPERATORS = {
1846 '=': operator.eq,
1847 '^=': lambda attr, value: attr.startswith(value),
1848 '$=': lambda attr, value: attr.endswith(value),
1849 '*=': lambda attr, value: value in attr,
1850 '~=': lambda attr, value: value.search(attr) is not None
1851 }
1852 str_operator_rex = re.compile(r'''(?x)\s*
1853 (?P<key>[a-zA-Z0-9._-]+)\s*
1854 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1855 (?P<quote>["'])?
1856 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1857 (?(quote)(?P=quote))\s*
1858 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1859 m = str_operator_rex.fullmatch(filter_spec)
1860 if m:
1861 if m.group('op') == '~=':
1862 comparison_value = re.compile(m.group('value'))
1863 else:
1864 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1865 str_op = STR_OPERATORS[m.group('op')]
1866 if m.group('negation'):
1867 op = lambda attr, value: not str_op(attr, value)
1868 else:
1869 op = str_op
1870
1871 if not m:
1872 raise SyntaxError('Invalid filter specification %r' % filter_spec)
1873
1874 def _filter(f):
1875 actual_value = f.get(m.group('key'))
1876 if actual_value is None:
1877 return m.group('none_inclusive')
1878 return op(actual_value, comparison_value)
1879 return _filter
1880
1881 def _check_formats(self, formats):
1882 for f in formats:
1883 self.to_screen('[info] Testing format %s' % f['format_id'])
1884 path = self.get_output_path('temp')
1885 if not self._ensure_dir_exists(f'{path}/'):
1886 continue
1887 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1888 temp_file.close()
1889 try:
1890 success, _ = self.dl(temp_file.name, f, test=True)
1891 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1892 success = False
1893 finally:
1894 if os.path.exists(temp_file.name):
1895 try:
1896 os.remove(temp_file.name)
1897 except OSError:
1898 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1899 if success:
1900 yield f
1901 else:
1902 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1903
1904 def _default_format_spec(self, info_dict, download=True):
1905
1906 def can_merge():
1907 merger = FFmpegMergerPP(self)
1908 return merger.available and merger.can_merge()
1909
1910 prefer_best = (
1911 not self.params.get('simulate')
1912 and download
1913 and (
1914 not can_merge()
1915 or info_dict.get('is_live', False)
1916 or self.outtmpl_dict['default'] == '-'))
1917 compat = (
1918 prefer_best
1919 or self.params.get('allow_multiple_audio_streams', False)
1920 or 'format-spec' in self.params.get('compat_opts', []))
1921
1922 return (
1923 'best/bestvideo+bestaudio' if prefer_best
1924 else 'bestvideo*+bestaudio/best' if not compat
1925 else 'bestvideo+bestaudio/best')
1926
def build_format_selector(self, format_spec):
    """Compile a format specification string into a selector function.

    The spec is tokenized, parsed into a tree of FormatSelector tuples
    (',' lists, '/' fallbacks, '+' merges, '(...)' groups and '[...]'
    filters) and converted into a function. That function takes a context
    dict (reads 'formats' and 'incomplete_formats') and returns an iterable
    of the selected format dicts.

    Raises SyntaxError for a malformed specification.
    """
    def syntax_error(note, start):
        # `start` is a (row, column) pair; the column positions the caret
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    check_formats = self.params.get('check_formats') == 'selected'

    def _parse_filter(tokens):
        # Collect the raw text of everything up to the closing ']'
        filter_parts = []
        for tok_type, string, start, _, _ in tokens:
            if tok_type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for tok_type, string, start, end, line in tokens:
            if tok_type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for tok_type, string, start, end, line in tokens:
                    yield tok_type, string, start, end, line
                    if tok_type == tokenize.OP and string == ']':
                        break
            elif tok_type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield tok_type, string, start, end, line
            elif tok_type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive-descent parser; returns the list of ','-separated selectors
        selectors = []
        current_selector = None
        for tok_type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if tok_type == getattr(tokenize, 'ENCODING', None):
                continue
            elif tok_type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif tok_type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif tok_type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        # Combine two formats (each possibly already a merged format) into
        # one format dict carrying the parts in 'requested_formats'
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            get_no_more = {'video': False, 'audio': False}
            # Build a new list instead of popping from the list being
            # iterated: popping while enumerating skips the element that
            # follows every removed one, which let surplus streams through.
            kept_formats = []
            for fmt_info in formats_info:
                if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                    continue
                for aud_vid in ['audio', 'video']:
                    if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                        if get_no_more[aud_vid]:
                            break
                        get_no_more[aud_vid] = True
                else:
                    # No disallowed duplicate stream type was found
                    kept_formats.append(fmt_info)
            formats_info = kept_formats

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(filtered('format')),
            'format_id': '+'.join(filtered('format_id')),
            'ext': output_ext,
            'protocol': '+'.join(map(determine_protocol, formats_info)),
            'language': '+'.join(orderedSet(filtered('language'))) or None,
            'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
            'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
            'tbr': sum(filtered('tbr', 'vbr', 'abr')),
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'dynamic_range': the_only_video.get('dynamic_range'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
                'asr': the_only_audio.get('asr'),
            })

        return new_dict

    def _check_formats(formats):
        # Only test-download formats when check_formats == 'selected'
        if not check_formats:
            yield from formats
            return
        yield from self._check_formats(formats)

    def _build_selector_function(selector):
        # Turn a parsed selector (or list of selectors) into a function of ctx
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    yield from f(ctx)
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                    yield _merge(pair)

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    yield from _check_formats(ctx['formats'][::-1])
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(_check_formats(ctx['formats']))
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, format_reverse, format_idx = False, True, 1
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    _filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else lambda f: True)  # b*, w*
                    filter_f = lambda f: _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                else:
                    if format_spec in self._format_selection_exts['audio']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                    elif format_spec in self._format_selection_exts['video']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                    elif format_spec in self._format_selection_exts['storyboards']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                    else:
                        filter_f = lambda f: f.get('format_id') == format_spec  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if format_fallback and ctx['incomplete_formats'] and not matches:
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        matches = formats
                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                    try:
                        yield matches[format_idx - 1]
                    except IndexError:
                        return

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply the '[...]' filters to a copy of ctx before selecting
            ctx_copy = dict(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # List iterator that lets the parser push the last token back
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
2251
def _calc_headers(self, info_dict):
    """Assemble the HTTP headers to use for `info_dict`.

    Starts from a copy of std_headers, overlays the 'http_headers' of
    info_dict, then sets 'Cookie' when _calc_cookies yields a value and
    'X-Forwarded-For' from '__x_forwarded_for_ip' unless already present.
    """
    headers = std_headers.copy()
    custom_headers = info_dict.get('http_headers')
    if custom_headers:
        headers.update(custom_headers)

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip and 'X-Forwarded-For' not in headers:
        headers['X-Forwarded-For'] = forwarded_ip

    return headers
2266
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header the cookiejar adds to a request for info_dict['url']."""
    probe_request = sanitized_Request(info_dict['url'])
    # The cookiejar writes its matching cookies into the request's headers
    self.cookiejar.add_cookie_header(probe_request)
    cookie_header = probe_request.get_header('Cookie')
    return cookie_header
2271
2272 def _sort_thumbnails(self, thumbnails):
2273 thumbnails.sort(key=lambda t: (
2274 t.get('preference') if t.get('preference') is not None else -1,
2275 t.get('width') if t.get('width') is not None else -1,
2276 t.get('height') if t.get('height') is not None else -1,
2277 t.get('id') if t.get('id') is not None else '',
2278 t.get('url')))
2279
2280 def _sanitize_thumbnails(self, info_dict):
2281 thumbnails = info_dict.get('thumbnails')
2282 if thumbnails is None:
2283 thumbnail = info_dict.get('thumbnail')
2284 if thumbnail:
2285 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2286 if not thumbnails:
2287 return
2288
2289 def check_thumbnails(thumbnails):
2290 for t in thumbnails:
2291 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2292 try:
2293 self.urlopen(HEADRequest(t['url']))
2294 except network_exceptions as err:
2295 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2296 continue
2297 yield t
2298
2299 self._sort_thumbnails(thumbnails)
2300 for i, t in enumerate(thumbnails):
2301 if t.get('id') is None:
2302 t['id'] = '%d' % i
2303 if t.get('width') and t.get('height'):
2304 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2305 t['url'] = sanitize_url(t['url'])
2306
2307 if self.params.get('check_formats') is True:
2308 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2309 else:
2310 info_dict['thumbnails'] = thumbnails
2311
2312 def process_video_result(self, info_dict, download=True):
2313 assert info_dict.get('_type', 'video') == 'video'
2314 self._num_videos += 1
2315
2316 if 'id' not in info_dict:
2317 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2318 elif not info_dict.get('id'):
2319 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2320
2321 info_dict['fulltitle'] = info_dict.get('title')
2322 if 'title' not in info_dict:
2323 raise ExtractorError('Missing "title" field in extractor result',
2324 video_id=info_dict['id'], ie=info_dict['extractor'])
2325 elif not info_dict.get('title'):
2326 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2327 info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
2328
2329 def report_force_conversion(field, field_not, conversion):
2330 self.report_warning(
2331 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2332 % (field, field_not, conversion))
2333
2334 def sanitize_string_field(info, string_field):
2335 field = info.get(string_field)
2336 if field is None or isinstance(field, compat_str):
2337 return
2338 report_force_conversion(string_field, 'a string', 'string')
2339 info[string_field] = compat_str(field)
2340
2341 def sanitize_numeric_fields(info):
2342 for numeric_field in self._NUMERIC_FIELDS:
2343 field = info.get(numeric_field)
2344 if field is None or isinstance(field, compat_numeric_types):
2345 continue
2346 report_force_conversion(numeric_field, 'numeric', 'int')
2347 info[numeric_field] = int_or_none(field)
2348
2349 sanitize_string_field(info_dict, 'id')
2350 sanitize_numeric_fields(info_dict)
2351
2352 if 'playlist' not in info_dict:
2353 # It isn't part of a playlist
2354 info_dict['playlist'] = None
2355 info_dict['playlist_index'] = None
2356
2357 self._sanitize_thumbnails(info_dict)
2358
2359 thumbnail = info_dict.get('thumbnail')
2360 thumbnails = info_dict.get('thumbnails')
2361 if thumbnail:
2362 info_dict['thumbnail'] = sanitize_url(thumbnail)
2363 elif thumbnails:
2364 info_dict['thumbnail'] = thumbnails[-1]['url']
2365
2366 if info_dict.get('display_id') is None and 'id' in info_dict:
2367 info_dict['display_id'] = info_dict['id']
2368
2369 if info_dict.get('duration') is not None:
2370 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2371
2372 for ts_key, date_key in (
2373 ('timestamp', 'upload_date'),
2374 ('release_timestamp', 'release_date'),
2375 ('modified_timestamp', 'modified_date'),
2376 ):
2377 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2378 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2379 # see http://bugs.python.org/issue1646728)
2380 try:
2381 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2382 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2383 except (ValueError, OverflowError, OSError):
2384 pass
2385
2386 live_keys = ('is_live', 'was_live')
2387 live_status = info_dict.get('live_status')
2388 if live_status is None:
2389 for key in live_keys:
2390 if info_dict.get(key) is False:
2391 continue
2392 if info_dict.get(key):
2393 live_status = key
2394 break
2395 if all(info_dict.get(key) is False for key in live_keys):
2396 live_status = 'not_live'
2397 if live_status:
2398 info_dict['live_status'] = live_status
2399 for key in live_keys:
2400 if info_dict.get(key) is None:
2401 info_dict[key] = (live_status == key)
2402
2403 # Auto generate title fields corresponding to the *_number fields when missing
2404 # in order to always have clean titles. This is very common for TV series.
2405 for field in ('chapter', 'season', 'episode'):
2406 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2407 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2408
2409 for cc_kind in ('subtitles', 'automatic_captions'):
2410 cc = info_dict.get(cc_kind)
2411 if cc:
2412 for _, subtitle in cc.items():
2413 for subtitle_format in subtitle:
2414 if subtitle_format.get('url'):
2415 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2416 if subtitle_format.get('ext') is None:
2417 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2418
2419 automatic_captions = info_dict.get('automatic_captions')
2420 subtitles = info_dict.get('subtitles')
2421
2422 info_dict['requested_subtitles'] = self.process_subtitles(
2423 info_dict['id'], subtitles, automatic_captions)
2424
2425 if info_dict.get('formats') is None:
2426 # There's only one format available
2427 formats = [info_dict]
2428 else:
2429 formats = info_dict['formats']
2430
2431 info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2432 if not self.params.get('allow_unplayable_formats'):
2433 formats = [f for f in formats if not f.get('has_drm')]
2434
2435 if info_dict.get('is_live'):
2436 get_from_start = bool(self.params.get('live_from_start'))
2437 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2438 if not get_from_start:
2439 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2440
2441 if not formats:
2442 self.raise_no_formats(info_dict)
2443
2444 def is_wellformed(f):
2445 url = f.get('url')
2446 if not url:
2447 self.report_warning(
2448 '"url" field is missing or empty - skipping format, '
2449 'there is an error in extractor')
2450 return False
2451 if isinstance(url, bytes):
2452 sanitize_string_field(f, 'url')
2453 return True
2454
2455 # Filter out malformed formats for better extraction robustness
2456 formats = list(filter(is_wellformed, formats))
2457
2458 formats_dict = {}
2459
2460 # We check that all the formats have the format and format_id fields
2461 for i, format in enumerate(formats):
2462 sanitize_string_field(format, 'format_id')
2463 sanitize_numeric_fields(format)
2464 format['url'] = sanitize_url(format['url'])
2465 if not format.get('format_id'):
2466 format['format_id'] = compat_str(i)
2467 else:
2468 # Sanitize format_id from characters used in format selector expression
2469 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2470 format_id = format['format_id']
2471 if format_id not in formats_dict:
2472 formats_dict[format_id] = []
2473 formats_dict[format_id].append(format)
2474
2475 # Make sure all formats have unique format_id
2476 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2477 for format_id, ambiguous_formats in formats_dict.items():
2478 ambigious_id = len(ambiguous_formats) > 1
2479 for i, format in enumerate(ambiguous_formats):
2480 if ambigious_id:
2481 format['format_id'] = '%s-%d' % (format_id, i)
2482 if format.get('ext') is None:
2483 format['ext'] = determine_ext(format['url']).lower()
2484 # Ensure there is no conflict between id and ext in format selection
2485 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2486 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2487 format['format_id'] = 'f%s' % format['format_id']
2488
2489 for i, format in enumerate(formats):
2490 if format.get('format') is None:
2491 format['format'] = '{id} - {res}{note}'.format(
2492 id=format['format_id'],
2493 res=self.format_resolution(format),
2494 note=format_field(format, 'format_note', ' (%s)'),
2495 )
2496 if format.get('protocol') is None:
2497 format['protocol'] = determine_protocol(format)
2498 if format.get('resolution') is None:
2499 format['resolution'] = self.format_resolution(format, default=None)
2500 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2501 format['dynamic_range'] = 'SDR'
2502 if (info_dict.get('duration') and format.get('tbr')
2503 and not format.get('filesize') and not format.get('filesize_approx')):
2504 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2505
2506 # Add HTTP headers, so that external programs can use them from the
2507 # json output
2508 full_format_info = info_dict.copy()
2509 full_format_info.update(format)
2510 format['http_headers'] = self._calc_headers(full_format_info)
2511 # Remove private housekeeping stuff
2512 if '__x_forwarded_for_ip' in info_dict:
2513 del info_dict['__x_forwarded_for_ip']
2514
2515 # TODO Central sorting goes here
2516
2517 if self.params.get('check_formats') is True:
2518 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2519
2520 if not formats or formats[0] is not info_dict:
2521 # only set the 'formats' fields if the original info_dict list them
2522 # otherwise we end up with a circular reference, the first (and unique)
2523 # element in the 'formats' field in info_dict is info_dict itself,
2524 # which can't be exported to json
2525 info_dict['formats'] = formats
2526
2527 info_dict, _ = self.pre_process(info_dict)
2528
2529 # The pre-processors may have modified the formats
2530 formats = info_dict.get('formats', [info_dict])
2531
2532 list_only = self.params.get('simulate') is None and (
2533 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2534 interactive_format_selection = not list_only and self.format_selector == '-'
2535 if self.params.get('list_thumbnails'):
2536 self.list_thumbnails(info_dict)
2537 if self.params.get('listsubtitles'):
2538 if 'automatic_captions' in info_dict:
2539 self.list_subtitles(
2540 info_dict['id'], automatic_captions, 'automatic captions')
2541 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2542 if self.params.get('listformats') or interactive_format_selection:
2543 self.list_formats(info_dict)
2544 if list_only:
2545 # Without this printing, -F --print-json will not work
2546 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2547 return
2548
2549 format_selector = self.format_selector
2550 if format_selector is None:
2551 req_format = self._default_format_spec(info_dict, download=download)
2552 self.write_debug('Default format spec: %s' % req_format)
2553 format_selector = self.build_format_selector(req_format)
2554
2555 while True:
2556 if interactive_format_selection:
2557 req_format = input(
2558 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2559 try:
2560 format_selector = self.build_format_selector(req_format)
2561 except SyntaxError as err:
2562 self.report_error(err, tb=False, is_error=False)
2563 continue
2564
2565 # While in format selection we may need to have an access to the original
2566 # format set in order to calculate some metrics or do some processing.
2567 # For now we need to be able to guess whether original formats provided
2568 # by extractor are incomplete or not (i.e. whether extractor provides only
2569 # video-only or audio-only formats) for proper formats selection for
2570 # extractors with such incomplete formats (see
2571 # https://github.com/ytdl-org/youtube-dl/pull/5556).
2572 # Since formats may be filtered during format selection and may not match
2573 # the original formats the results may be incorrect. Thus original formats
2574 # or pre-calculated metrics should be passed to format selection routines
2575 # as well.
2576 # We will pass a context object containing all necessary additional data
2577 # instead of just formats.
2578 # This fixes incorrect format selection issue (see
2579 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2580 incomplete_formats = (
2581 # All formats are video-only or
2582 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2583 # all formats are audio-only
2584 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2585
2586 ctx = {
2587 'formats': formats,
2588 'incomplete_formats': incomplete_formats,
2589 }
2590
2591 formats_to_download = list(format_selector(ctx))
2592 if interactive_format_selection and not formats_to_download:
2593 self.report_error('Requested format is not available', tb=False, is_error=False)
2594 continue
2595 break
2596
2597 if not formats_to_download:
2598 if not self.params.get('ignore_no_formats_error'):
2599 raise ExtractorError('Requested format is not available', expected=True,
2600 video_id=info_dict['id'], ie=info_dict['extractor'])
2601 self.report_warning('Requested format is not available')
2602 # Process what we can, even without any available formats.
2603 formats_to_download = [{}]
2604
2605 best_format = formats_to_download[-1]
2606 if download:
2607 if best_format:
2608 self.to_screen(
2609 f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2610 + ', '.join([f['format_id'] for f in formats_to_download]))
2611 max_downloads_reached = False
2612 for i, fmt in enumerate(formats_to_download):
2613 formats_to_download[i] = new_info = dict(info_dict)
2614 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2615 new_info.update(fmt)
2616 new_info['__original_infodict'] = info_dict
2617 try:
2618 self.process_info(new_info)
2619 except MaxDownloadsReached:
2620 max_downloads_reached = True
2621 new_info.pop('__original_infodict')
2622 # Remove copied info
2623 for key, val in tuple(new_info.items()):
2624 if info_dict.get(key) == val:
2625 new_info.pop(key)
2626 if max_downloads_reached:
2627 break
2628
2629 write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
2630 assert write_archive.issubset({True, False, 'ignore'})
2631 if True in write_archive and False not in write_archive:
2632 self.record_download_archive(info_dict)
2633
2634 info_dict['requested_downloads'] = formats_to_download
2635 info_dict = self.run_all_pps('after_video', info_dict)
2636 if max_downloads_reached:
2637 raise MaxDownloadsReached()
2638
2639 # We update the info dict with the selected best quality format (backwards compatibility)
2640 info_dict.update(best_format)
2641 return info_dict
2642
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format.

    video_id is only used in warning messages. normal_subtitles and
    automatic_captions are mappings of language -> list of format dicts
    (each format dict is read through its 'ext' key); either may be None.

    Returns a dict mapping each selected language to the chosen format
    dict, or None when no subtitles were requested through the
    'writesubtitles' / 'writeautomaticsub' params or none are available.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            # Normal subtitles take precedence over automatic captions
            available_subs.setdefault(lang, cap_info)

    # Nothing is ever collected when neither option is enabled,
    # so a single emptiness check covers both cases
    if not available_subs:
        return None

    all_sub_langs = available_subs.keys()
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        # A list is used so that the order of languages will be the same as
        # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
        requested_langs = []
        for lang_re in self.params.get('subtitleslangs'):
            if not lang_re:
                # An empty pattern selects nothing; skipping it also avoids
                # an IndexError on the prefix check below
                continue
            discard = lang_re[0] == '-'
            if discard:
                lang_re = lang_re[1:]
            if lang_re == 'all':
                if discard:
                    requested_langs = []
                else:
                    requested_langs.extend(all_sub_langs)
                continue
            current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
            if discard:
                for lang in current_langs:
                    while lang in requested_langs:
                        requested_langs.remove(lang)
            else:
                requested_langs.extend(current_langs)
        requested_langs = orderedSet(requested_langs)
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [list(all_sub_langs)[0]]
    if requested_langs:
        self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        if not formats:
            # Covers both an unknown language and a language with an empty
            # format list (which would otherwise fail on formats[-1])
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                chosen = formats[-1]
                break
            matches = [fmt for fmt in formats if fmt['ext'] == ext]
            if matches:
                chosen = matches[-1]
                break
        else:
            # No preference matched: fall back to the last listed format
            chosen = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs
2713
def _forceprint(self, key, info_dict):
    """Emit the forced-print templates registered under `key`.

    Templates from params['forceprint'][key] are evaluated and sent to
    stdout; (template, file template) pairs from
    params['print_to_file'][key] are evaluated and appended to the file
    named by the evaluated file template. Does nothing when info_dict
    is None.
    """
    if info_dict is None:
        return

    video_id = info_dict.get('id')
    # The rendered tables are only added to a copy; info_dict is left untouched
    info_copy = info_dict.copy()
    info_copy.update({
        'formats_table': self.render_formats_table(info_dict),
        'thumbnails_table': self.render_thumbnails_table(info_dict),
        'subtitles_table': self.render_subtitles_table(video_id, info_dict.get('subtitles')),
        'automatic_captions_table': self.render_subtitles_table(
            video_id, info_dict.get('automatic_captions')),
    })

    def expand(template):
        # A bare field name ("title") or "field=" is shorthand for a full
        # output template; anything else is used verbatim
        shorthand = re.match(r'\w+(=?)$', template)
        if not shorthand:
            return template
        if shorthand.group(1):
            field = template[:-1]
            return f'{field} = %({field})r'
        return f'%({template})s'

    for template in self.params['forceprint'].get(key, []):
        self.to_stdout(self.evaluate_outtmpl(expand(template), info_copy))

    for template, file_tmpl in self.params['print_to_file'].get(key, []):
        # The target filename is evaluated against the original info_dict
        filename = self.evaluate_outtmpl(file_tmpl, info_dict)
        template = expand(template)
        self.to_screen(f'[info] Writing {template!r} to: {filename}')
        if not self._ensure_dir_exists(filename):
            continue
        with io.open(filename, 'a', encoding='utf-8') as outfile:
            outfile.write(self.evaluate_outtmpl(template, info_copy) + '\n')
2741
def __forced_printings(self, info_dict, filename, incomplete):
    """Print the fields requested through the 'force*' params to stdout.

    Works on a copy of info_dict, extended with 'filename' (when given)
    and a combined 'urls' entry. With incomplete=True, mandatory fields
    that are missing are skipped instead of being looked up directly.
    """
    info_dict = info_dict.copy()
    if filename is not None:
        info_dict['filename'] = filename

    requested_formats = info_dict.get('requested_formats')
    if requested_formats is not None:
        # For RTMP URLs, also include the playpath
        info_dict['urls'] = '\n'.join(
            fmt['url'] + fmt.get('play_path', '') for fmt in requested_formats)
    elif 'url' in info_dict:
        info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

    def print_mandatory(field, actual_field=None):
        # `field` names the param ("force<field>"), `actual_field` the info key
        source = field if actual_field is None else actual_field
        if not self.params.get('force%s' % field, False):
            return
        if incomplete and info_dict.get(source) is None:
            return
        self.to_stdout(info_dict[source])

    def print_optional(field):
        if not self.params.get('force%s' % field, False):
            return
        if info_dict.get(field) is not None:
            self.to_stdout(info_dict[field])

    if (self.params.get('forcejson')
            or self.params['forceprint'].get('video')
            or self.params['print_to_file'].get('video')):
        self.post_extract(info_dict)
    self._forceprint('video', info_dict)

    print_mandatory('title')
    print_mandatory('id')
    print_mandatory('url', 'urls')
    for optional_field in ('thumbnail', 'description', 'filename'):
        print_optional(optional_field)
    if self.params.get('forceduration') and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    print_mandatory('format')

    if self.params.get('forcejson'):
        self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2782
def dl(self, name, info, subtitle=False, test=False):
    """Run a suitable downloader for `info`, writing to `name`.

    With test=True a fixed set of test parameters is handed to the
    downloader instead of self.params, and no progress hooks are attached.
    Returns whatever the downloader's download() returns.
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    if not test:
        params = self.params
    else:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_cls(self, params)
    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)

        shown_urls = []
        for fmt in info.get('requested_formats', []) or [info]:
            url = fmt['url']
            if url.startswith('data:'):
                # Keep only the part of a data: URL before the first comma
                url = url.split(',')[0] + ',<data>'
            shown_urls.append(url)
        self.write_debug('Invoking downloader on "%s"' % '", "'.join(shown_urls))

    # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
    # But it may contain objects that are not deep-copyable
    new_info = self._copy_infodict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)
2817
def existing_file(self, filepaths, *, default_overwrite=True):
    """Return the first existing path among `filepaths`, or clear them all.

    When overwriting is disabled (the 'overwrites' param, falling back to
    default_overwrite) and at least one path exists, the first existing
    path is returned. Otherwise every existing path is deleted and None
    is returned.
    """
    present = [path for path in orderedSet(filepaths) if os.path.exists(path)]
    if present and not self.params.get('overwrites', default_overwrite):
        return present[0]

    for path in present:
        self.report_file_delete(path)
        os.remove(path)
    return None
2827
def process_info(self, info_dict):
    """Process a single resolved IE result. (Modified it in-place)

    Steps, in order: apply the match filter, run forced printings, write
    the side files (description, subtitles, thumbnails, info json,
    annotations, internet shortcuts), run the 'before_dl' pre-processors,
    then either skip the download or download (and, for multiple requested
    formats, prepare the merge), queue fixup post-processors, run
    post-processing and the post hooks.

    The outcome is recorded in info_dict['__write_download_archive'].
    Most failures are reported and end processing with a plain return.

    Raises UnavailableVideoError when the download fails with an OSError
    and MaxDownloadsReached once 'max_downloads' downloads have been counted.
    """

    assert info_dict.get('_type', 'video') == 'video'
    original_infodict = info_dict

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    if self._match_entry(info_dict) is not None:
        info_dict['__write_download_archive'] = 'ignore'
        return

    self.post_extract(info_dict)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    if self.params.get('simulate'):
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
        return

    if full_filename is None:
        return
    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self._write_description('video', info_dict,
                               self.prepare_filename(info_dict, 'description')) is None:
        return

    sub_files = self._write_subtitles(info_dict, temp_filename)
    if sub_files is None:
        return
    files_to_move.update(dict(sub_files))

    thumb_files = self._write_thumbnails(
        'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
    if thumb_files is None:
        return
    files_to_move.update(dict(thumb_files))

    infofn = self.prepare_filename(info_dict, 'infojson')
    _infojson_written = self._write_info_json('video', info_dict, infofn)
    if _infojson_written:
        info_dict['infojson_filename'] = infofn
        # For backward compatibility, even though it was a private field
        info_dict['__infojson_filename'] = infofn
    elif _infojson_written is None:
        return

    # Note: Annotations are deprecated
    annofn = None
    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
    if annofn:
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    # Write internet shortcut files
    def _write_link_file(link_type):
        # Returns False only on a hard failure (which aborts processing);
        # an unknown URL or an already present file count as success
        url = try_get(info_dict['webpage_url'], iri_to_uri)
        if not url:
            self.report_warning(
                f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
            return True
        linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
        if not self._ensure_dir_exists(encodeFilename(linkfn)):
            return False
        # Keep an existing shortcut only when overwriting is disabled,
        # consistent with the annotations handling above
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
            return True
        try:
            self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
            with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                         newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                template_vars = {'url': url}
                if link_type == 'desktop':
                    # File name without the ".desktop" extension
                    template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
        except (OSError, IOError):
            self.report_error(f'Cannot write internet shortcut {linkfn}')
            return False
        return True

    write_links = {
        'url': self.params.get('writeurllink'),
        'webloc': self.params.get('writewebloclink'),
        'desktop': self.params.get('writedesktoplink'),
    }
    if self.params.get('writelink'):
        # Generic option: pick the shortcut type by platform
        link_type = ('webloc' if sys.platform == 'darwin'
                     else 'desktop' if sys.platform.startswith('linux')
                     else 'url')
        write_links[link_type] = True

    if any(should_write and not _write_link_file(link_type)
           for link_type, should_write in write_links.items()):
        return

    def replace_info_dict(new_info):
        # Copy the new contents into the existing dict object so that the
        # caller's reference stays valid (see the assertion near the end)
        nonlocal info_dict
        if new_info == info_dict:
            return
        info_dict.clear()
        info_dict.update(new_info)

    try:
        new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
        replace_info_dict(new_info)
    except PostProcessingError as err:
        self.report_error('Preprocessing: %s' % str(err))
        return

    if self.params.get('skip_download'):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_video_file(*filepaths):
                # For every candidate, the name with the final extension is
                # checked before the candidate itself
                ext = info_dict.get('ext')
                converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
                file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
                                          default_overwrite=False)
                if file:
                    info_dict['ext'] = os.path.splitext(file)[1][1:]
                return file

            success = True
            if info_dict.get('requested_formats') is not None:

                def compatible_formats(formats):
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(format.get('ext') for format in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv')
                    if (info_dict['ext'] == 'webm'
                            and info_dict.get('thumbnails')
                            # check with type instead of pp_key, __name__, or isinstance
                            # since we dont want any custom PPs to trigger this
                            and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return '%s.%s' % (filename_wo_ext, ext)

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_video_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                downloaded = []
                merger = FFmpegMergerPP(self)

                fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif fd:
                    # A single downloader handles all requested formats at once
                    for f in requested_formats if fd != FFmpegFD else []:
                        f['filepath'] = fname = prepend_extension(
                            correct_ext(temp_filename, info_dict['ext']),
                            'f%s' % f['format_id'], info_dict['ext'])
                        downloaded.append(fname)
                    info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
                        if not self.params.get('ignoreerrors'):
                            self.report_error(f'{msg}. Aborting due to --abort-on-error')
                            return
                        self.report_warning(f'{msg}. The formats won\'t be merged')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                        fname = temp_filename
                    # Download each requested format separately
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            f['filepath'] = fname
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success

                if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
                    info_dict['__postprocessors'].append(merger)
                    info_dict['__files_to_merge'] = downloaded
                    # Even if there were no downloads, it is being merged only now
                    info_dict['__real_download'] = True
                else:
                    for file in downloaded:
                        files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_video_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and full_filename != '-':

            def fixup():
                # Queue (or only warn about) ffmpeg-based repairs,
                # depending on the 'fixup' param
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = False
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    if not cndn:
                        return
                    if not do_fixup:
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(
                    stretched_ratio not in (1, None),
                    f'Non-uniform pixel ratio {stretched_ratio}',
                    FFmpegFixupStretchedPP)

                ffmpeg_fixup(
                    (info_dict.get('requested_formats') is None
                     and info_dict.get('container') == 'm4a_dash'
                     and info_dict.get('ext') == 'm4a'),
                    'writing DASH m4a. Only some players support this container',
                    FFmpegFixupM4aPP)

                downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                downloader = downloader.__name__ if downloader else None

                if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
                    ffmpeg_fixup(downloader == 'HlsFD',
                                 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                 FFmpegFixupM3u8PP)
                    ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
                                 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            info_dict['__write_download_archive'] = True

    if self.params.get('force_write_download_archive'):
        info_dict['__write_download_archive'] = True

    # Make sure the info_dict was modified in-place
    assert info_dict is original_infodict

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None and self._num_downloads >= int(max_downloads):
        raise MaxDownloadsReached()
3199
def __download_wrapper(self, func):
    """Wrap `func` with the error handling shared by the download entry points.

    The returned wrapper reports UnavailableVideoError, announces and
    re-raises MaxDownloadsReached, and announces DownloadCancelled
    (re-raising it unless the 'break_per_url' param is set). On success
    the result is dumped to stdout as JSON when 'dump_single_json' is set.
    The wrapper itself always returns None.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except UnavailableVideoError as err:
            self.report_error(err)
            return
        except MaxDownloadsReached as err:
            self.to_screen(f'[info] {err}')
            raise
        except DownloadCancelled as err:
            self.to_screen(f'[info] {err}')
            if not self.params.get('break_per_url'):
                raise
            return

        # Only reached when func completed without raising
        if self.params.get('dump_single_json', False):
            self.post_extract(result)
            self.to_stdout(json.dumps(self.sanitize_info(result)))
    return wrapper
3219
def download(self, url_list):
    """Download a given list of URLs."""
    url_list = variadic(url_list)  # Passing a single URL is a common mistake
    outtmpl = self.outtmpl_dict['default']

    # Several URLs with an output template that has no '%' field would
    # all be written to the same file
    if len(url_list) > 1 and outtmpl != '-' and '%' not in outtmpl:
        if self.params.get('max_downloads') != 1:
            raise SameFileError(outtmpl)

    for url in url_list:
        extract = self.__download_wrapper(self.extract_info)
        extract(url, force_generic_extractor=self.params.get('force_generic_extractor', False))

    return self._download_retcode
3235
def download_with_info_file(self, info_filename):
    """Process the info JSON stored in ``info_filename`` as a download.

    If processing fails with DownloadError, EntryNotInPlaylist or
    ReExtractInfo and the info has a ``webpage_url``, the download is
    retried from that URL; without one the error is re-raised.
    Returns ``self._download_retcode`` (or the result of the retry).
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        loaded = json.loads('\n'.join(lines))
        info = self.sanitize_info(loaded, self.params.get('clean_infojson', True))

    try:
        self.__download_wrapper(self.process_ie_result)(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as err:
        if not isinstance(err, EntryNotInPlaylist):
            self.to_stderr('\r')
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning(f'The info failed to download: {err}; trying with URL {webpage_url}')
        return self.download([webpage_url])
    return self._download_retcode
3254
@staticmethod
def sanitize_info(info_dict, remove_private_keys=False):
    ''' Sanitize the infodict for converting to json

    ``epoch`` and ``_type`` defaults are set on ``info_dict`` itself; the
    returned structure is a filtered copy in which anything that is not a
    dict, list-like, str, int, float, bool or None is replaced by its repr().
    With ``remove_private_keys``, keys starting with "_" (except ``_type``),
    a fixed set of internal keys and None values are dropped as well.
    '''
    if info_dict is None:
        return info_dict
    info_dict.setdefault('epoch', int(time.time()))
    info_dict.setdefault('_type', 'video')

    # Always remove this since this may contain a copy of the entire dict
    remove_keys = {'__original_infodict'}
    # Always keep this to facilitate load-info-json
    keep_keys = ['_type']

    if remove_private_keys:
        remove_keys.update((
            'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
            'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
        ))

        def reject(key, value):
            if key in keep_keys:
                return False
            return key.startswith('_') or key in remove_keys or value is None
    else:
        def reject(key, value):
            return key in remove_keys

    def filter_fn(obj):
        if isinstance(obj, dict):
            return {key: filter_fn(value) for key, value in obj.items() if not reject(key, value)}
        if isinstance(obj, (list, tuple, set, LazyList)):
            return [filter_fn(item) for item in obj]
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        return repr(obj)

    return filter_fn(info_dict)
3285
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    ''' Alias of sanitize_info for backward compatibility

    ``actually_filter`` is forwarded as ``remove_private_keys``.
    '''
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)
3290
@staticmethod
def post_extract(info_dict):
    """Run deferred ``__post_extractor`` callables and merge their results in place.

    For ``playlist``/``multi_video`` results only the entries are visited.
    For any other result the callable stored under ``__post_extractor``
    (if any) is called, its items are merged into the dict and into its
    ``__original_infodict`` (when present), and the key is removed from both.
    """
    def resolve(entry):
        if entry.get('_type') in ('playlist', 'multi_video'):
            for child in entry.get('entries', {}):
                resolve(child or {})
            return

        deferred = entry.get('__post_extractor')
        extra = (deferred() if deferred else {}).items()
        entry.update(extra)
        entry.pop('__post_extractor', None)

        original = entry.get('__original_infodict') or {}
        original.update(extra)
        original.pop('__post_extractor', None)

    resolve(info_dict or {})
3309
def run_pp(self, pp, infodict):
    """Run a single postprocessor and handle the files it asks to delete.

    ``pp.run`` must return ``(files_to_delete, infodict)``. With the
    ``keepvideo`` param those files are registered in ``__files_to_move``
    instead of being removed. Returns the (possibly replaced) infodict.
    A PostProcessingError is reported and swallowed only when the
    ``ignoreerrors`` param is exactly True; otherwise it propagates.
    """
    infodict.setdefault('__files_to_move', {})
    try:
        files_to_delete, infodict = pp.run(infodict)
    except PostProcessingError as err:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is not True:
            raise
        self.report_error(err)
        return infodict

    if not files_to_delete:
        return infodict

    if self.params.get('keepvideo', False):
        for kept in files_to_delete:
            infodict['__files_to_move'].setdefault(kept, '')
        return infodict

    for old_filename in set(files_to_delete):
        self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
        try:
            os.remove(encodeFilename(old_filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded original file')
        # A deleted file must no longer be scheduled for moving
        infodict['__files_to_move'].pop(old_filename, None)
    return infodict
3338
def run_all_pps(self, key, info, *, additional_pps=None):
    """Run every postprocessor registered under ``key`` and return the final info.

    ``additional_pps`` (a list, if given) run before the registered ones.
    ``self._forceprint`` is invoked for ``key`` before any of them.
    """
    self._forceprint(key, info)
    extra = additional_pps or []
    for postprocessor in extra + self._pps[key]:
        info = self.run_pp(postprocessor, info)
    return info
3344
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the ``key`` postprocessors on a shallow copy of ``ie_info``.

    Returns a ``(info, files_to_move)`` tuple; the ``__files_to_move``
    entry is removed from the returned info.
    """
    working = dict(ie_info)
    working['__files_to_move'] = files_to_move or {}
    processed = self.run_all_pps(key, working)
    pending_moves = processed.pop('__files_to_move', None)
    return processed, pending_moves
3350
def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Order: the ``post_process`` postprocessors (preceded by any listed in
    ``info['__postprocessors']``), then MoveFilesAfterDownloadPP, then the
    ``after_move`` postprocessors. Returns the resulting info dict.
    """
    info.update({
        'filepath': filename,
        '__files_to_move': files_to_move or {},
    })
    info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    # The files have been moved by now; the bookkeeping entry is not kept
    del info['__files_to_move']
    return self.run_all_pps('after_move', info)
3359
def _make_archive_id(self, info_dict):
    """Build the download-archive ID ("<extractor in lowercase> <video id>").

    Returns None when the info has no ``id``, or when no extractor key is
    present and none of ``self._ies`` is suitable for ``info_dict['url']``.
    """
    video_id = info_dict.get('id')
    if not video_id:
        return

    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        url = str_or_none(info_dict.get('url'))
        if not url:
            return
        # Try to find matching extractor for the URL and take its ie_key
        extractor = next(
            (ie_key for ie_key, ie in self._ies.items() if ie.suitable(url)), None)
        if extractor is None:
            return
    return f'{extractor.lower()} {video_id}'
3379
def in_download_archive(self, info_dict):
    """Return True if the video described by ``info_dict`` is in ``self.archive``.

    Always False when the ``download_archive`` param is unset or when no
    archive ID can be built for the video.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    # A missing ID means incomplete video information
    return bool(archive_id) and archive_id in self.archive
3390
def record_download_archive(self, info_dict):
    """Append the video's archive ID to the archive file and to ``self.archive``.

    Does nothing when the ``download_archive`` param is unset.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return

    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    self.write_debug(f'Adding to archive: {archive_id}')
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
3401
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short resolution label for a format dict.

    Precedence: 'audio only' (vcodec is 'none' but acodec is not), an
    explicit ``resolution``, "WxH", "Hp", "Wx?", and finally ``default``.
    """
    if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
        return 'audio only'

    explicit = format.get('resolution')
    if explicit is not None:
        return explicit

    width, height = format.get('width'), format.get('height')
    if width and height:
        return '%dx%d' % (width, height)
    if height:
        return '%sp' % height
    if width:
        return '%dx?' % width
    return default
3415
def _list_format_headers(self, *headers):
    """Return the table headers, styled unless ``listformats_table`` is False.

    With the param set to False the ``headers`` tuple is returned untouched;
    otherwise a list of headers styled with ``self.Styles.HEADERS``.
    """
    if self.params.get('listformats_table', True) is False:
        return headers
    return [self._format_screen(title, self.Styles.HEADERS) for title in headers]
3420
def _format_note(self, fdict):
    """Build the free-form "note" text describing a format dict.

    The pieces (unsupported marker, language, format note, bitrates,
    container, codecs, fps, sample rate, file size) are appended in a fixed
    order; each piece is only emitted when its field is present.
    """
    res = ''

    def add(text, sep=''):
        # ``sep`` is only written when something precedes the new piece
        nonlocal res
        if res:
            res += sep
        res += text

    vcodec, acodec = fdict.get('vcodec'), fdict.get('acodec')
    vbr, abr = fdict.get('vbr'), fdict.get('abr')

    if fdict.get('ext') in ['f4f', 'f4m']:
        add('(unsupported)')
    if fdict.get('language'):
        add('[%s]' % fdict['language'], ' ')
    if fdict.get('format_note') is not None:
        add(fdict['format_note'], ' ')
    if fdict.get('tbr') is not None:
        add('%4dk' % fdict['tbr'], ', ')
    if fdict.get('container') is not None:
        add('%s container' % fdict['container'], ', ')

    # Video codec and bitrate are glued together as "<codec>@<vbr>k"
    if vcodec is not None and vcodec != 'none':
        add(vcodec, ', ')
        if vbr is not None:
            add('@')
    elif vbr is not None and abr is not None:
        add('video@')
    if vbr is not None:
        add('%4dk' % vbr)

    if fdict.get('fps') is not None:
        add('%sfps' % fdict['fps'], ', ')

    # Audio codec and bitrate are glued together the same way
    if acodec is not None:
        add('video only' if acodec == 'none' else '%-5s' % acodec, ', ')
    elif abr is not None:
        add('audio', ', ')
    if abr is not None:
        add('@%3dk' % abr)
    if fdict.get('asr') is not None:
        add(' (%5dHz)' % fdict['asr'])

    if fdict.get('filesize') is not None:
        add(format_bytes(fdict['filesize']), ', ')
    elif fdict.get('filesize_approx') is not None:
        add('~' + format_bytes(fdict['filesize_approx']), ', ')
    return res
3480
def render_formats_table(self, info_dict):
    """Render the table of available formats, or return None if there are none.

    Formats whose ``preference`` is below -1000 are not listed. When the
    ``listformats_table`` param is False the plain four-column layout is
    produced; otherwise the detailed, styled layout is used.
    """
    if not (info_dict.get('formats') or info_dict.get('url')):
        return None

    formats = info_dict.get('formats', [info_dict])
    listed = [f for f in formats if f.get('preference') is None or f['preference'] >= -1000]

    if self.params.get('listformats_table', True) is False:
        plain_rows = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f),
            ] for f in listed]
        return render_table(['format code', 'extension', 'resolution', 'note'], plain_rows, extra_gap=1)

    delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)

    def codec_cell(f, field, other_field, both_missing, only_other_missing):
        # A 'none' codec is shown as a hint about what the format lacks
        replacement = (
            both_missing if f.get(other_field) == 'none'
            else self._format_screen(only_other_missing, self.Styles.SUPPRESS))
        return format_field(f, field, default='unknown').replace('none', replacement)

    def more_info_cell(f):
        return join_nonempty(
            self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
            format_field(f, 'language', '[%s]'),
            join_nonempty(format_field(f, 'format_note'),
                          format_field(f, 'container', ignore=(None, f.get('ext'))),
                          delim=', '),
            delim=' ')

    def detailed_row(f):
        return [
            self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d'),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            delim,
            format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
            format_field(f, 'tbr', '\t%dk'),
            shorten_protocol_name(f.get('protocol', '')),
            delim,
            codec_cell(f, 'vcodec', 'acodec', 'images', 'audio only'),
            format_field(f, 'vbr', '\t%dk'),
            codec_cell(f, 'acodec', 'vcodec', '', 'video only'),
            format_field(f, 'abr', '\t%dk'),
            format_field(f, 'asr', '\t%dHz'),
            more_info_cell(f),
        ]

    table = [detailed_row(f) for f in listed]
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
        delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    return render_table(
        header_line, table, hide_empty=True,
        delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3533
def render_thumbnails_table(self, info_dict):
    """Render the table of available thumbnails, or return None if there are none."""
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        return None

    header = self._list_format_headers('ID', 'Width', 'Height', 'URL')
    rows = [
        [thumb.get('id'), thumb.get('width', 'unknown'), thumb.get('height', 'unknown'), thumb['url']]
        for thumb in thumbnails]
    return render_table(header, rows)
3541
def render_subtitles_table(self, video_id, subtitles):
    """Render the table of subtitle languages, or return None if there are none.

    ``subtitles`` maps a language to its list of format dicts; formats are
    listed in reverse order. ``video_id`` is not used here.
    """
    if not subtitles:
        return None

    def _row(lang, formats):
        pairs = [(f['ext'], f.get('name') or 'unknown') for f in reversed(formats)]
        exts, names = zip(*pairs)
        if len(set(names)) == 1:
            # A name shared by every format is shown once; 'unknown' not at all
            names = names[:1] if names[0] != 'unknown' else []
        return [lang, ', '.join(names), ', '.join(exts)]

    header = self._list_format_headers('Language', 'Name', 'Formats')
    rows = [_row(lang, formats) for lang, formats in subtitles.items()]
    return render_table(header, rows, hide_empty=True)
3555
def __list_table(self, video_id, name, func, *args):
    """Print the table produced by ``func(*args)``, or a notice when it is empty.

    ``name`` is the kind of item being listed and is used in the messages.
    """
    table = func(*args)
    if table:
        self.to_screen(f'[info] Available {name} for {video_id}:')
        self.to_stdout(table)
    else:
        self.to_screen(f'{video_id} has no {name}')
3563
def list_formats(self, info_dict):
    """Print the formats table for the video described by ``info_dict``."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'formats', self.render_formats_table, info_dict)
3566
def list_thumbnails(self, info_dict):
    """Print the thumbnails table for the video described by ``info_dict``."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'thumbnails', self.render_thumbnails_table, info_dict)
3569
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the subtitles table for ``video_id``; ``name`` labels the listing."""
    renderer = self.render_subtitles_table
    self.__list_table(video_id, name, renderer, video_id, subtitles)
3572
def urlopen(self, req):
    """ Start an HTTP download

    ``req`` may be a URL string (wrapped with sanitized_Request) or an
    already-built request object. The socket timeout set up for this
    instance is applied.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
3578
def print_debug_header(self):
    """Write the verbose-mode debug header.

    Does nothing unless the ``verbose`` param is set. The header lists the
    encodings in use, the yt-dlp version and variant, plugin and
    compatibility information, the Python version, external program
    versions, optional libraries and the proxy map. Lines go to the
    ``logger`` param when one is configured, otherwise they are written
    with write_string / self._write_string.
    """
    if not self.params.get('verbose'):
        return

    def get_encoding(stream):
        # ``encoding`` can exist but be None (e.g. on replaced streams); coerce
        # to str so that appending the suffix below cannot raise TypeError
        ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
        if not supports_terminal_sequences(stream):
            from .compat import WINDOWS_VT_MODE
            ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
        return ret

    encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        get_encoding(self._screen_file), get_encoding(self._err_file),
        self.get_encoding())

    logger = self.params.get('logger')
    if logger:
        write_debug = lambda msg: logger.debug(f'[debug] {msg}')
        write_debug(encoding_str)
    else:
        # The encoding line is written with encoding=None; every later line
        # goes through self._write_string
        write_string(f'[debug] {encoding_str}\n', encoding=None)
        write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    write_debug(join_nonempty(
        'yt-dlp version', __version__,
        f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        delim=' '))
    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if plugin_extractors or plugin_postprocessors:
        write_debug('Plugins: %s' % [
            '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
    if self.params.get('compat_opts'):
        write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))

    if source == 'source':
        try:
            sp = Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, _ = sp.communicate_or_kill()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                write_debug('Git HEAD: %s' % out)
        except Exception:
            # Best effort only: the git revision is optional debug output, so
            # any failure (git missing, not a checkout, ...) is ignored
            pass

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    write_debug('Python version %s (%s %s) - %s' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
    if ffmpeg_features:
        # Sorted so that the output does not depend on set iteration order
        exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .downloader.websocket import has_websockets
    from .postprocessor.embedthumbnail import has_mutagen
    from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE

    lib_str = join_nonempty(
        compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
        SECRETSTORAGE_AVAILABLE and 'secretstorage',
        has_mutagen and 'mutagen',
        SQLITE_AVAILABLE and 'sqlite',
        has_websockets and 'websockets',
        delim=', ') or 'none'
    write_debug('Optional libraries: %s' % lib_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_debug(f'Proxy map: {proxy_map}')

    # Not implemented
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        write_debug('Public IP address: %s' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
3692
def _setup_opener(self):
    """Build the URL opener and store it, the cookie jar and the socket timeout on self.

    Reads the ``socket_timeout``, ``cookiesfrombrowser``, ``cookiefile``,
    ``proxy`` and ``debug_printtraffic`` params.
    """
    timeout_val = self.params.get('socket_timeout')
    self._socket_timeout = float(timeout_val) if timeout_val is not None else 20

    opts_cookiesfrombrowser = self.params.get('cookiesfrombrowser')
    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    self.cookiejar = load_cookies(opts_cookiefile, opts_cookiesfrombrowser, self)
    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if opts_proxy is None:
        # No explicit proxy option: fall back to the system proxy settings
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        # An empty proxy option yields an empty proxy map
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')

    file_handler = compat_urllib_request.FileHandler()
    file_handler.file_open = file_open

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
3740
def encode(self, s):
    """Encode ``s`` with the configured encoding; bytes are returned unchanged.

    A UnicodeEncodeError is re-raised with a hint about the system encoding
    configuration appended to its reason.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    target_encoding = self.get_encoding()
    try:
        return s.encode(target_encoding)
    except UnicodeEncodeError as err:
        err.reason += '. Check your system encoding configuration or use the --encoding option.'
        raise
3750
def get_encoding(self):
    """Return the ``encoding`` param, falling back to preferredencoding() when it is None."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
3756
def _write_info_json(self, label, ie_result, infofn, overwrite=None):
    ''' Write infojson and returns True = written, False = skip, None = error

    ``overwrite`` defaults to the ``overwrites`` param. An already existing
    file that must not be overwritten also counts as written (True).
    '''
    if overwrite is None:
        overwrite = self.params.get('overwrites', True)

    if not self.params.get('writeinfojson'):
        return False
    if not infofn:
        self.write_debug(f'Skipping writing {label} infojson')
        return False
    if not self._ensure_dir_exists(infofn):
        return None

    if not overwrite and os.path.exists(infofn):
        self.to_screen(f'[info] {label.title()} metadata is already present')
        return True

    self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
    try:
        write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
    except (OSError, IOError):
        self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
        return None
    return True
3778
def _write_description(self, label, ie_result, descfn):
    ''' Write description and returns True = written, False = skip, None = error

    An already existing file that must not be overwritten also counts as
    written (True); a missing description is reported as a warning and skipped.
    '''
    if not self.params.get('writedescription'):
        return False
    if not descfn:
        self.write_debug(f'Skipping writing {label} description')
        return False
    if not self._ensure_dir_exists(descfn):
        return None

    if not self.params.get('overwrites', True) and os.path.exists(descfn):
        self.to_screen(f'[info] {label.title()} description is already present')
        return True
    if ie_result.get('description') is None:
        self.report_warning(f'There\'s no {label} description to write')
        return False

    try:
        self.to_screen(f'[info] Writing {label} description to: {descfn}')
        with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
            descfile.write(ie_result['description'])
    except (OSError, IOError):
        self.report_error(f'Cannot write {label} description file {descfn}')
        return None
    return True
3802
def _write_subtitles(self, info_dict, filename):
    ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error

    Only ``info_dict['requested_subtitles']`` are written, and only when the
    ``writesubtitles`` or ``writeautomaticsub`` param is set. Each written
    (or already present) subtitle gets its ``filepath`` recorded.
    '''
    ret = []
    subtitles = info_dict.get('requested_subtitles')
    if not (subtitles and (self.params.get('writesubtitles') or self.params.get('writeautomaticsub'))):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        return ret

    sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
    if not sub_filename_base:
        self.to_screen('[info] Skipping writing video subtitles')
        return ret

    for sub_lang, sub_info in subtitles.items():
        sub_format = sub_info['ext']
        sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
        sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))

        existing_sub = self.existing_file((sub_filename_final, sub_filename))
        if existing_sub:
            self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
            sub_info['filepath'] = existing_sub
            ret.append((existing_sub, sub_filename_final))
            continue

        self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')

        # Subtitle content that is already available is written out directly
        if sub_info.get('data') is not None:
            try:
                # Use newline='' to prevent conversion of newline characters
                # See https://github.com/ytdl-org/youtube-dl/issues/10268
                with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                    subfile.write(sub_info['data'])
            except (OSError, IOError):
                self.report_error(f'Cannot write video subtitles file {sub_filename}')
                return None
            sub_info['filepath'] = sub_filename
            ret.append((sub_filename, sub_filename_final))
            continue

        # Otherwise the subtitle has to be downloaded
        try:
            sub_copy = sub_info.copy()
            sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
            self.dl(sub_filename, sub_copy, subtitle=True)
            sub_info['filepath'] = sub_filename
            ret.append((sub_filename, sub_filename_final))
        except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
            failure = f'Unable to download video subtitles for {sub_lang!r}: {err}'
            if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
                raise DownloadError(failure, err)
            self.report_warning(failure)
    return ret
3852
def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
    ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename)

    Thumbnails are considered only when the ``writethumbnail`` or
    ``write_all_thumbnails`` param is set. Unless all thumbnails were
    requested, processing stops after the first one that is written or
    already present. A thumbnail that fails to download is removed from the
    thumbnail list and a warning is reported.
    '''
    write_all = self.params.get('write_all_thumbnails', False)
    thumbnails, ret = [], []
    if write_all or self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails') or []
    multiple = write_all and len(thumbnails) > 1

    if thumb_filename_base is None:
        thumb_filename_base = filename
    if thumbnails and not thumb_filename_base:
        self.write_debug(f'Skipping writing {label} thumbnail')
        return ret

    # Walk the list backwards so that popping a failed entry does not shift
    # the indices of the entries still to be visited
    for idx, t in list(enumerate(thumbnails))[::-1]:
        thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
        thumb_display_id = f'{label} thumbnail {t["id"]}'
        thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
        thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

        existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
        if existing_thumb:
            self.to_screen('[info] %s is already present' % (
                thumb_display_id if multiple else f'{label} thumbnail').capitalize())
            t['filepath'] = existing_thumb
            ret.append((existing_thumb, thumb_filename_final))
        else:
            self.to_screen(f'[info] Downloading {thumb_display_id} ...')
            try:
                request = sanitized_Request(t['url'], headers=t.get('http_headers', {}))
                # Close the response once it has been copied (or if writing
                # fails) so that the connection is not leaked
                with contextlib.closing(self.urlopen(request)) as uf:
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                ret.append((thumb_filename, thumb_filename_final))
                t['filepath'] = thumb_filename
            except network_exceptions as err:
                thumbnails.pop(idx)
                self.report_warning(f'Unable to download {thumb_display_id}: {err}')
        if ret and not write_all:
            break
    return ret