]> jfr.im git - yt-dlp.git/blob - yt_dlp/YoutubeDL.py
Obey `--abort-on-error` for "ffmpeg not installed"
[yt-dlp.git] / yt_dlp / YoutubeDL.py
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import functools
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import sys
23 import tempfile
24 import time
25 import tokenize
26 import traceback
27 import random
28 import unicodedata
29
30 from enum import Enum
31 from string import ascii_letters
32
33 from .compat import (
34 compat_basestring,
35 compat_get_terminal_size,
36 compat_kwargs,
37 compat_numeric_types,
38 compat_os_name,
39 compat_pycrypto_AES,
40 compat_shlex_quote,
41 compat_str,
42 compat_tokenize_tokenize,
43 compat_urllib_error,
44 compat_urllib_request,
45 compat_urllib_request_DataHandler,
46 windows_enable_vt_mode,
47 )
48 from .cookies import load_cookies
49 from .utils import (
50 age_restricted,
51 args_to_str,
52 ContentTooShortError,
53 date_from_str,
54 DateRange,
55 DEFAULT_OUTTMPL,
56 determine_ext,
57 determine_protocol,
58 DownloadCancelled,
59 DownloadError,
60 encode_compat_str,
61 encodeFilename,
62 EntryNotInPlaylist,
63 error_to_compat_str,
64 ExistingVideoReached,
65 expand_path,
66 ExtractorError,
67 float_or_none,
68 format_bytes,
69 format_field,
70 format_decimal_suffix,
71 formatSeconds,
72 GeoRestrictedError,
73 get_domain,
74 HEADRequest,
75 InAdvancePagedList,
76 int_or_none,
77 iri_to_uri,
78 ISO3166Utils,
79 join_nonempty,
80 LazyList,
81 LINK_TEMPLATES,
82 locked_file,
83 make_dir,
84 make_HTTPS_handler,
85 MaxDownloadsReached,
86 network_exceptions,
87 number_of_digits,
88 orderedSet,
89 OUTTMPL_TYPES,
90 PagedList,
91 parse_filesize,
92 PerRequestProxyHandler,
93 platform_name,
94 Popen,
95 POSTPROCESS_WHEN,
96 PostProcessingError,
97 preferredencoding,
98 prepend_extension,
99 ReExtractInfo,
100 register_socks_protocols,
101 RejectedVideoReached,
102 remove_terminal_sequences,
103 render_table,
104 replace_extension,
105 SameFileError,
106 sanitize_filename,
107 sanitize_path,
108 sanitize_url,
109 sanitized_Request,
110 std_headers,
111 STR_FORMAT_RE_TMPL,
112 STR_FORMAT_TYPES,
113 str_or_none,
114 strftime_or_none,
115 subtitles_filename,
116 supports_terminal_sequences,
117 timetuple_from_msec,
118 to_high_limit_path,
119 traverse_obj,
120 try_get,
121 UnavailableVideoError,
122 url_basename,
123 variadic,
124 version_tuple,
125 write_json_file,
126 write_string,
127 YoutubeDLCookieProcessor,
128 YoutubeDLHandler,
129 YoutubeDLRedirectHandler,
130 )
131 from .cache import Cache
132 from .minicurses import format_text
133 from .extractor import (
134 gen_extractor_classes,
135 get_info_extractor,
136 _LAZY_LOADER,
137 _PLUGIN_CLASSES as plugin_extractors
138 )
139 from .extractor.openload import PhantomJSwrapper
140 from .downloader import (
141 FFmpegFD,
142 get_suitable_downloader,
143 shorten_protocol_name
144 )
145 from .downloader.rtmp import rtmpdump_version
146 from .postprocessor import (
147 get_postprocessor,
148 EmbedThumbnailPP,
149 FFmpegFixupDuplicateMoovPP,
150 FFmpegFixupDurationPP,
151 FFmpegFixupM3u8PP,
152 FFmpegFixupM4aPP,
153 FFmpegFixupStretchedPP,
154 FFmpegFixupTimestampPP,
155 FFmpegMergerPP,
156 FFmpegPostProcessor,
157 MoveFilesAfterDownloadPP,
158 _PLUGIN_CLASSES as plugin_postprocessors
159 )
160 from .update import detect_variant
161 from .version import __version__, RELEASE_GIT_HEAD
162
163 if compat_os_name == 'nt':
164 import ctypes
165
166
167 class YoutubeDL(object):
168 """YoutubeDL class.
169
170 YoutubeDL objects are the ones responsible for downloading the
171 actual video file and writing it to disk if the user has requested
172 it, among some other tasks. In most cases there should be one per
173 program. As, given a video URL, the downloader doesn't know how to
174 extract all the needed information, task that InfoExtractors do, it
175 has to pass the URL to one of them.
176
177 For this, YoutubeDL objects have a method that allows
178 InfoExtractors to be registered in a given order. When it is passed
179 a URL, the YoutubeDL object handles it to the first InfoExtractor it
180 finds that reports being able to handle it. The InfoExtractor extracts
181 all the information about the video or videos the URL refers to, and
182 YoutubeDL processes the extracted information, possibly using a File
183 Downloader to download the video.
184
185 YoutubeDL objects accept a lot of parameters. In order not to saturate
186 the object constructor with arguments, it receives a dictionary of
187 options instead. These options are available through the params
188 attribute for the InfoExtractors to use. The YoutubeDL also
189 registers itself as the downloader in charge for the InfoExtractors
190 that are added to it, so this is a "mutual registration".
191
192 Available options:
193
194 username: Username for authentication purposes.
195 password: Password for authentication purposes.
196 videopassword: Password for accessing a video.
197 ap_mso: Adobe Pass multiple-system operator identifier.
198 ap_username: Multiple-system operator account username.
199 ap_password: Multiple-system operator account password.
200 usenetrc: Use netrc for authentication instead.
201 verbose: Print additional info to stdout.
202 quiet: Do not print messages to stdout.
203 no_warnings: Do not print out anything for warnings.
204 forceprint: A dict with keys WHEN mapped to a list of templates to
205 print to stdout. The allowed keys are video or any of the
206 items in utils.POSTPROCESS_WHEN.
207 For compatibility, a single list is also accepted
208 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
209 a list of tuples with (template, filename)
210 forceurl: Force printing final URL. (Deprecated)
211 forcetitle: Force printing title. (Deprecated)
212 forceid: Force printing ID. (Deprecated)
213 forcethumbnail: Force printing thumbnail URL. (Deprecated)
214 forcedescription: Force printing description. (Deprecated)
215 forcefilename: Force printing final filename. (Deprecated)
216 forceduration: Force printing duration. (Deprecated)
217 forcejson: Force printing info_dict as JSON.
218 dump_single_json: Force printing the info_dict of the whole playlist
219 (or video) as a single JSON line.
220 force_write_download_archive: Force writing download archive regardless
221 of 'skip_download' or 'simulate'.
222 simulate: Do not download the video files. If unset (or None),
223 simulate only if listsubtitles, listformats or list_thumbnails is used
224 format: Video format code. see "FORMAT SELECTION" for more details.
225 You can also pass a function. The function takes 'ctx' as
226 argument and returns the formats to download.
227 See "build_format_selector" for an implementation
228 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
229 ignore_no_formats_error: Ignore "No video formats" error. Useful for
230 extracting metadata even if the video is not actually
231 available for download (experimental)
232 format_sort: A list of fields by which to sort the video formats.
233 See "Sorting Formats" for more details.
234 format_sort_force: Force the given format_sort. see "Sorting Formats"
235 for more details.
236 allow_multiple_video_streams: Allow multiple video streams to be merged
237 into a single file
238 allow_multiple_audio_streams: Allow multiple audio streams to be merged
239 into a single file
240 check_formats: Whether to test if the formats are downloadable.
241 Can be True (check all), False (check none),
242 'selected' (check selected formats),
243 or None (check only if requested by extractor)
244 paths: Dictionary of output paths. The allowed keys are 'home'
245 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
246 outtmpl: Dictionary of templates for output names. Allowed keys
247 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
248 For compatibility with youtube-dl, a single string can also be used
249 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
250 restrictfilenames: Do not allow "&" and spaces in file names
251 trim_file_name: Limit length of filename (extension excluded)
252 windowsfilenames: Force the filenames to be windows compatible
253 ignoreerrors: Do not stop on download/postprocessing errors.
254 Can be 'only_download' to ignore only download errors.
255 Default is 'only_download' for CLI, but False for API
256 skip_playlist_after_errors: Number of allowed failures until the rest of
257 the playlist is skipped
258 force_generic_extractor: Force downloader to use the generic extractor
259 overwrites: Overwrite all video and metadata files if True,
260 overwrite only non-video files if None
261 and don't overwrite any file if False
262 For compatibility with youtube-dl,
263 "nooverwrites" may also be used instead
264 playliststart: Playlist item to start at.
265 playlistend: Playlist item to end at.
266 playlist_items: Specific indices of playlist to download.
267 playlistreverse: Download playlist items in reverse order.
268 playlistrandom: Download playlist items in random order.
269 matchtitle: Download only matching titles.
270 rejecttitle: Reject downloads for matching titles.
271 logger: Log messages to a logging.Logger instance.
272 logtostderr: Log messages to stderr instead of stdout.
273 consoletitle: Display progress in console window's titlebar.
274 writedescription: Write the video description to a .description file
275 writeinfojson: Write the video metadata to a .info.json file
276 clean_infojson: Remove private fields from the infojson
277 getcomments: Extract video comments. This will not be written to disk
278 unless writeinfojson is also given
279 writeannotations: Write the video annotations to a .annotations.xml file
280 writethumbnail: Write the thumbnail image to a file
281 allow_playlist_files: Whether to write playlists' description, infojson etc
282 also to disk when using the 'write*' options
283 write_all_thumbnails: Write all thumbnail formats to files
284 writelink: Write an internet shortcut file, depending on the
285 current platform (.url/.webloc/.desktop)
286 writeurllink: Write a Windows internet shortcut file (.url)
287 writewebloclink: Write a macOS internet shortcut file (.webloc)
288 writedesktoplink: Write a Linux internet shortcut file (.desktop)
289 writesubtitles: Write the video subtitles to a file
290 writeautomaticsub: Write the automatically generated subtitles to a file
291 allsubtitles: Deprecated - Use subtitleslangs = ['all']
292 Downloads all the subtitles of the video
293 (requires writesubtitles or writeautomaticsub)
294 listsubtitles: Lists all available subtitles for the video
295 subtitlesformat: The format code for subtitles
296 subtitleslangs: List of languages of the subtitles to download (can be regex).
297 The list may contain "all" to refer to all the available
298 subtitles. The language can be prefixed with a "-" to
299 exclude it from the requested languages. Eg: ['all', '-live_chat']
300 keepvideo: Keep the video file after post-processing
301 daterange: A DateRange object, download only if the upload_date is in the range.
302 skip_download: Skip the actual download of the video file
303 cachedir: Location of the cache files in the filesystem.
304 False to disable filesystem cache.
305 noplaylist: Download single video instead of a playlist if in doubt.
306 age_limit: An integer representing the user's age in years.
307 Unsuitable videos for the given age are skipped.
308 min_views: An integer representing the minimum view count the video
309 must have in order to not be skipped.
310 Videos without view count information are always
311 downloaded. None for no limit.
312 max_views: An integer representing the maximum view count.
313 Videos that are more popular than that are not
314 downloaded.
315 Videos without view count information are always
316 downloaded. None for no limit.
317 download_archive: File name of a file where all downloads are recorded.
318 Videos already present in the file are not downloaded
319 again.
320 break_on_existing: Stop the download process after attempting to download a
321 file that is in the archive.
322 break_on_reject: Stop the download process when encountering a video that
323 has been filtered out.
324 break_per_url: Whether break_on_reject and break_on_existing
325 should act on each input URL as opposed to for the entire queue
326 cookiefile: File name where cookies should be read from and dumped to
327 cookiesfrombrowser: A tuple containing the name of the browser, the profile
328 name/path from where cookies are loaded, and the name of the
329 keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
330 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
331 support RFC 5746 secure renegotiation
332 nocheckcertificate: Do not verify SSL certificates
333 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
334 At the moment, this is only supported by YouTube.
335 proxy: URL of the proxy server to use
336 geo_verification_proxy: URL of the proxy to use for IP address verification
337 on geo-restricted sites.
338 socket_timeout: Time to wait for unresponsive hosts, in seconds
339 bidi_workaround: Work around buggy terminals without bidirectional text
340 support, using fribidi
341 debug_printtraffic:Print out sent and received HTTP traffic
342 include_ads: Download ads as well (deprecated)
343 default_search: Prepend this string if an input url is not valid.
344 'auto' for elaborate guessing
345 encoding: Use this encoding instead of the system-specified.
346 extract_flat: Do not resolve URLs, return the immediate result.
347 Pass in 'in_playlist' to only show this behavior for
348 playlist items.
349 wait_for_video: If given, wait for scheduled streams to become available.
350 The value should be a tuple containing the range
351 (min_secs, max_secs) to wait between retries
352 postprocessors: A list of dictionaries, each with an entry
353 * key: The name of the postprocessor. See
354 yt_dlp/postprocessor/__init__.py for a list.
355 * when: When to run the postprocessor. Allowed values are
356 the entries of utils.POSTPROCESS_WHEN
357 Assumed to be 'post_process' if not given
358 post_hooks: Deprecated - Register a custom postprocessor instead
359 A list of functions that get called as the final step
360 for each video file, after all postprocessors have been
361 called. The filename will be passed as the only argument.
362 progress_hooks: A list of functions that get called on download
363 progress, with a dictionary with the entries
364 * status: One of "downloading", "error", or "finished".
365 Check this first and ignore unknown values.
366 * info_dict: The extracted info_dict
367
368 If status is one of "downloading", or "finished", the
369 following properties may also be present:
370 * filename: The final filename (always present)
371 * tmpfilename: The filename we're currently writing to
372 * downloaded_bytes: Bytes on disk
373 * total_bytes: Size of the whole file, None if unknown
374 * total_bytes_estimate: Guess of the eventual file size,
375 None if unavailable.
376 * elapsed: The number of seconds since download started.
377 * eta: The estimated time in seconds, None if unknown
378 * speed: The download speed in bytes/second, None if
379 unknown
380 * fragment_index: The counter of the currently
381 downloaded video fragment.
382 * fragment_count: The number of fragments (= individual
383 files that will be merged)
384
385 Progress hooks are guaranteed to be called at least once
386 (with status "finished") if the download is successful.
387 postprocessor_hooks: A list of functions that get called on postprocessing
388 progress, with a dictionary with the entries
389 * status: One of "started", "processing", or "finished".
390 Check this first and ignore unknown values.
391 * postprocessor: Name of the postprocessor
392 * info_dict: The extracted info_dict
393
394 Progress hooks are guaranteed to be called at least twice
395 (with status "started" and "finished") if the processing is successful.
396 merge_output_format: Extension to use when merging formats.
397 final_ext: Expected final extension; used to detect when the file was
398 already downloaded and converted
399 fixup: Automatically correct known faults of the file.
400 One of:
401 - "never": do nothing
402 - "warn": only emit a warning
403 - "detect_or_warn": check whether we can do anything
404 about it, warn otherwise (default)
405 source_address: Client-side IP address to bind to.
406 call_home: Boolean, true iff we are allowed to contact the
407 yt-dlp servers for debugging. (BROKEN)
408 sleep_interval_requests: Number of seconds to sleep between requests
409 during extraction
410 sleep_interval: Number of seconds to sleep before each download when
411 used alone or a lower bound of a range for randomized
412 sleep before each download (minimum possible number
413 of seconds to sleep) when used along with
414 max_sleep_interval.
415 max_sleep_interval:Upper bound of a range for randomized sleep before each
416 download (maximum possible number of seconds to sleep).
417 Must only be used along with sleep_interval.
418 Actual sleep time will be a random float from range
419 [sleep_interval; max_sleep_interval].
420 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
421 listformats: Print an overview of available video formats and exit.
422 list_thumbnails: Print a table of all thumbnails and exit.
423 match_filter: A function that gets called with the info_dict of
424 every video.
425 If it returns a message, the video is ignored.
426 If it returns None, the video is downloaded.
427 match_filter_func in utils.py is one example for this.
428 no_color: Do not emit color codes in output.
429 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
430 HTTP header
431 geo_bypass_country:
432 Two-letter ISO 3166-2 country code that will be used for
433 explicit geographic restriction bypassing via faking
434 X-Forwarded-For HTTP header
435 geo_bypass_ip_block:
436 IP range in CIDR notation that will be used similarly to
437 geo_bypass_country
438
439 The following options determine which downloader is picked:
440 external_downloader: A dictionary of protocol keys and the executable of the
441 external downloader to use for it. The allowed protocols
442 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
443 Set the value to 'native' to use the native downloader
444 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
445 or {'m3u8': 'ffmpeg'} instead.
446 Use the native HLS downloader instead of ffmpeg/avconv
447 if True, otherwise use ffmpeg/avconv if False, otherwise
448 use downloader suggested by extractor if None.
449 compat_opts: Compatibility options. See "Differences in default behavior".
450 The following options do not work when used through the API:
451 filename, abort-on-error, multistreams, no-live-chat, format-sort
452 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
453 Refer __init__.py for their implementation
454 progress_template: Dictionary of templates for progress outputs.
455 Allowed keys are 'download', 'postprocess',
456 'download-title' (console title) and 'postprocess-title'.
457 The template is mapped on a dictionary with keys 'progress' and 'info'
458
459 The following parameters are not used by YoutubeDL itself, they are used by
460 the downloader (see yt_dlp/downloader/common.py):
461 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
462 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
463 continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
464 external_downloader_args, concurrent_fragment_downloads.
465
466 The following options are used by the post processors:
467 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
468 otherwise prefer ffmpeg. (avconv support is deprecated)
469 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
470 to the binary or its containing directory.
471 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
472 and a list of additional command-line arguments for the
473 postprocessor/executable. The dict can also have "PP+EXE" keys
474 which are used when the given exe is used by the given PP.
475 Use 'default' as the name for arguments to be passed to all PP
476 For compatibility with youtube-dl, a single list of args
477 can also be used
478
479 The following options are used by the extractors:
480 extractor_retries: Number of times to retry for known errors
481 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
482 hls_split_discontinuity: Split HLS playlists to different formats at
483 discontinuities such as ad breaks (default: False)
484 extractor_args: A dictionary of arguments to be passed to the extractors.
485 See "EXTRACTOR ARGUMENTS" for details.
486 Eg: {'youtube': {'skip': ['dash', 'hls']}}
487 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
488 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
489 If True (default), DASH manifests and related
490 data will be downloaded and processed by extractor.
491 You can reduce network I/O by disabling it if you don't
492 care about DASH. (only for youtube)
493 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
494 If True (default), HLS manifests and related
495 data will be downloaded and processed by extractor.
496 You can reduce network I/O by disabling it if you don't
497 care about HLS. (only for youtube)
498 """
499
# Field names that are regarded as numeric
_NUMERIC_FIELDS = {
    'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
    'timestamp', 'release_timestamp',
    'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
    'average_rating', 'comment_count', 'age_limit',
    'start_time', 'end_time',
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number', 'release_year',
}

# File extensions grouped by the kind of media they denote
_format_selection_exts = {
    'audio': {'m4a', 'mp3', 'ogg', 'aac'},
    'video': {'mp4', 'flv', 'webm', '3gp'},
    'storyboards': {'mhtml'},
}

# Class-level defaults; __init__ rebinds several of these on every instance
params = None
_ies = {}
_pps = {k: [] for k in POSTPROCESS_WHEN}
_printed_messages = set()
_first_webpage_request = True
_download_retcode = None
_num_downloads = None
_playlist_level = 0
_playlist_urls = set()
_screen_file = None
526
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    @param params       Dictionary of options (see the class docstring).
                        The dictionary is stored as self.params and is
                        modified in place while options are normalised.
    @param auto_init    Whether to load the default extractors and print header (if verbose).
                        Set to 'no_verbose_header' to not print the header
    """
    if params is None:
        params = {}
    self._ies = {}
    self._ies_instances = {}
    self._pps = {k: [] for k in POSTPROCESS_WHEN}
    self._printed_messages = set()
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._postprocessor_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._num_videos = 0
    # Give every instance its own playlist bookkeeping; otherwise the mutable
    # class-level set would be shared between all YoutubeDL objects
    self._playlist_level = 0
    self._playlist_urls = set()
    # A truthiness test (instead of indexing a list with the option value)
    # also tolerates values such as None
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
    self._err_file = sys.stderr
    self.params = params
    self.cache = Cache(self)

    windows_enable_vt_mode()
    self._allow_colors = {
        'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
        'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
    }

    if sys.version_info < (3, 6):
        self.report_warning(
            'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

    if self.params.get('allow_unplayable_formats'):
        self.report_warning(
            f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
            'This is a developer option intended for debugging. \n'
            ' If you experience any issues while using this option, '
            f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

    def check_deprecated(param, option, suggestion):
        """Warn if the deprecated `param` is set; return whether it was set"""
        if self.params.get(param) is not None:
            self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
            return True
        return False

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the deprecated value over unless the new option was given explicitly
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    for msg in self.params.get('_warnings', []):
        self.report_warning(msg)
    for msg in self.params.get('_deprecation_warnings', []):
        self.deprecation_warning(msg)

    if 'list-formats' in self.params.get('compat_opts', []):
        self.params['listformats_table'] = False

    if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
        # nooverwrites was unnecessarily changed to overwrites
        # in 0c3d0f51778b153f65c21906031c2e091fcfb641
        # This ensures compatibility with both keys
        self.params['overwrites'] = not self.params['nooverwrites']
    elif self.params.get('overwrites') is None:
        self.params.pop('overwrites', None)
    else:
        self.params['nooverwrites'] = not self.params['overwrites']

    self.params.setdefault('forceprint', {})
    self.params.setdefault('print_to_file', {})

    # Compatibility with older syntax
    if not isinstance(params['forceprint'], dict):
        self.params['forceprint'] = {'video': params['forceprint']}

    if self.params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Try bidiv first and fall back to fribidi
            try:
                self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
            except OSError:
                self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning(
                    'Could not find fribidi executable, ignoring --bidi-workaround. '
                    'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    # Creating format selector here allows us to catch syntax errors before the extraction
    self.format_selector = (
        self.params.get('format') if self.params.get('format') in (None, '-')
        else self.params['format'] if callable(self.params['format'])
        else self.build_format_selector(self.params['format']))

    self._setup_opener()

    if auto_init:
        if auto_init != 'no_verbose_header':
            self.print_debug_header()
        self.add_default_info_extractors()

    # Register the hooks passed through params with their respective adders
    hooks = {
        'post_hooks': self.add_post_hook,
        'progress_hooks': self.add_progress_hook,
        'postprocessor_hooks': self.add_postprocessor_hook,
    }
    for opt, fn in hooks.items():
        for ph in self.params.get(opt, []):
            fn(ph)

    for pp_def_raw in self.params.get('postprocessors', []):
        # Work on a copy so that the caller's definition is not mutated by pop()
        pp_def = dict(pp_def_raw)
        when = pp_def.pop('when', 'post_process')
        self.add_post_processor(
            get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
            when=when)

    register_socks_protocols()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified"""
        if fn is None:
            return False
        self.write_debug(f'Loading archive file {fn!r}')
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file is not an error
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    self.archive = set()
    preload_download_archive(self.params.get('download_archive'))
692
693 def warn_if_short_id(self, argv):
694 # short YouTube ID starting with dash?
695 idxs = [
696 i for i, a in enumerate(argv)
697 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
698 if idxs:
699 correct_argv = (
700 ['yt-dlp']
701 + [a for i, a in enumerate(argv) if i not in idxs]
702 + ['--'] + [argv[i] for i in idxs]
703 )
704 self.report_warning(
705 'Long argument string detected. '
706 'Use -- to separate parameters and URLs, like this:\n%s' %
707 args_to_str(correct_argv))
708
709 def add_info_extractor(self, ie):
710 """Add an InfoExtractor object to the end of the list."""
711 ie_key = ie.ie_key()
712 self._ies[ie_key] = ie
713 if not isinstance(ie, type):
714 self._ies_instances[ie_key] = ie
715 ie.set_downloader(self)
716
717 def _get_info_extractor_class(self, ie_key):
718 ie = self._ies.get(ie_key)
719 if ie is None:
720 ie = get_info_extractor(ie_key)
721 self.add_info_extractor(ie)
722 return ie
723
724 def get_info_extractor(self, ie_key):
725 """
726 Get an instance of an IE with name ie_key, it will try to get one from
727 the _ies list, if there's no instance it will create a new one and add
728 it to the extractor list.
729 """
730 ie = self._ies_instances.get(ie_key)
731 if ie is None:
732 ie = get_info_extractor(ie_key)()
733 self.add_info_extractor(ie)
734 return ie
735
def add_default_info_extractors(self):
    """
    Register every InfoExtractor yielded by gen_extractor_classes(),
    in the order they are produced
    """
    for extractor_class in gen_extractor_classes():
        self.add_info_extractor(extractor_class)
742
743 def add_post_processor(self, pp, when='post_process'):
744 """Add a PostProcessor object to the end of the chain."""
745 self._pps[when].append(pp)
746 pp.set_downloader(self)
747
748 def add_post_hook(self, ph):
749 """Add the post hook"""
750 self._post_hooks.append(ph)
751
752 def add_progress_hook(self, ph):
753 """Add the download progress hook"""
754 self._progress_hooks.append(ph)
755
756 def add_postprocessor_hook(self, ph):
757 """Add the postprocessing progress hook"""
758 self._postprocessor_hooks.append(ph)
759 for pps in self._pps.values():
760 for pp in pps:
761 pp.add_progress_hook(ph)
762
763 def _bidi_workaround(self, message):
764 if not hasattr(self, '_output_channel'):
765 return message
766
767 assert hasattr(self, '_output_process')
768 assert isinstance(message, compat_str)
769 line_count = message.count('\n') + 1
770 self._output_process.stdin.write((message + '\n').encode('utf-8'))
771 self._output_process.stdin.flush()
772 res = ''.join(self._output_channel.readline().decode('utf-8')
773 for _ in range(line_count))
774 return res[:-len('\n')]
775
776 def _write_string(self, message, out=None, only_once=False):
777 if only_once:
778 if message in self._printed_messages:
779 return
780 self._printed_messages.add(message)
781 write_string(message, out=out, encoding=self.params.get('encoding'))
782
def to_stdout(self, message, skip_eol=False, quiet=False):
    """
    Print message to stdout.

    A configured 'logger' takes precedence and receives the message at
    debug level. Otherwise a quiet message is dropped unless 'verbose' is
    set, in which case it is written to the error stream instead of the
    screen stream. A newline is appended unless skip_eol is true.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if quiet and not self.params.get('verbose'):
        return
    line_end = '' if skip_eol else '\n'
    text = '%s%s' % (self._bidi_workaround(message), line_end)
    stream = self._err_file if quiet else self._screen_file
    self._write_string(text, stream)
791
def to_stderr(self, message, only_once=False):
    """
    Print message to stderr.

    A configured 'logger' receives the message at error level instead.
    Otherwise the message plus a newline goes to the error stream,
    optionally de-duplicated through only_once.
    """
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    text = '%s\n' % self._bidi_workaround(message)
    self._write_string(text, self._err_file, only_once=only_once)
799
def to_console_title(self, message):
    """
    Set the console window title to message.

    Does nothing unless the 'consoletitle' parameter is enabled. Terminal
    sequences are removed from the message first. On Windows the title is
    set through the kernel32 API when a console window exists; elsewhere
    an escape sequence is written to the screen stream if TERM is set.
    """
    if not self.params.get('consoletitle', False):
        return
    title = remove_terminal_sequences(message)
    if compat_os_name != 'nt':
        if 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % title, self._screen_file)
        return
    kernel32 = ctypes.windll.kernel32
    if kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `title` is
        # already of type unicode()
        kernel32.SetConsoleTitleW(ctypes.c_wchar_p(title))
811
def save_console_title(self):
    """
    Push the current console title onto the terminal's title stack.

    Only acts when 'consoletitle' is enabled, 'simulate' is not, the
    platform is not Windows and TERM is present in the environment.
    """
    enabled = self.params.get('consoletitle', False) and not self.params.get('simulate')
    if enabled and compat_os_name != 'nt' and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
820
def restore_console_title(self):
    """
    Pop the console title from the terminal's title stack.

    Only acts when 'consoletitle' is enabled, 'simulate' is not, the
    platform is not Windows and TERM is present in the environment.
    """
    enabled = self.params.get('consoletitle', False) and not self.params.get('simulate')
    if enabled and compat_os_name != 'nt' and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
829
830 def __enter__(self):
831 self.save_console_title()
832 return self
833
834 def __exit__(self, *args):
835 self.restore_console_title()
836
837 if self.params.get('cookiefile') is not None:
838 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
839
def trouble(self, message=None, tb=None, is_error=True):
    """Determine action to take when a download problem appears.

    The message (if any) is printed to stderr first. In verbose mode a
    traceback is printed as well: the one given, or else one built from the
    exception currently being handled, or else the current call stack.

    Depending on whether the downloader has been configured to ignore
    download errors, this method then either raises DownloadError or
    records a failing return code.

    @param tb          If given, is additional traceback information
    @param is_error    Whether to raise error according to ignorerrors
    """
    def wrapped_exc_info():
        # exc_info carried by the exception being handled, or None
        active = sys.exc_info()
        if active[0] and hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
            return active[1].exc_info
        return None

    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                pieces = []
                inner = wrapped_exc_info()
                if inner is not None:
                    pieces.append(''.join(traceback.format_exception(*inner)))
                pieces.append(encode_compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        if tb:
            self.to_stderr(tb)

    if not is_error:
        return
    if self.params.get('ignoreerrors'):
        self._download_retcode = 1
        return

    exc_info = wrapped_exc_info()
    if exc_info is None:
        exc_info = sys.exc_info()
    raise DownloadError(message, exc_info)
873
def to_screen(self, message, skip_eol=False):
    """Print message to stdout, treating it as quiet output when the 'quiet' parameter is set."""
    quiet_mode = self.params.get('quiet', False)
    self.to_stdout(message, skip_eol, quiet=quiet_mode)
878
class Styles(Enum):
    """Named text styles for console output.

    _format_text accepts a member of this enum and passes its value (the
    style string) on to format_text.
    """
    HEADERS = 'yellow'
    EMPHASIS = 'light blue'
    ID = 'green'
    DELIM = 'blue'
    ERROR = 'red'
    # Shares its value with HEADERS, so Enum makes WARNING an alias of HEADERS
    WARNING = 'yellow'
    SUPPRESS = 'light black'
887
888 def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
889 if test_encoding:
890 original_text = text
891 encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
892 text = text.encode(encoding, 'ignore').decode(encoding)
893 if fallback is not None and text != original_text:
894 text = fallback
895 if isinstance(f, self.Styles):
896 f = f.value
897 return format_text(text, f) if allow_colors else text if fallback is None else fallback
898
899 def _format_screen(self, *args, **kwargs):
900 return self._format_text(
901 self._screen_file, self._allow_colors['screen'], *args, **kwargs)
902
903 def _format_err(self, *args, **kwargs):
904 return self._format_text(
905 self._err_file, self._allow_colors['err'], *args, **kwargs)
906
def report_warning(self, message, only_once=False):
    '''
    Emit a warning.

    With a 'logger' configured the bare message goes to logger.warning.
    Otherwise, unless 'no_warnings' is set, the message is printed to
    stderr prefixed with 'WARNING:' (styled via _format_err).
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    prefix = self._format_err('WARNING:', self.Styles.WARNING)
    self.to_stderr(f'{prefix} {message}', only_once)
918
def deprecation_warning(self, message):
    """
    Emit a deprecation warning.

    With a 'logger' configured, the message is sent to logger.warning
    prefixed with 'DeprecationWarning: '. Otherwise it is printed once to
    stderr with a styled 'DeprecationWarning:' prefix.
    """
    if self.params.get('logger') is not None:
        # Must be an f-string; a plain literal would log the text "{message}"
        self.params['logger'].warning(f'DeprecationWarning: {message}')
    else:
        self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
924
def report_error(self, message, *args, **kwargs):
    '''
    Do the same as trouble, but prefix the message with 'ERROR:' (styled
    via _format_err). Extra arguments are forwarded to trouble.
    '''
    prefix = self._format_err('ERROR:', self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', *args, **kwargs)
931
def write_debug(self, message, only_once=False):
    '''Log a '[debug] '-prefixed message via the logger, or print it to stderr; no-op unless 'verbose' is set'''
    if not self.params.get('verbose', False):
        return
    debug_line = '[debug] %s' % message
    logger = self.params.get('logger')
    if logger:
        logger.debug(debug_line)
    else:
        self.to_stderr(debug_line, only_once)
941
def report_file_already_downloaded(self, file_name):
    """Tell the user the file was already fully downloaded; the name is left out if it cannot be encoded."""
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
948
def report_file_delete(self, file_name):
    """Tell the user an existing file will be deleted; the name is left out if it cannot be encoded."""
    try:
        notice = 'Deleting existing file %s' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen('Deleting existing file')
955
def raise_no_formats(self, info, forced=False):
    """
    Report that no usable formats exist for `info`.

    Raises ExtractorError when `forced` is set or when the
    'ignore_no_formats_error' parameter is off; otherwise only a warning
    is issued. The message depends on the '__has_drm' flag of info.
    """
    has_drm = info.get('__has_drm')
    msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
    expected = self.params.get('ignore_no_formats_error')
    if expected and not forced:
        self.report_warning(msg)
        return
    raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                         expected=has_drm or expected)
965
def parse_outtmpl(self):
    """
    Return the output templates as a dict keyed by template type.

    A non-dict 'outtmpl' parameter becomes the 'default' entry. Types that
    are missing (or None) are filled in from DEFAULT_OUTTMPL; with
    'restrictfilenames' those defaults have their spaces removed. A warning
    is issued for every template given as bytes.
    """
    templates = self.params.get('outtmpl', {})
    if not isinstance(templates, dict):
        templates = {'default': templates}

    # Remove spaces in the default template
    if self.params.get('restrictfilenames'):
        def adapt_default(tmpl):
            return tmpl.replace(' - ', ' ').replace(' ', '-')
    else:
        def adapt_default(tmpl):
            return tmpl

    missing = {}
    for tmpl_type, default_tmpl in DEFAULT_OUTTMPL.items():
        if templates.get(tmpl_type) is None:
            missing[tmpl_type] = adapt_default(default_tmpl)
    templates.update(missing)

    for tmpl in templates.values():
        if isinstance(tmpl, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
    return templates
984
def get_output_path(self, dir_type='', filename=None):
    """
    Build the output path for filename.

    The path is the join of the expanded 'home' entry of the 'paths'
    parameter, the expanded entry for dir_type (if dir_type is given) and
    the filename, and is finally passed through sanitize_path.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    path = os.path.join(home_dir, type_dir, filename or '')

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    on_py2_windows = sys.version_info < (3, 0) and sys.platform == 'win32'
    if on_py2_windows:
        path = encodeFilename(path, True).decode(preferredencoding())
    return sanitize_path(path, force=self.params.get('windowsfilenames'))
999
@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Run expand_path on outtmpl while keeping '%%' and '$$' untouched."""
    # expand_path translates '%%' into '%' and '$$' into '$'
    # correspondingly that is not what we want since we need to keep
    # '%%' intact for template dict substitution step. Working around
    # with boundary-alike separator hack: a random marker is wedged
    # between the doubled characters and removed again afterwards.
    marker = ''.join(random.choice(ascii_letters) for _ in range(32))
    guarded = outtmpl.replace('%%', f'%{marker}%').replace('$$', f'${marker}$')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    return expand_path(guarded).replace(marker, '')
1014
@staticmethod
def escape_outtmpl(outtmpl):
    ''' Escape any remaining strings like %s, %abc% etc. '''
    def escape_keyless(mobj):
        # matches without a key get an extra '%' in front; keyed ones are kept
        matched = mobj.group(0)
        return matched if mobj.group('has_key') else '%' + matched

    leftover_re = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')
    return re.sub(leftover_re, escape_keyless, outtmpl)
1022
@classmethod
def validate_outtmpl(cls, outtmpl):
    ''' @return None if outtmpl is usable, else the ValueError raised while formatting it '''
    def to_plain_string(mobj):
        # swap the custom conversion letter for a plain 's'
        return f'{mobj.group(0)[:-1]}s'

    custom_conv_re = STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]')
    outtmpl = re.sub(custom_conv_re, to_plain_string, cls._outtmpl_expandpath(outtmpl))
    try:
        cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None
1035
1036 @staticmethod
1037 def _copy_infodict(info_dict):
1038 info_dict = dict(info_dict)
1039 for key in ('__original_infodict', '__postprocessors'):
1040 info_dict.pop(key, None)
1041 return info_dict
1042
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

    Every keyed field in outtmpl is evaluated against info_dict and replaced
    by a reference to a generated key; the evaluated values are collected in
    a separate dict under those keys.

    @param outtmpl     The output template string
    @param info_dict   The info dict. 'epoch' is set on it if missing; all
                       other additions are made on a copy
    @param sanitize    Whether to sanitize the output as a filename.
                       For backward compatibility, a function can also be passed
    @return            Tuple (rewritten template, dict of values for it)
    """

    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    # Work on a copy from here on, so the derived fields below do not leak
    # into the caller's dict
    info_dict = self._copy_infodict(info_dict)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
    info_dict['video_autonumber'] = self._num_videos
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
        'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    # Collects the evaluated value for every generated template key
    TMPL_DICT = {}
    # Matches %(...)X fields, including the custom conversion letters
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
    # NOTE(review): the '.' in the number part below is unescaped, so it
    # matches any character, not only a decimal point - confirm intent
    MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    # Grammar of what may appear between the parentheses of a field:
    # [-]fields[maths][>strf_format][,alternate][&replacement][|default]
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{field})
        (?P<maths>(?:{math_op}{math_field})*)
        (?:>(?P<strf_format>.+?))?
        (?P<alternate>(?<!\\),[^|&)]+)?
        (?:&(?P<replacement>.*?))?
        (?:\|(?P<default>.*?))?
        $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

    def _traverse_infodict(k):
        # Resolve a dotted key path inside info_dict; a leading '.' is ignored
        k = k.split('.')
        if k[0] == '':
            k.pop(0)
        return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Evaluate one parsed field expression (groupdict of INTERNAL_FORMAT_RE)
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            # Local name; shadows the imported `operator` module inside this function.
            # The string is consumed alternately as operator, operand, operator, ...
            operator = None
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if operator is None:
                    operator = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                # The operand is either a literal number or another field
                offset = float_or_none(item)
                if offset is None:
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = operator(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                operator = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        return value

    # Placeholder used when a field has no value and no explicit default
    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
        return sanitize_filename(str(value), restricted=restricted,
                                 is_id=re.search(r'(^|[_.])id(\.|$)', key))

    # A callable `sanitize` is used as the sanitizer itself (backward compatibility)
    sanitizer = sanitize if callable(sanitize) else filename_sanitizer
    sanitize = bool(sanitize)

    def _dumpjson_default(obj):
        # json.dumps fallback: sets and LazyLists become lists, anything else its repr
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        return repr(obj)

    def create_key(outer_mobj):
        # re.sub callback: evaluate one template field, store the result in
        # TMPL_DICT and return the rewritten field that refers to it
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        initial_field = mobj.group('fields') if mobj else ''
        value, replacement, default = None, None, na
        # Walk the chain of comma-separated alternates until one has a value
        while mobj:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else default
            value = get_value(mobj)
            replacement = mobj['replacement']
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
            else:
                break

        fmt = outer_mobj.group('format')
        if fmt == 's' and value is not None and key in field_size_compat_map.keys():
            fmt = '0{:d}d'.format(field_size_compat_map[key])

        # No value -> default; otherwise the replacement (if given) wins over the value
        value = default if value is None else value if replacement is None else replacement

        flags = outer_mobj.group('conversion') or ''
        # Same format spec with the conversion letter replaced by 's'
        str_fmt = f'{fmt[:-1]}s'
        if fmt[-1] == 'l':  # list
            delim = '\n' if '#' in flags else ', '
            value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
        elif fmt[-1] == 'j':  # json
            value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
        elif fmt[-1] == 'q':  # quoted
            value = map(str, variadic(value) if '#' in flags else [value])
            value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
        elif fmt[-1] == 'B':  # bytes
            value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif fmt[-1] == 'U':  # unicode normalized
            value, fmt = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                value), str_fmt
        elif fmt[-1] == 'D':  # decimal suffix
            num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
            value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                          factor=1024 if '#' in flags else 1000)
        elif fmt[-1] == 'S':  # filename sanitization
            value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
        elif fmt[-1] == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif fmt[-1] not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), str_fmt
            if fmt[-1] in 'csr':
                value = sanitizer(initial_field, value)

        # Generated key: the original key (with '%' followed by NUL) plus NUL
        # and the original format, so each key/format pair gets its own entry
        key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1216
def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Evaluate outtmpl against info_dict and return the resulting string; extra arguments go to prepare_outtmpl."""
    prepared_tmpl, values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    escaped_tmpl = self.escape_outtmpl(prepared_tmpl)
    return escaped_tmpl % values
1220
def _prepare_filename(self, info_dict, tmpl_type='default'):
    """
    Evaluate the output template of type tmpl_type into a filename.

    Returns None when the result is empty, or when the template raises a
    ValueError (which is reported through report_error).
    """
    try:
        template = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
        name = self.evaluate_outtmpl(template, info_dict, True)
        if not name:
            return None

        if tmpl_type not in ('default', 'temp'):
            # Other template types may dictate the extension
            forced_ext = OUTTMPL_TYPES[tmpl_type]
            if forced_ext:
                name = replace_extension(name, forced_ext, info_dict.get('ext'))
        else:
            final_ext, current_ext = self.params.get('final_ext'), info_dict.get('ext')
            if final_ext and current_ext and final_ext != current_ext and name.endswith(f'.{final_ext}'):
                name = replace_extension(name, current_ext, final_ext)

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        max_length = self.params.get('trim_file_name', False)
        if max_length:
            stem, *suffixes = name.rsplit('.', 2)
            name = join_nonempty(stem[:max_length], *suffixes, delim='.')

        return name
    except ValueError as err:
        self.report_error(f'Error in output template: {err} (encoding: {preferredencoding()!r})')
        return None
1247
def prepare_filename(self, info_dict, dir_type='', warn=False):
    """Generate the output filename.

    @param info_dict   The info dict the output template is evaluated with
    @param dir_type    Template/path type; '' selects the 'default' template
    @param warn        Whether to warn when the 'paths' parameter is ignored
    @return            '' when no filename could be produced for a dir_type
                       other than '' and 'temp'; the bare filename when it
                       is '-' or empty/None; otherwise the full output path
    """

    filename = self._prepare_filename(info_dict, dir_type or 'default')
    if not filename and dir_type not in ('', 'temp'):
        return ''

    if warn:
        if not self.params.get('paths'):
            pass
        elif filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        # filename is None when the template was empty or invalid;
        # os.path.isabs would raise TypeError on it
        elif filename and os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
    if filename == '-' or not filename:
        return filename

    return self.get_output_path(dir_type, filename)
1266
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """
    Returns None if the file should be downloaded, else the reason to skip it.

    An entry found in the download archive is always skipped; otherwise the
    title, date, view-count, age and 'match_filter' checks are applied.
    The reason is printed unless silent, and the matching break_on_*
    parameter turns the skip into an exception.
    """

    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def rejection_reason():
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            wanted = self.params.get('matchtitle', False)
            if wanted and not re.search(wanted, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + wanted + '"'
            unwanted = self.params.get('rejecttitle', False)
            if unwanted and re.search(unwanted, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + unwanted + '"'

        upload_date = info_dict.get('upload_date')
        if upload_date is not None:
            allowed_dates = self.params.get('daterange', DateRange())
            if upload_date not in allowed_dates:
                return '%s upload date is not in range %s' % (date_from_str(upload_date).isoformat(), allowed_dates)

        views = info_dict.get('view_count')
        if views is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and views < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, views, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and views > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, views, max_views)

        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        match_filter = self.params.get('match_filter')
        if match_filter is None:
            return None
        try:
            return match_filter(info_dict, incomplete=incomplete)
        except TypeError:
            # For backward compatibility
            return None if incomplete else match_filter(info_dict)

    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = rejection_reason()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return None
    if not silent:
        self.to_screen('[download] ' + reason)
    if self.params.get(break_opt, False):
        raise break_err()
    return reason
1323
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every key of extra_info that info_dict does not have yet'''
    for key, value in extra_info.items():
        if key not in info_dict:
            info_dict[key] = value
1329
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract information for url with the first suitable extractor.

    Returns the result of running that extractor. None is returned when the
    URL is already recorded in the download archive, or when no extractor
    is suitable (which is also reported as an error).

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
    process -- whether to resolve all unresolved references (URLs, playlist items),
               must be True for download to work.
    force_generic_extractor -- force using the generic extractor
    """

    if extra_info is None:
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    candidates = {ie_key: self._get_info_extractor_class(ie_key)} if ie_key else self._ies

    for key, ie in candidates.items():
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = ie.get_temp_id(url)
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}):
            self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive')
            if self.params.get('break_on_existing', False):
                raise ExistingVideoReached()
            return None
        return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)

    self.report_error('no suitable InfoExtractor for URL %s' % url)
1375
1376 def __handle_extraction_exceptions(func):
1377 @functools.wraps(func)
1378 def wrapper(self, *args, **kwargs):
1379 while True:
1380 try:
1381 return func(self, *args, **kwargs)
1382 except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1383 raise
1384 except ReExtractInfo as e:
1385 if e.expected:
1386 self.to_screen(f'{e}; Re-extracting data')
1387 else:
1388 self.to_stderr('\r')
1389 self.report_warning(f'{e}; Re-extracting data')
1390 continue
1391 except GeoRestrictedError as e:
1392 msg = e.msg
1393 if e.countries:
1394 msg += '\nThis video is available in %s.' % ', '.join(
1395 map(ISO3166Utils.short2full, e.countries))
1396 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
1397 self.report_error(msg)
1398 except ExtractorError as e: # An error we somewhat expected
1399 self.report_error(str(e), e.format_traceback())
1400 except Exception as e:
1401 if self.params.get('ignoreerrors'):
1402 self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
1403 else:
1404 raise
1405 break
1406 return wrapper
1407
def _wait_for_video(self, ie_result):
    """
    Wait until an unavailable video is expected to be available, then
    request re-extraction.

    Returns immediately unless the 'wait_for_video' parameter is set and
    ie_result is a video result without 'formats' and 'url'. Otherwise the
    waiting time is derived from 'release_timestamp' (or picked between the
    configured bounds for an upcoming video with unknown release time),
    clamped to the (min, max) bounds of 'wait_for_video', and counted down
    on screen.

    Raises ReExtractInfo (expected=True) when the wait ends or the user
    interrupts it with Ctrl+C.
    """
    if (not self.params.get('wait_for_video')
            or ie_result.get('_type', 'video') != 'video'
            or ie_result.get('formats') or ie_result.get('url')):
        return

    def format_dur(dur):
        # HH:MM:SS; the milliseconds component is dropped
        return '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]

    last_msg = ''

    def progress(msg):
        # Overwrite the previous status line, padding away leftover characters
        nonlocal last_msg
        self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
        last_msg = msg

    min_wait, max_wait = self.params.get('wait_for_video')
    diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
    if diff is None and ie_result.get('live_status') == 'is_upcoming':
        # random.uniform copes with equal and non-integer bounds, where
        # random.randrange would raise
        diff = round(random.uniform(min_wait, max_wait)) if (max_wait and min_wait) else (max_wait or min_wait)
        self.report_warning('Release time of video is not known')
    elif (diff or 0) <= 0:
        self.report_warning('Video should already be available according to extracted info')
    diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
    self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

    wait_till = time.time() + diff
    try:
        while True:
            diff = wait_till - time.time()
            if diff <= 0:
                progress('')
                raise ReExtractInfo('[wait] Wait period ended', expected=True)
            progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
            time.sleep(1)
    except KeyboardInterrupt:
        progress('')
        raise ReExtractInfo('[wait] Interrupted by user', expected=True)
    except BaseException as e:
        # Finish the status line before any other exception propagates
        if not isinstance(e, ReExtractInfo):
            self.to_screen('')
        raise
1448
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """
    Run extractor ie on url and optionally process its result.

    Returns None if the extractor returned None. A list result is wrapped
    as a 'compat_list' result. With process set, the result is passed on to
    process_ie_result (after waiting for the video if needed); otherwise
    the raw result is returned.
    """
    result = ie.extract(url)
    if result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return
    if isinstance(result, list):
        # Backwards compatibility: old IE result format
        result = {'_type': 'compat_list', 'entries': result}

    original_url = extra_info.get('original_url')
    if original_url:
        result.setdefault('original_url', extra_info['original_url'])
    self.add_default_extra_info(result, ie, url)

    if not process:
        return result
    self._wait_for_video(result)
    return self.process_ie_result(result, download, extra_info)
1468
def add_default_extra_info(self, ie_result, ie, url):
    """
    Fill in the standard URL and extractor fields of ie_result where missing.

    url (if not None) supplies 'webpage_url' and 'original_url'; the
    resulting 'webpage_url' supplies its basename and domain; ie (if not
    None) supplies 'extractor' and 'extractor_key'.
    """
    if url is not None:
        self.add_extra_info(ie_result, dict(webpage_url=url, original_url=url))

    page_url = ie_result.get('webpage_url')
    if page_url:
        self.add_extra_info(ie_result, dict(
            webpage_url_basename=url_basename(page_url),
            webpage_url_domain=get_domain(page_url)))

    if ie is not None:
        self.add_extra_info(ie_result, dict(
            extractor=ie.IE_NAME,
            extractor_key=ie.ie_key()))
1486
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie (may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result. None is returned for a playlist whose
    'webpage_url' is already being processed.

    Raises Exception for an unknown '_type'.
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        if ie_result.get('original_url'):
            extra_info.setdefault('original_url', ie_result['original_url'])

        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            # Flat extraction: report on a copy and return the reference unresolved
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            if ie and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            info_copy, _ = self.pre_process(info_copy)
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, compat_str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        # Outer metadata overrides the inner result, except for the fields
        # that identify the inner result itself
        force_properties = {k: v for k, v in ie_result.items() if v is not None}
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            force_properties.pop(f, None)
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # Parenthesized: '%' binds tighter than 'or', so without the
            # parentheses the id fallback would never be used
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Forget the visited URLs once the outermost playlist is done
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Give each entry the extractor and URL fields of the enclosing result
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1615
def _ensure_dir_exists(self, path):
    """Delegate to `make_dir` for `path`, routing failures to `self.report_error`.

    The result of `make_dir` is returned unchanged; callers in this file
    treat a falsy result as "could not prepare the directory".
    """
    error_callback = self.report_error
    return make_dir(path, error_callback)
1618
1619 @staticmethod
1620 def _playlist_infodict(ie_result, **kwargs):
1621 return {
1622 **ie_result,
1623 'playlist': ie_result.get('title') or ie_result.get('id'),
1624 'playlist_id': ie_result.get('id'),
1625 'playlist_title': ie_result.get('title'),
1626 'playlist_uploader': ie_result.get('uploader'),
1627 'playlist_uploader_id': ie_result.get('uploader_id'),
1628 'playlist_index': 0,
1629 **kwargs,
1630 }
1631
def __process_playlist(self, ie_result, download):
    """Select, filter and process the entries of a playlist result.

    `ie_result` must contain an 'entries' key. The entries to handle are
    chosen from the 'playlist_items', 'playliststart' and 'playlistend'
    params; each chosen entry is passed through `_match_entry` and then
    processed with `download` forwarded unchanged.

    Returns the updated `ie_result` (after the 'playlist' postprocessors
    have run), or None when writing the playlist info json or description
    reports None.

    Raises EntryNotInPlaylist when there are no entries at all or when a
    previously requested entry cannot be found.
    """
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if 'entries' not in ie_result:
        raise EntryNotInPlaylist('There are no entries')

    # Sentinel marking positions that were not part of a partial result
    MissingEntry = object()
    incomplete_entries = bool(ie_result.get('requested_entries'))
    if incomplete_entries:
        def fill_missing_entries(entries, indices):
            # Re-expand the sparse entry list so that 1-based playlist
            # indices can be used to address it directly
            ret = [MissingEntry] * max(indices)
            for i, entry in zip(indices, entries):
                ret[i - 1] = entry
            return ret
        ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

    playlist_results = []

    playliststart = self.params.get('playliststart', 1)
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(spec):
            # "1,3-5" -> 1, 3, 4, 5
            for string_segment in spec.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    yield from range(int(start), int(end) + 1)
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']
    if isinstance(ie_entries, list):
        playlist_count = len(ie_entries)
        msg = f'Collected {playlist_count} videos; downloading %d of them'
        ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count

        def get_entry(i):
            return ie_entries[i - 1]
    else:
        msg = 'Downloading %d videos'
        if not isinstance(ie_entries, (PagedList, LazyList)):
            ie_entries = LazyList(ie_entries)
        elif isinstance(ie_entries, InAdvancePagedList):
            if ie_entries._pagesize == 1:
                # NOTE(review): this value is not read again in this method
                playlist_count = ie_entries._pagecount

        def get_entry(i):
            # Fetching an item may trigger extraction, so run it through
            # the same exception handling wrapper used for extraction
            return YoutubeDL.__handle_extraction_exceptions(
                lambda self, i: ie_entries[i - 1]
            )(self, i)

    entries, broken = [], False
    items = playlistitems if playlistitems is not None else itertools.count(playliststart)
    for i in items:
        if i == 0:
            continue
        if playlistitems is None and playlistend is not None and playlistend < i:
            break
        entry = None
        try:
            entry = get_entry(i)
            if entry is MissingEntry:
                raise EntryNotInPlaylist()
        except (IndexError, EntryNotInPlaylist):
            if incomplete_entries:
                raise EntryNotInPlaylist(f'Entry {i} cannot be found')
            elif not playlistitems:
                break
        # With explicit playlist items a missing entry is kept as None so
        # that positions still line up with `playlistitems` below
        entries.append(entry)
        try:
            if entry is not None:
                self._match_entry(entry, incomplete=True, silent=True)
        except (ExistingVideoReached, RejectedVideoReached):
            broken = True
            break
    ie_result['entries'] = entries

    # Save playlist_index before re-ordering
    entries = [
        ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
        for i, entry in enumerate(entries, 1)
        if entry is not None]
    n_entries = len(entries)

    if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
        ie_result['playlist_count'] = n_entries

    if not playlistitems and (playliststart != 1 or playlistend):
        playlistitems = list(range(playliststart, playliststart + n_entries))
    ie_result['requested_entries'] = playlistitems

    _infojson_written = False
    write_playlist_files = self.params.get('allow_playlist_files', True)
    if write_playlist_files and self.params.get('list_thumbnails'):
        self.list_thumbnails(ie_result)
    if write_playlist_files and not self.params.get('simulate'):
        ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
        _infojson_written = self._write_info_json(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
        if _infojson_written is None:
            return
        if self._write_description('playlist', ie_result,
                                   self.prepare_filename(ie_copy, 'pl_description')) is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]
    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    # Does not change while iterating; evaluating max() per entry made the
    # loop quadratic in the number of requested playlist items
    last_playlist_index = max(playlistitems) if playlistitems else (playlistend or n_entries)
    for i, entry_tuple in enumerate(entries, 1):
        playlist_index, entry = entry_tuple
        if 'playlist-index' in self.params.get('compat_opts', []):
            playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            '_last_playlist_index': last_playlist_index,
            'playlist_count': ie_result.get('playlist_count'),
            'playlist_index': playlist_index,
            'playlist_autonumber': i,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'webpage_url_domain': get_domain(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        # A non-None result from _match_entry means the entry is skipped
        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
            break
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results

    # Write the updated info to json
    # (ie_copy is only bound when the first write happened, which is also
    # the only case in which _infojson_written can be truthy)
    if _infojson_written and self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
        return

    ie_result = self.run_all_pps('playlist', ie_result)
    self.to_screen(f'[download] Finished downloading playlist: {playlist}')
    return ie_result
1807
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Run one playlist entry through `process_ie_result`.

    Kept as its own method so the extraction-exception decorator applies to
    each entry individually.
    """
    processed = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return processed
1812
1813 def _build_format_filter(self, filter_spec):
1814 " Returns a function to filter the formats according to the filter_spec "
1815
1816 OPERATORS = {
1817 '<': operator.lt,
1818 '<=': operator.le,
1819 '>': operator.gt,
1820 '>=': operator.ge,
1821 '=': operator.eq,
1822 '!=': operator.ne,
1823 }
1824 operator_rex = re.compile(r'''(?x)\s*
1825 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1826 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1827 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1828 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1829 m = operator_rex.fullmatch(filter_spec)
1830 if m:
1831 try:
1832 comparison_value = int(m.group('value'))
1833 except ValueError:
1834 comparison_value = parse_filesize(m.group('value'))
1835 if comparison_value is None:
1836 comparison_value = parse_filesize(m.group('value') + 'B')
1837 if comparison_value is None:
1838 raise ValueError(
1839 'Invalid value %r in format specification %r' % (
1840 m.group('value'), filter_spec))
1841 op = OPERATORS[m.group('op')]
1842
1843 if not m:
1844 STR_OPERATORS = {
1845 '=': operator.eq,
1846 '^=': lambda attr, value: attr.startswith(value),
1847 '$=': lambda attr, value: attr.endswith(value),
1848 '*=': lambda attr, value: value in attr,
1849 '~=': lambda attr, value: value.search(attr) is not None
1850 }
1851 str_operator_rex = re.compile(r'''(?x)\s*
1852 (?P<key>[a-zA-Z0-9._-]+)\s*
1853 (?P<negation>!\s*)?(?P<op>%s)\s*(?P<none_inclusive>\?\s*)?
1854 (?P<quote>["'])?
1855 (?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+))
1856 (?(quote)(?P=quote))\s*
1857 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1858 m = str_operator_rex.fullmatch(filter_spec)
1859 if m:
1860 if m.group('op') == '~=':
1861 comparison_value = re.compile(m.group('value'))
1862 else:
1863 comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value'))
1864 str_op = STR_OPERATORS[m.group('op')]
1865 if m.group('negation'):
1866 op = lambda attr, value: not str_op(attr, value)
1867 else:
1868 op = str_op
1869
1870 if not m:
1871 raise SyntaxError('Invalid filter specification %r' % filter_spec)
1872
1873 def _filter(f):
1874 actual_value = f.get(m.group('key'))
1875 if actual_value is None:
1876 return m.group('none_inclusive')
1877 return op(actual_value, comparison_value)
1878 return _filter
1879
def _check_formats(self, formats):
    """Yield only those `formats` for which a test download succeeds.

    Each format is downloaded in test mode into a throwaway file inside the
    'temp' output path; the file is removed again afterwards. Formats whose
    temp directory cannot be prepared are skipped silently, formats that
    fail to download are skipped with a message.
    """
    for fmt in formats:
        self.to_screen('[info] Testing format %s' % fmt['format_id'])
        temp_dir = self.get_output_path('temp')
        if not self._ensure_dir_exists(f'{temp_dir}/'):
            continue

        # Only the name is needed; the downloader writes the file itself
        probe = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=temp_dir or None)
        probe.close()
        probe_path = probe.name
        try:
            downloaded, _ = self.dl(probe_path, fmt, test=True)
        except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
            downloaded = False
        finally:
            if os.path.exists(probe_path):
                try:
                    os.remove(probe_path)
                except OSError:
                    self.report_warning('Unable to delete temporary file "%s"' % probe_path)

        if not downloaded:
            self.to_screen('[info] Unable to download format %s. Skipping...' % fmt['format_id'])
            continue
        yield fmt
1902
1903 def _default_format_spec(self, info_dict, download=True):
1904
1905 def can_merge():
1906 merger = FFmpegMergerPP(self)
1907 return merger.available and merger.can_merge()
1908
1909 prefer_best = (
1910 not self.params.get('simulate')
1911 and download
1912 and (
1913 not can_merge()
1914 or info_dict.get('is_live', False)
1915 or self.outtmpl_dict['default'] == '-'))
1916 compat = (
1917 prefer_best
1918 or self.params.get('allow_multiple_audio_streams', False)
1919 or 'format-spec' in self.params.get('compat_opts', []))
1920
1921 return (
1922 'best/bestvideo+bestaudio' if prefer_best
1923 else 'bestvideo*+bestaudio/best' if not compat
1924 else 'bestvideo+bestaudio/best')
1925
def build_format_selector(self, format_spec):
    """Compile a format specification string into a selector function.

    The specification is tokenized, parsed into a tree of FormatSelector
    nodes (single format, "/" alternatives, "+" merges, "(...)" groups and
    "," lists, each optionally followed by "[...]" filters) and that tree
    is turned into nested closures.

    The returned callable takes a context dict with the keys 'formats' and
    'incomplete_formats' and produces the selected format dicts.

    Raises SyntaxError when `format_spec` is malformed.
    """
    def syntax_error(note, start):
        # `start` is a tokenizer position; its column is used to place the
        # caret under the offending part of the specification
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    check_formats = self.params.get('check_formats') == 'selected'

    def _parse_filter(tokens):
        # Collect the raw text up to the closing bracket
        filter_parts = []
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
                        break
            elif type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive descent parser; `tokens` must offer restore_last_token()
        # so that a token belonging to an outer level can be handed back
        selectors = []
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    # A bare filter applies to "best"
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        # Combine two (possibly already merged) formats into one format dict
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            get_no_more = {'video': False, 'audio': False}
            # Build a new list instead of popping from the list being
            # iterated: popping shifted the following element into the
            # current index, so it was never inspected and could slip
            # through as an extra audio/video stream
            kept_formats = []
            for fmt_info in formats_info:
                if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                    continue
                for aud_vid in ['audio', 'video']:
                    if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                        if get_no_more[aud_vid]:
                            break
                        get_no_more[aud_vid] = True
                else:
                    kept_formats.append(fmt_info)
            # When no candidate has an audio or video stream, fall back to
            # the last one (the previous outcome for two such formats)
            # instead of producing a merge of nothing
            formats_info = kept_formats or formats_info[-1:]

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(filtered('format')),
            'format_id': '+'.join(filtered('format_id')),
            'ext': output_ext,
            'protocol': '+'.join(map(determine_protocol, formats_info)),
            'language': '+'.join(orderedSet(filtered('language'))) or None,
            'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
            'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
            'tbr': sum(filtered('tbr', 'vbr', 'abr')),
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'dynamic_range': the_only_video.get('dynamic_range'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
                'asr': the_only_audio.get('asr'),
            })

        return new_dict

    def _check_formats(formats):
        # Only test-download formats when check_formats is 'selected'
        if not check_formats:
            yield from formats
            return
        yield from self._check_formats(formats)

    def _build_selector_function(selector):
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    yield from f(ctx)
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                    yield _merge(pair)

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    yield from _check_formats(ctx['formats'][::-1])
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(_check_formats(ctx['formats']))
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, format_reverse, format_idx = False, True, 1
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    _filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else lambda f: True)  # b*, w*
                    filter_f = lambda f: _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                else:
                    if format_spec in self._format_selection_exts['audio']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                    elif format_spec in self._format_selection_exts['video']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                    elif format_spec in self._format_selection_exts['storyboards']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                    else:
                        filter_f = lambda f: f.get('format_id') == format_spec  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if format_fallback and ctx['incomplete_formats'] and not matches:
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        matches = formats
                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                    try:
                        yield matches[format_idx - 1]
                    except IndexError:
                        return

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply the "[...]" filters to a copy so the caller's context
            # is left untouched
            ctx_copy = dict(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Iterator over the token list that can step back by one token
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
2250
def _calc_headers(self, info_dict):
    """Build the HTTP headers to use for `info_dict`.

    Starts from a copy of the standard headers, overlays the entry's own
    'http_headers', then adds a Cookie header when `_calc_cookies` yields
    one and an X-Forwarded-For header when the entry carries
    '__x_forwarded_for_ip' and no such header is set yet.
    """
    headers = std_headers.copy()
    extra_headers = info_dict.get('http_headers') or {}
    headers.update(extra_headers)

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip and 'X-Forwarded-For' not in headers:
        headers['X-Forwarded-For'] = forwarded_ip

    return headers
2265
def _calc_cookies(self, info_dict):
    """Return the Cookie header the cookie jar would add for info_dict['url']."""
    # A throwaway request lets the cookie jar do the matching for us
    probe_request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    cookie_header = probe_request.get_header('Cookie')
    return cookie_header
2270
2271 def _sort_thumbnails(self, thumbnails):
2272 thumbnails.sort(key=lambda t: (
2273 t.get('preference') if t.get('preference') is not None else -1,
2274 t.get('width') if t.get('width') is not None else -1,
2275 t.get('height') if t.get('height') is not None else -1,
2276 t.get('id') if t.get('id') is not None else '',
2277 t.get('url')))
2278
def _sanitize_thumbnails(self, info_dict):
    """Normalize the 'thumbnails' list of `info_dict` in place.

    A lone 'thumbnail' URL is promoted to a one-element list. The list is
    then sorted, and every item gets an 'id' (its position, if missing), a
    'resolution' (when width and height are both set) and a sanitized
    'url'. When the 'check_formats' param is exactly True, the stored list
    is replaced by a lazy list that only yields thumbnails whose URL
    answers a HEAD request. Does nothing when there are no thumbnails.
    """
    thumbs = info_dict.get('thumbnails')
    if thumbs is None:
        single_url = info_dict.get('thumbnail')
        if single_url:
            thumbs = [{'url': single_url}]
            info_dict['thumbnails'] = thumbs
    if not thumbs:
        return

    def reachable(candidates):
        for thumb in candidates:
            self.to_screen(f'[info] Testing thumbnail {thumb["id"]}')
            try:
                self.urlopen(HEADRequest(thumb['url']))
            except network_exceptions as err:
                self.to_screen(f'[info] Unable to connect to thumbnail {thumb["id"]} URL {thumb["url"]!r} - {err}. Skipping...')
            else:
                yield thumb

    self._sort_thumbnails(thumbs)
    for index, thumb in enumerate(thumbs):
        if thumb.get('id') is None:
            thumb['id'] = '%d' % index
        width, height = thumb.get('width'), thumb.get('height')
        if width and height:
            thumb['resolution'] = '%dx%d' % (width, height)
        thumb['url'] = sanitize_url(thumb['url'])

    if self.params.get('check_formats') is True:
        # Walk the candidates in reverse order, lazily, and present the
        # survivors reversed again
        thumbs = LazyList(reachable(thumbs[::-1]), reverse=True)
    info_dict['thumbnails'] = thumbs
2310
2311 def process_video_result(self, info_dict, download=True):
2312 assert info_dict.get('_type', 'video') == 'video'
2313 self._num_videos += 1
2314
2315 if 'id' not in info_dict:
2316 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2317 elif not info_dict.get('id'):
2318 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2319
2320 info_dict['fulltitle'] = info_dict.get('title')
2321 if 'title' not in info_dict:
2322 raise ExtractorError('Missing "title" field in extractor result',
2323 video_id=info_dict['id'], ie=info_dict['extractor'])
2324 elif not info_dict.get('title'):
2325 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2326 info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
2327
2328 def report_force_conversion(field, field_not, conversion):
2329 self.report_warning(
2330 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2331 % (field, field_not, conversion))
2332
2333 def sanitize_string_field(info, string_field):
2334 field = info.get(string_field)
2335 if field is None or isinstance(field, compat_str):
2336 return
2337 report_force_conversion(string_field, 'a string', 'string')
2338 info[string_field] = compat_str(field)
2339
2340 def sanitize_numeric_fields(info):
2341 for numeric_field in self._NUMERIC_FIELDS:
2342 field = info.get(numeric_field)
2343 if field is None or isinstance(field, compat_numeric_types):
2344 continue
2345 report_force_conversion(numeric_field, 'numeric', 'int')
2346 info[numeric_field] = int_or_none(field)
2347
2348 sanitize_string_field(info_dict, 'id')
2349 sanitize_numeric_fields(info_dict)
2350
2351 if 'playlist' not in info_dict:
2352 # It isn't part of a playlist
2353 info_dict['playlist'] = None
2354 info_dict['playlist_index'] = None
2355
2356 self._sanitize_thumbnails(info_dict)
2357
2358 thumbnail = info_dict.get('thumbnail')
2359 thumbnails = info_dict.get('thumbnails')
2360 if thumbnail:
2361 info_dict['thumbnail'] = sanitize_url(thumbnail)
2362 elif thumbnails:
2363 info_dict['thumbnail'] = thumbnails[-1]['url']
2364
2365 if info_dict.get('display_id') is None and 'id' in info_dict:
2366 info_dict['display_id'] = info_dict['id']
2367
2368 if info_dict.get('duration') is not None:
2369 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2370
2371 for ts_key, date_key in (
2372 ('timestamp', 'upload_date'),
2373 ('release_timestamp', 'release_date'),
2374 ('modified_timestamp', 'modified_date'),
2375 ):
2376 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2377 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2378 # see http://bugs.python.org/issue1646728)
2379 try:
2380 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2381 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2382 except (ValueError, OverflowError, OSError):
2383 pass
2384
2385 live_keys = ('is_live', 'was_live')
2386 live_status = info_dict.get('live_status')
2387 if live_status is None:
2388 for key in live_keys:
2389 if info_dict.get(key) is False:
2390 continue
2391 if info_dict.get(key):
2392 live_status = key
2393 break
2394 if all(info_dict.get(key) is False for key in live_keys):
2395 live_status = 'not_live'
2396 if live_status:
2397 info_dict['live_status'] = live_status
2398 for key in live_keys:
2399 if info_dict.get(key) is None:
2400 info_dict[key] = (live_status == key)
2401
2402 # Auto generate title fields corresponding to the *_number fields when missing
2403 # in order to always have clean titles. This is very common for TV series.
2404 for field in ('chapter', 'season', 'episode'):
2405 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2406 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2407
2408 for cc_kind in ('subtitles', 'automatic_captions'):
2409 cc = info_dict.get(cc_kind)
2410 if cc:
2411 for _, subtitle in cc.items():
2412 for subtitle_format in subtitle:
2413 if subtitle_format.get('url'):
2414 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2415 if subtitle_format.get('ext') is None:
2416 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2417
2418 automatic_captions = info_dict.get('automatic_captions')
2419 subtitles = info_dict.get('subtitles')
2420
2421 info_dict['requested_subtitles'] = self.process_subtitles(
2422 info_dict['id'], subtitles, automatic_captions)
2423
2424 if info_dict.get('formats') is None:
2425 # There's only one format available
2426 formats = [info_dict]
2427 else:
2428 formats = info_dict['formats']
2429
2430 info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2431 if not self.params.get('allow_unplayable_formats'):
2432 formats = [f for f in formats if not f.get('has_drm')]
2433
2434 if info_dict.get('is_live'):
2435 get_from_start = bool(self.params.get('live_from_start'))
2436 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2437 if not get_from_start:
2438 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2439
2440 if not formats:
2441 self.raise_no_formats(info_dict)
2442
2443 def is_wellformed(f):
2444 url = f.get('url')
2445 if not url:
2446 self.report_warning(
2447 '"url" field is missing or empty - skipping format, '
2448 'there is an error in extractor')
2449 return False
2450 if isinstance(url, bytes):
2451 sanitize_string_field(f, 'url')
2452 return True
2453
2454 # Filter out malformed formats for better extraction robustness
2455 formats = list(filter(is_wellformed, formats))
2456
2457 formats_dict = {}
2458
2459 # We check that all the formats have the format and format_id fields
2460 for i, format in enumerate(formats):
2461 sanitize_string_field(format, 'format_id')
2462 sanitize_numeric_fields(format)
2463 format['url'] = sanitize_url(format['url'])
2464 if not format.get('format_id'):
2465 format['format_id'] = compat_str(i)
2466 else:
2467 # Sanitize format_id from characters used in format selector expression
2468 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2469 format_id = format['format_id']
2470 if format_id not in formats_dict:
2471 formats_dict[format_id] = []
2472 formats_dict[format_id].append(format)
2473
2474 # Make sure all formats have unique format_id
2475 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2476 for format_id, ambiguous_formats in formats_dict.items():
2477 ambigious_id = len(ambiguous_formats) > 1
2478 for i, format in enumerate(ambiguous_formats):
2479 if ambigious_id:
2480 format['format_id'] = '%s-%d' % (format_id, i)
2481 if format.get('ext') is None:
2482 format['ext'] = determine_ext(format['url']).lower()
2483 # Ensure there is no conflict between id and ext in format selection
2484 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2485 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2486 format['format_id'] = 'f%s' % format['format_id']
2487
2488 for i, format in enumerate(formats):
2489 if format.get('format') is None:
2490 format['format'] = '{id} - {res}{note}'.format(
2491 id=format['format_id'],
2492 res=self.format_resolution(format),
2493 note=format_field(format, 'format_note', ' (%s)'),
2494 )
2495 if format.get('protocol') is None:
2496 format['protocol'] = determine_protocol(format)
2497 if format.get('resolution') is None:
2498 format['resolution'] = self.format_resolution(format, default=None)
2499 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2500 format['dynamic_range'] = 'SDR'
2501 if (info_dict.get('duration') and format.get('tbr')
2502 and not format.get('filesize') and not format.get('filesize_approx')):
2503 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2504
2505 # Add HTTP headers, so that external programs can use them from the
2506 # json output
2507 full_format_info = info_dict.copy()
2508 full_format_info.update(format)
2509 format['http_headers'] = self._calc_headers(full_format_info)
2510 # Remove private housekeeping stuff
2511 if '__x_forwarded_for_ip' in info_dict:
2512 del info_dict['__x_forwarded_for_ip']
2513
2514 # TODO Central sorting goes here
2515
2516 if self.params.get('check_formats') is True:
2517 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2518
2519 if not formats or formats[0] is not info_dict:
2520 # only set the 'formats' fields if the original info_dict list them
2521 # otherwise we end up with a circular reference, the first (and unique)
2522 # element in the 'formats' field in info_dict is info_dict itself,
2523 # which can't be exported to json
2524 info_dict['formats'] = formats
2525
2526 info_dict, _ = self.pre_process(info_dict)
2527
2528 # The pre-processors may have modified the formats
2529 formats = info_dict.get('formats', [info_dict])
2530
2531 list_only = self.params.get('simulate') is None and (
2532 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2533 interactive_format_selection = not list_only and self.format_selector == '-'
2534 if self.params.get('list_thumbnails'):
2535 self.list_thumbnails(info_dict)
2536 if self.params.get('listsubtitles'):
2537 if 'automatic_captions' in info_dict:
2538 self.list_subtitles(
2539 info_dict['id'], automatic_captions, 'automatic captions')
2540 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2541 if self.params.get('listformats') or interactive_format_selection:
2542 self.list_formats(info_dict)
2543 if list_only:
2544 # Without this printing, -F --print-json will not work
2545 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2546 return
2547
2548 format_selector = self.format_selector
2549 if format_selector is None:
2550 req_format = self._default_format_spec(info_dict, download=download)
2551 self.write_debug('Default format spec: %s' % req_format)
2552 format_selector = self.build_format_selector(req_format)
2553
2554 while True:
2555 if interactive_format_selection:
2556 req_format = input(
2557 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2558 try:
2559 format_selector = self.build_format_selector(req_format)
2560 except SyntaxError as err:
2561 self.report_error(err, tb=False, is_error=False)
2562 continue
2563
2564 # While in format selection we may need to have an access to the original
2565 # format set in order to calculate some metrics or do some processing.
2566 # For now we need to be able to guess whether original formats provided
2567 # by extractor are incomplete or not (i.e. whether extractor provides only
2568 # video-only or audio-only formats) for proper formats selection for
2569 # extractors with such incomplete formats (see
2570 # https://github.com/ytdl-org/youtube-dl/pull/5556).
2571 # Since formats may be filtered during format selection and may not match
2572 # the original formats the results may be incorrect. Thus original formats
2573 # or pre-calculated metrics should be passed to format selection routines
2574 # as well.
2575 # We will pass a context object containing all necessary additional data
2576 # instead of just formats.
2577 # This fixes incorrect format selection issue (see
2578 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2579 incomplete_formats = (
2580 # All formats are video-only or
2581 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2582 # all formats are audio-only
2583 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2584
2585 ctx = {
2586 'formats': formats,
2587 'incomplete_formats': incomplete_formats,
2588 }
2589
2590 formats_to_download = list(format_selector(ctx))
2591 if interactive_format_selection and not formats_to_download:
2592 self.report_error('Requested format is not available', tb=False, is_error=False)
2593 continue
2594 break
2595
2596 if not formats_to_download:
2597 if not self.params.get('ignore_no_formats_error'):
2598 raise ExtractorError('Requested format is not available', expected=True,
2599 video_id=info_dict['id'], ie=info_dict['extractor'])
2600 self.report_warning('Requested format is not available')
2601 # Process what we can, even without any available formats.
2602 formats_to_download = [{}]
2603
2604 best_format = formats_to_download[-1]
2605 if download:
2606 if best_format:
2607 self.to_screen(
2608 f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2609 + ', '.join([f['format_id'] for f in formats_to_download]))
2610 max_downloads_reached = False
2611 for i, fmt in enumerate(formats_to_download):
2612 formats_to_download[i] = new_info = dict(info_dict)
2613 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2614 new_info.update(fmt)
2615 new_info['__original_infodict'] = info_dict
2616 try:
2617 self.process_info(new_info)
2618 except MaxDownloadsReached:
2619 max_downloads_reached = True
2620 new_info.pop('__original_infodict')
2621 # Remove copied info
2622 for key, val in tuple(new_info.items()):
2623 if info_dict.get(key) == val:
2624 new_info.pop(key)
2625 if max_downloads_reached:
2626 break
2627
2628 write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
2629 assert write_archive.issubset({True, False, 'ignore'})
2630 if True in write_archive and False not in write_archive:
2631 self.record_download_archive(info_dict)
2632
2633 info_dict['requested_downloads'] = formats_to_download
2634 info_dict = self.run_all_pps('after_video', info_dict)
2635 if max_downloads_reached:
2636 raise MaxDownloadsReached()
2637
2638 # We update the info dict with the selected best quality format (backwards compatibility)
2639 info_dict.update(best_format)
2640 return info_dict
2641
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    video_id is only used in warning messages.
    normal_subtitles and automatic_captions map a language code to a list
    of format dicts (each with an 'ext' key); either may be None/empty.
    They are considered only when the 'writesubtitles' respectively
    'writeautomaticsub' param is set, and normal subtitles win when a
    language is present in both.

    Returns None when nothing is available/requested, otherwise a dict
    mapping each selected language to the chosen format dict.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            # Never let an automatic caption replace a normal subtitle
            available_subs.setdefault(lang, cap_info)

    # available_subs can only be populated when one of the two params is set,
    # so a single emptiness check covers "nothing requested" as well
    if not available_subs:
        return None

    all_sub_langs = available_subs.keys()
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        # A list is used so that the order of languages will be the same as
        # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
        requested_langs = []
        for lang_re in self.params.get('subtitleslangs'):
            if lang_re == 'all':
                requested_langs.extend(all_sub_langs)
                continue
            # startswith() rather than [0]: an empty pattern must not raise IndexError
            discard = lang_re.startswith('-')
            if discard:
                lang_re = lang_re[1:]
            current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
            if discard:
                for lang in current_langs:
                    while lang in requested_langs:
                        requested_langs.remove(lang)
            else:
                requested_langs.extend(current_langs)
        requested_langs = orderedSet(requested_langs)
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        requested_langs = [list(all_sub_langs)[0]]
    if requested_langs:
        self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty list is as unusable as a missing entry; indexing it with
        # [-1] below would raise IndexError
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                f = formats[-1]
                break
            matches = [fmt for fmt in formats if fmt['ext'] == ext]
            if matches:
                f = matches[-1]
                break
        else:
            # None of the preferred extensions matched; fall back to the last format
            f = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, f['ext']))
        subs[lang] = f
    return subs
2709
def _forceprint(self, key, info_dict):
    """Output the 'forceprint' and 'print_to_file' templates registered under key

    Does nothing when info_dict is None. The templates are evaluated against
    a copy of info_dict extended with the rendered formats, thumbnails,
    subtitles and automatic captions tables; the target filename of
    'print_to_file' is evaluated against the unmodified info_dict.
    """
    if info_dict is None:
        return

    info_copy = info_dict.copy()
    info_copy.update({
        'formats_table': self.render_formats_table(info_dict),
        'thumbnails_table': self.render_thumbnails_table(info_dict),
        'subtitles_table': self.render_subtitles_table(
            info_dict.get('id'), info_dict.get('subtitles')),
        'automatic_captions_table': self.render_subtitles_table(
            info_dict.get('id'), info_dict.get('automatic_captions')),
    })

    def expand_shorthand(template):
        # "field" -> "%(field)s" and "field=" -> "field = %(field)r";
        # anything else is already a full output template
        shorthand = re.match(r'\w+(=?)$', template)
        if not shorthand:
            return template
        if shorthand.group(1):
            field = template[:-1]
            return f'{field} = %({field})r'
        return f'%({template})s'

    for template in self.params['forceprint'].get(key, []):
        self.to_stdout(self.evaluate_outtmpl(expand_shorthand(template), info_copy))

    for template, file_template in self.params['print_to_file'].get(key, []):
        filename = self.evaluate_outtmpl(file_template, info_dict)
        template = expand_shorthand(template)
        self.to_screen(f'[info] Writing {template!r} to: {filename}')
        # Opened in append mode, so repeated writes accumulate in the file
        with io.open(filename, 'a', encoding='utf-8') as outfile:
            outfile.write(self.evaluate_outtmpl(template, info_copy) + '\n')
2736
def __forced_printings(self, info_dict, filename, incomplete):
    """Print the fields requested through the 'force*' params

    Works on a shallow copy of info_dict, so the 'filename' and 'urls' keys
    added here do not leak into the caller's dict. When incomplete is true,
    a forced "mandatory" field that is missing is skipped instead of
    raising KeyError.
    """
    info_dict = info_dict.copy()

    def is_forced(field):
        return self.params.get('force%s' % field, False)

    def print_mandatory(field, actual_field=None):
        source = field if actual_field is None else actual_field
        if not is_forced(field):
            return
        if incomplete and info_dict.get(source) is None:
            return
        self.to_stdout(info_dict[source])

    def print_optional(field):
        if is_forced(field) and info_dict.get(field) is not None:
            self.to_stdout(info_dict[field])

    if filename is not None:
        info_dict['filename'] = filename

    requested_formats = info_dict.get('requested_formats')
    if requested_formats is not None:
        # For RTMP URLs, also include the playpath
        info_dict['urls'] = '\n'.join(
            fmt['url'] + fmt.get('play_path', '') for fmt in requested_formats)
    elif 'url' in info_dict:
        info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')

    needs_post_extract = (
        self.params.get('forcejson')
        or self.params['forceprint'].get('video')
        or self.params['print_to_file'].get('video'))
    if needs_post_extract:
        self.post_extract(info_dict)
    self._forceprint('video', info_dict)

    print_mandatory('title')
    print_mandatory('id')
    print_mandatory('url', 'urls')
    for optional_field in ('thumbnail', 'description', 'filename'):
        print_optional(optional_field)
    if self.params.get('forceduration') and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    print_mandatory('format')

    if self.params.get('forcejson'):
        self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
2777
def dl(self, name, info, subtitle=False, test=False):
    """Run a suitable downloader for info, writing to name

    With test=True a fixed, reduced parameter set is handed to the
    downloader and neither the progress hooks nor the debug message are
    used. Returns whatever the downloader's download() returns.
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    params = self.params
    if test:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_cls(self, params)

    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)

        def printable_url(fmt):
            # Do not dump the payload of data: URLs into the debug output
            if fmt['url'].startswith('data:'):
                return fmt['url'].split(',')[0] + ',<data>'
            return fmt['url']

        urls = '", "'.join(
            printable_url(fmt) for fmt in info.get('requested_formats', []) or [info])
        self.write_debug('Invoking downloader on "%s"' % urls)

    # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
    # But it may contain objects that are not deep-copyable
    new_info = self._copy_infodict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)
2812
def existing_file(self, filepaths, *, default_overwrite=True):
    """Return the first already existing path, or remove all existing ones

    When the 'overwrites' param (falling back to default_overwrite) is
    false and at least one of filepaths exists, the first existing path is
    returned. Otherwise every existing path is reported, deleted and None
    is returned.
    """
    present = [path for path in orderedSet(filepaths) if os.path.exists(path)]
    if present and not self.params.get('overwrites', default_overwrite):
        return present[0]

    for stale in present:
        self.report_file_delete(stale)
        os.remove(stale)
    return None
2822
def process_info(self, info_dict):
    """Process a single resolved IE result. (Modified it in-place)

    In order: filters the entry through _match_entry, performs the forced
    printings, writes the auxiliary files (description, subtitles,
    thumbnails, info json, annotations, internet shortcuts), runs the
    'before_dl' pre-processors, downloads the media (queueing a merger when
    several formats were requested), queues the fixup post-processors and
    finally runs post-processing and the post hooks.

    Always returns None; most failures are reported through
    report_error/report_warning and simply end the processing early.
    info_dict['__write_download_archive'] is set to 'ignore' when the entry
    was rejected by _match_entry, to the 'force_write_download_archive'
    param for simulate/skip_download, and to True once post-processing and
    the post hooks succeeded.

    Raises UnavailableVideoError when the download fails with an
    OSError/IOError and MaxDownloadsReached once the number of processed
    entries reaches the 'max_downloads' param.
    """

    assert info_dict.get('_type', 'video') == 'video'
    original_infodict = info_dict

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    if self._match_entry(info_dict) is not None:
        info_dict['__write_download_archive'] = 'ignore'
        return

    self.post_extract(info_dict)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    if self.params.get('simulate'):
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
        return

    if full_filename is None:
        return
    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self._write_description('video', info_dict,
                               self.prepare_filename(info_dict, 'description')) is None:
        return

    sub_files = self._write_subtitles(info_dict, temp_filename)
    if sub_files is None:
        return
    files_to_move.update(dict(sub_files))

    thumb_files = self._write_thumbnails(
        'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
    if thumb_files is None:
        return
    files_to_move.update(dict(thumb_files))

    infofn = self.prepare_filename(info_dict, 'infojson')
    _infojson_written = self._write_info_json('video', info_dict, infofn)
    if _infojson_written:
        info_dict['infojson_filename'] = infofn
        # For backward compatibility, even though it was a private field
        info_dict['__infojson_filename'] = infofn
    elif _infojson_written is None:
        return

    # Note: Annotations are deprecated
    annofn = None
    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
    if annofn:
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    # Write internet shortcut files
    def _write_link_file(link_type):
        # Returns True when the shortcut is present afterwards, False on error
        if 'webpage_url' not in info_dict:
            self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
            return False
        linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
        if not self._ensure_dir_exists(encodeFilename(linkfn)):
            return False
        # Keep an existing shortcut only when overwriting is disabled, the
        # same rule the annotations file follows above. The check used to be
        # inverted: it skipped the write when overwrites were enabled and
        # rewrote the file when they were disabled.
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
            return True
        try:
            self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
            with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                         newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
                if link_type == 'desktop':
                    # Strip the ".desktop" suffix again for the displayed name
                    template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
        except (OSError, IOError):
            self.report_error(f'Cannot write internet shortcut {linkfn}')
            return False
        return True

    write_links = {
        'url': self.params.get('writeurllink'),
        'webloc': self.params.get('writewebloclink'),
        'desktop': self.params.get('writedesktoplink'),
    }
    if self.params.get('writelink'):
        # 'writelink' selects the shortcut type from the running platform
        link_type = ('webloc' if sys.platform == 'darwin'
                     else 'desktop' if sys.platform.startswith('linux')
                     else 'url')
        write_links[link_type] = True

    if any(should_write and not _write_link_file(link_type)
           for link_type, should_write in write_links.items()):
        return

    def replace_info_dict(new_info):
        # Update the dict in-place so that the caller's reference stays valid
        nonlocal info_dict
        if new_info == info_dict:
            return
        info_dict.clear()
        info_dict.update(new_info)

    try:
        new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
        replace_info_dict(new_info)
    except PostProcessingError as err:
        self.report_error('Preprocessing: %s' % str(err))
        return

    if self.params.get('skip_download'):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_video_file(*filepaths):
                # Look for each path both with the final (converted) extension
                # and as given; update info_dict['ext'] to the file found
                ext = info_dict.get('ext')
                converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
                file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
                                          default_overwrite=False)
                if file:
                    info_dict['ext'] = os.path.splitext(file)[1][1:]
                return file

            success = True
            if info_dict.get('requested_formats') is not None:

                def compatible_formats(formats):
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(format.get('ext') for format in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv')
                    if (info_dict['ext'] == 'webm'
                            and info_dict.get('thumbnails')
                            # check with type instead of pp_key, __name__, or isinstance
                            # since we dont want any custom PPs to trigger this
                            and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return '%s.%s' % (filename_wo_ext, ext)

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_video_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                downloaded = []
                merger = FFmpegMergerPP(self)

                fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif fd:
                    # A single downloader handles all the requested formats at once
                    for f in requested_formats if fd != FFmpegFD else []:
                        f['filepath'] = fname = prepend_extension(
                            correct_ext(temp_filename, info_dict['ext']),
                            'f%s' % f['format_id'], info_dict['ext'])
                        downloaded.append(fname)
                    info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        msg = 'You have requested merging of multiple formats but ffmpeg is not installed'
                        if not self.params.get('ignoreerrors'):
                            self.report_error(f'{msg}. Aborting due to --abort-on-error')
                            return
                        self.report_warning(f'{msg}. The formats won\'t be merged')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                        fname = temp_filename
                    # Download every requested format separately
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            f['filepath'] = fname
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success

                if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
                    info_dict['__postprocessors'].append(merger)
                    info_dict['__files_to_merge'] = downloaded
                    # Even if there were no downloads, it is being merged only now
                    info_dict['__real_download'] = True
                else:
                    for file in downloaded:
                        files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_video_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and full_filename != '-':

            def fixup():
                # Queue the ffmpeg fixup post-processors according to the 'fixup' param
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = False
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    # When cndn holds, either queue the post-processor cls or only warn with msg
                    if not cndn:
                        return
                    if not do_fixup:
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(
                    stretched_ratio not in (1, None),
                    f'Non-uniform pixel ratio {stretched_ratio}',
                    FFmpegFixupStretchedPP)

                ffmpeg_fixup(
                    (info_dict.get('requested_formats') is None
                     and info_dict.get('container') == 'm4a_dash'
                     and info_dict.get('ext') == 'm4a'),
                    'writing DASH m4a. Only some players support this container',
                    FFmpegFixupM4aPP)

                downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                downloader = downloader.__name__ if downloader else None

                if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
                    ffmpeg_fixup(downloader == 'HlsFD',
                                 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                 FFmpegFixupM3u8PP)
                    ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
                                 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            info_dict['__write_download_archive'] = True

    if self.params.get('force_write_download_archive'):
        info_dict['__write_download_archive'] = True

    # Make sure the info_dict was modified in-place
    assert info_dict is original_infodict

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None and self._num_downloads >= int(max_downloads):
        raise MaxDownloadsReached()
3192
def __download_wrapper(self, func):
    """Wrap func with the common handling of download related exceptions

    The returned wrapper always returns None. UnavailableVideoError is
    reported and swallowed; MaxDownloadsReached is announced and re-raised;
    any other DownloadCancelled is announced and re-raised unless the
    'break_per_url' param is set. When func succeeds and 'dump_single_json'
    is set, its result is printed as JSON.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except UnavailableVideoError as err:
            self.report_error(err)
            return
        except MaxDownloadsReached as err:
            self.to_screen(f'[info] {err}')
            raise
        except DownloadCancelled as err:
            self.to_screen(f'[info] {err}')
            if not self.params.get('break_per_url'):
                raise
            return

        # Only reached when func() finished without raising
        if not self.params.get('dump_single_json', False):
            return
        self.post_extract(result)
        self.to_stdout(json.dumps(self.sanitize_info(result)))
    return wrapper
3212
def download(self, url_list):
    """Download a given list of URLs."""
    url_list = variadic(url_list)  # Passing a single URL is a common mistake
    outtmpl = self.outtmpl_dict['default']

    # A template without any '%' field yields the same filename for every URL
    writes_single_file = outtmpl != '-' and '%' not in outtmpl
    if len(url_list) > 1 and writes_single_file and self.params.get('max_downloads') != 1:
        raise SameFileError(outtmpl)

    for url in url_list:
        guarded_extract = self.__download_wrapper(self.extract_info)
        use_generic = self.params.get('force_generic_extractor', False)
        guarded_extract(url, force_generic_extractor=use_generic)

    return self._download_retcode
3228
def download_with_info_file(self, info_filename):
    """Process the info dict stored as JSON in info_filename

    If processing raises DownloadError, EntryNotInPlaylist or ReExtractInfo
    and the stored info has a 'webpage_url', the download is retried from
    that URL; without one the exception is re-raised.
    Returns the download return code.
    """
    info_source = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(info_source) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        raw_info = json.loads('\n'.join(lines))
        info = self.sanitize_info(raw_info, self.params.get('clean_infojson', True))

    try:
        self.__download_wrapper(self.process_ie_result)(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
        if not isinstance(e, EntryNotInPlaylist):
            self.to_stderr('\r')
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
        return self.download([webpage_url])
    return self._download_retcode
3247
@staticmethod
def sanitize_info(info_dict, remove_private_keys=False):
    """Return a JSON-friendly copy of ``info_dict``.

    ``None`` is returned unchanged. Otherwise 'epoch' and '_type' defaults
    are set on ``info_dict`` itself before the filtered copy is built.
    With ``remove_private_keys`` set, keys starting with '_' (other than
    '_type'), a fixed list of internal keys and keys whose value is None
    are dropped at every nesting level.
    """
    if info_dict is None:
        return info_dict
    info_dict.setdefault('epoch', int(time.time()))
    info_dict.setdefault('_type', 'video')

    # Always remove this since this may contain a copy of the entire dict
    dropped = {'__original_infodict'}
    # Always keep this to facilitate load-info-json
    preserved = ['_type']

    if remove_private_keys:
        dropped |= {
            'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
            'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
        }

        def reject(key, value):
            if key in preserved:
                return False
            return key.startswith('_') or key in dropped or value is None
    else:
        def reject(key, value):
            return key in dropped

    def filter_fn(obj):
        if isinstance(obj, dict):
            kept = {}
            for key, value in obj.items():
                if not reject(key, value):
                    kept[key] = filter_fn(value)
            return kept
        if isinstance(obj, (list, tuple, set, LazyList)):
            return [filter_fn(item) for item in obj]
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        # Anything else is not JSON-serializable; fall back to its repr
        return repr(obj)

    return filter_fn(info_dict)
3278
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    """Backward-compatible alias of ``sanitize_info``.

    ``actually_filter`` is passed on as ``remove_private_keys``.
    """
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)
3283
@staticmethod
def post_extract(info_dict):
    """Merge the results of the deferred ``__post_extractor`` into ``info_dict``.

    The update happens in place. For 'playlist' and 'multi_video' results
    every entry is processed recursively instead. For any other result the
    callable stored under '__post_extractor' (if any) is called, its items
    are merged into the dict and into its '__original_infodict' (if any),
    and the '__post_extractor' key is removed from both.

    ``None`` is tolerated for ``info_dict``, for the 'entries' value and for
    individual entries.
    """
    def actual_post_extract(info_dict):
        if info_dict.get('_type') in ('playlist', 'multi_video'):
            # 'entries' may be absent or explicitly None; treat both as empty
            for video_dict in info_dict.get('entries') or ():
                actual_post_extract(video_dict or {})
            return

        post_extractor = info_dict.get('__post_extractor') or (lambda: {})
        extra = dict(post_extractor())
        info_dict.update(extra)
        info_dict.pop('__post_extractor', None)

        original_infodict = info_dict.get('__original_infodict') or {}
        original_infodict.update(extra)
        original_infodict.pop('__post_extractor', None)

    actual_post_extract(info_dict or {})
3302
def run_pp(self, pp, infodict):
    """Run the postprocessor ``pp`` on ``infodict`` and return the resulting info.

    ``pp.run`` returns the files it no longer needs together with the
    updated info. Those files are either kept and registered in
    '__files_to_move' (``keepvideo`` param) or deleted from disk.
    A PostProcessingError is reported instead of raised only when the
    ``ignoreerrors`` param is exactly True.
    """
    infodict.setdefault('__files_to_move', {})
    try:
        obsolete_files, infodict = pp.run(infodict)
    except PostProcessingError as err:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is not True:
            raise
        self.report_error(err)
        return infodict

    if not obsolete_files:
        return infodict

    if self.params.get('keepvideo', False):
        for path in obsolete_files:
            infodict['__files_to_move'].setdefault(path, '')
        return infodict

    for path in set(obsolete_files):
        self.to_screen('Deleting original file %s (pass -k to keep)' % path)
        try:
            os.remove(encodeFilename(path))
        except OSError:
            self.report_warning('Unable to remove downloaded original file')
        # A deleted file must not be moved later on
        infodict['__files_to_move'].pop(path, None)
    return infodict
3331
def run_all_pps(self, key, info, *, additional_pps=None):
    """Run ``additional_pps`` followed by the postprocessors registered for ``key``.

    The force-print hook for ``key`` is called first. Each postprocessor
    receives the info returned by the previous one; the final info is
    returned.
    """
    self._forceprint(key, info)
    pipeline = (additional_pps or []) + self._pps[key]
    current = info
    for postprocessor in pipeline:
        current = self.run_pp(postprocessor, current)
    return current
3337
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors registered for ``key`` on a copy of ``ie_info``.

    Returns a ``(info, files_to_move)`` tuple; the '__files_to_move' entry
    is removed from the returned info.
    """
    working = dict(ie_info)
    working['__files_to_move'] = files_to_move or {}
    processed = self.run_all_pps(key, working)
    pending_moves = processed.pop('__files_to_move', None)
    return processed, pending_moves
3343
def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Order: the 'post_process' postprocessors (preceded by any listed under
    the info's '__postprocessors'), then the file mover, then the
    'after_move' postprocessors. Returns the final info.
    """
    info['filepath'] = filename
    info['__files_to_move'] = files_to_move or {}
    extra_pps = info.get('__postprocessors')
    info = self.run_all_pps('post_process', info, additional_pps=extra_pps)
    mover = MoveFilesAfterDownloadPP(self)
    info = self.run_pp(mover, info)
    del info['__files_to_move']
    return self.run_all_pps('after_move', info)
3352
def _make_archive_id(self, info_dict):
    """Return the download-archive id '<extractor> <video id>', or None.

    None is returned when the video id is missing, or when no extractor key
    is given and none of the known extractors is suitable for the URL.
    """
    video_id = info_dict.get('id')
    if not video_id:
        return None
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
    if extractor is not None:
        return '%s %s' % (extractor.lower(), video_id)

    url = str_or_none(info_dict.get('url'))
    if not url:
        return None
    # Try to find matching extractor for the URL and take its ie_key
    for candidate_key, candidate in self._ies.items():
        if candidate.suitable(url):
            return '%s %s' % (candidate_key.lower(), video_id)
    return None
3372
def in_download_archive(self, info_dict):
    """Tell whether the video described by ``info_dict`` is in the download archive.

    Returns False when no archive is configured or when no archive id can
    be built for the info.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    if archive_id:
        return archive_id in self.archive
    return False  # Incomplete video information
3383
def record_download_archive(self, info_dict):
    """Append the archive id of ``info_dict`` to the download archive.

    Does nothing when no archive file is configured. The id is written to
    the archive file and added to the in-memory ``self.archive`` set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    self.write_debug(f'Adding to archive: {archive_id}')
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
3394
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution for a format dict.

    Order of precedence: 'audio only' for formats with audio but no video,
    an explicit 'resolution' value, then whatever can be derived from
    'width' and/or 'height'. ``default`` is returned when nothing is known.
    """
    has_no_video = format.get('vcodec') == 'none'
    if has_no_video and format.get('acodec') != 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width, height = format.get('width'), format.get('height')
    if width and height:
        return '%dx%d' % (width, height)
    if height:
        return '%sp' % height
    if width:
        return '%dx?' % width
    return default
3408
def _list_format_headers(self, *headers):
    """Return the table headers, styled unless 'listformats_table' is False.

    With the param set to False the ``headers`` tuple is returned untouched;
    otherwise a list of headers formatted with the HEADERS style.
    """
    if self.params.get('listformats_table', True) is False:
        return headers
    header_style = self.Styles.HEADERS
    return [self._format_screen(title, header_style) for title in headers]
3413
def _format_note(self, fdict):
    """Compose the free-text 'note' describing the format dict ``fdict``.

    The note strings together, in a fixed order, whichever of these are
    present: unsupported marker, language, format note, bitrates, container,
    codecs, fps, sample rate and (approximate) file size.
    """
    def add(text, piece, sep):
        # Put `sep` in front of `piece` only when something precedes it
        return (text + sep if text else text) + piece

    get = fdict.get
    note = '(unsupported)' if get('ext') in ['f4f', 'f4m'] else ''
    if get('language'):
        note = add(note, '[%s]' % fdict['language'], ' ')
    if get('format_note') is not None:
        note = add(note, fdict['format_note'], ' ')
    if get('tbr') is not None:
        note = add(note, '%4dk' % fdict['tbr'], ', ')
    if get('container') is not None:
        note = add(note, '%s container' % fdict['container'], ', ')

    vcodec, vbr, abr = get('vcodec'), get('vbr'), get('abr')
    if vcodec is not None and vcodec != 'none':
        note = add(note, vcodec, ', ')
        if vbr is not None:
            note += '@'
    elif vbr is not None and abr is not None:
        note += 'video@'
    if vbr is not None:
        note += '%4dk' % vbr

    if get('fps') is not None:
        note = add(note, '%sfps' % fdict['fps'], ', ')

    acodec = get('acodec')
    if acodec is not None:
        audio_text = 'video only' if acodec == 'none' else '%-5s' % acodec
        note = add(note, audio_text, ', ')
    elif abr is not None:
        note = add(note, 'audio', ', ')
    if abr is not None:
        note += '@%3dk' % abr
    if get('asr') is not None:
        note += ' (%5dHz)' % fdict['asr']

    if get('filesize') is not None:
        note = add(note, format_bytes(fdict['filesize']), ', ')
    elif get('filesize_approx') is not None:
        note = add(note, '~' + format_bytes(fdict['filesize_approx']), ', ')
    return note
3473
def render_formats_table(self, info_dict):
    """Render the table of available formats for ``info_dict``.

    Returns None when the info has neither 'formats' nor a 'url'. Without
    'formats' the info dict itself is listed as the only format. Formats
    whose 'preference' is below -1000 are left out. The legacy four-column
    layout is used when the 'listformats_table' param is False.
    """
    if not (info_dict.get('formats') or info_dict.get('url')):
        return None

    def is_listed(f):
        return f.get('preference') is None or f['preference'] >= -1000

    listed = [f for f in info_dict.get('formats', [info_dict]) if is_listed(f)]

    if self.params.get('listformats_table', True) is False:
        legacy_rows = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f),
            ] for f in listed]
        return render_table(['format code', 'extension', 'resolution', 'note'], legacy_rows, extra_gap=1)

    delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)

    def build_row(f):
        size_cell = (
            format_field(f, 'filesize', ' \t%s', func=format_bytes)
            + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes))
        vcodec_cell = format_field(f, 'vcodec', default='unknown').replace(
            'none', 'images' if f.get('acodec') == 'none'
                    else self._format_screen('audio only', self.Styles.SUPPRESS))
        acodec_cell = format_field(f, 'acodec', default='unknown').replace(
            'none', '' if f.get('vcodec') == 'none'
                    else self._format_screen('video only', self.Styles.SUPPRESS))
        more_info = join_nonempty(
            self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
            format_field(f, 'language', '[%s]'),
            join_nonempty(format_field(f, 'format_note'),
                          format_field(f, 'container', ignore=(None, f.get('ext'))),
                          delim=', '),
            delim=' ')
        return [
            self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d'),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            delim,
            size_cell,
            format_field(f, 'tbr', '\t%dk'),
            shorten_protocol_name(f.get('protocol', '')),
            delim,
            vcodec_cell,
            format_field(f, 'vbr', '\t%dk'),
            acodec_cell,
            format_field(f, 'abr', '\t%dk'),
            format_field(f, 'asr', '\t%dHz'),
            more_info,
        ]

    rows = [build_row(f) for f in listed]
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
        delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    return render_table(
        header_line, rows, hide_empty=True,
        delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3526
def render_thumbnails_table(self, info_dict):
    """Render the table of thumbnails of ``info_dict``, or None if it has none."""
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        return None
    headers = self._list_format_headers('ID', 'Width', 'Height', 'URL')
    rows = []
    for thumb in thumbnails:
        rows.append([
            thumb.get('id'),
            thumb.get('width', 'unknown'),
            thumb.get('height', 'unknown'),
            thumb['url'],
        ])
    return render_table(headers, rows)
3534
def render_subtitles_table(self, video_id, subtitles):
    """Render the table of subtitle languages and formats, or None if empty.

    ``subtitles`` maps a language to its list of format dicts. Formats are
    listed in reverse order; when all formats of a language share one name
    it is shown once (or not at all if no name is known).
    """
    def _row(lang, formats):
        pairs = [(f['ext'], f.get('name') or 'unknown') for f in reversed(formats)]
        exts, names = zip(*pairs)
        if len(set(names)) == 1:
            names = names[:1] if names[0] != 'unknown' else []
        return [lang, ', '.join(names), ', '.join(exts)]

    if not subtitles:
        return None
    headers = self._list_format_headers('Language', 'Name', 'Formats')
    rows = [_row(lang, formats) for lang, formats in subtitles.items()]
    return render_table(headers, rows, hide_empty=True)
3548
def __list_table(self, video_id, name, func, *args):
    """Print the table produced by ``func(*args)``, or a notice if there is none.

    ``name`` is the plural description of what is listed (e.g. 'formats').
    """
    table = func(*args)
    if table:
        self.to_screen(f'[info] Available {name} for {video_id}:')
        self.to_stdout(table)
    else:
        self.to_screen(f'{video_id} has no {name}')
3556
def list_formats(self, info_dict):
    """Print the formats table of ``info_dict``."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'formats', self.render_formats_table, info_dict)
3559
def list_thumbnails(self, info_dict):
    """Print the thumbnails table of ``info_dict``."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'thumbnails', self.render_thumbnails_table, info_dict)
3562
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the table of ``subtitles`` for ``video_id``; ``name`` labels what is listed."""
    renderer = self.render_subtitles_table
    self.__list_table(video_id, name, renderer, video_id, subtitles)
3565
def urlopen(self, req):
    """Start an HTTP download.

    ``req`` is either a request object or a URL string; a string is turned
    into a sanitized request first. The configured socket timeout is applied.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
3571
def print_debug_header(self):
    """Write the diagnostic header shown in verbose mode.

    Does nothing unless the 'verbose' param is set. Otherwise the encodings,
    yt-dlp version and variant, plugins, compatibility options, Python
    version, external program versions, optional libraries and the proxy
    map are written, either through the 'logger' param (if given) or to the
    regular output.
    """
    if not self.params.get('verbose'):
        return

    def get_encoding(stream):
        # str() guards against streams whose `encoding` attribute exists
        # but is None, which would break the concatenation below
        ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
        if not supports_terminal_sequences(stream):
            from .compat import WINDOWS_VT_MODE
            ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
        return ret

    encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        get_encoding(self._screen_file), get_encoding(self._err_file),
        self.get_encoding())

    logger = self.params.get('logger')
    if logger:
        write_debug = lambda msg: logger.debug(f'[debug] {msg}')
        write_debug(encoding_str)
    else:
        # The encodings line is written with encoding=None, unlike the rest
        write_string(f'[debug] {encoding_str}\n', encoding=None)
        write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    write_debug(join_nonempty(
        'yt-dlp version', __version__,
        f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        delim=' '))
    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if plugin_extractors or plugin_postprocessors:
        write_debug('Plugins: %s' % [
            '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
    if self.params.get('compat_opts'):
        write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))

    if source == 'source':
        try:
            sp = Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, _ = sp.communicate_or_kill()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                write_debug('Git HEAD: %s' % out)
        except Exception:
            # Best effort only: the git revision is optional information
            pass

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    write_debug('Python version %s (%s %s) - %s' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
    if ffmpeg_features:
        # Sorted so that the output does not depend on set iteration order
        exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .downloader.websocket import has_websockets
    from .postprocessor.embedthumbnail import has_mutagen
    from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE

    lib_str = join_nonempty(
        compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
        SECRETSTORAGE_AVAILABLE and 'secretstorage',
        has_mutagen and 'mutagen',
        SQLITE_AVAILABLE and 'sqlite',
        has_websockets and 'websockets',
        delim=', ') or 'none'
    write_debug('Optional libraries: %s' % lib_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_debug(f'Proxy map: {proxy_map}')

    # Not implemented
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        write_debug('Public IP address: %s' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
3685
def _setup_opener(self):
    """Create the cookie jar and URL opener and store them on the instance.

    Sets ``self._socket_timeout`` (20 unless 'socket_timeout' is given),
    ``self.cookiejar`` and ``self._opener``. Proxies come from the 'proxy'
    param (an empty string disables them) or, when it is unset, from
    ``getproxies()``.
    """
    configured_timeout = self.params.get('socket_timeout')
    self._socket_timeout = float(configured_timeout) if configured_timeout is not None else 20

    cookies_from_browser = self.params.get('cookiesfrombrowser')
    cookie_file = self.params.get('cookiefile')
    proxy_opt = self.params.get('proxy')

    self.cookiejar = load_cookies(cookie_file, cookies_from_browser, self)
    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if proxy_opt is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies:
            proxies.setdefault('https', proxies['http'])
    elif proxy_opt == '':
        proxies = {}
    else:
        proxies = {'http': proxy_opt, 'https': proxy_opt}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = int(bool(self.params.get('debug_printtraffic')))
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')

    file_handler = compat_urllib_request.FileHandler()
    file_handler.file_open = file_open

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
3733
def encode(self, s):
    """Encode ``s`` with the configured encoding; bytes are returned as they are.

    On failure the UnicodeEncodeError is re-raised with a hint about the
    encoding configuration appended to its reason.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    target_encoding = self.get_encoding()
    try:
        return s.encode(target_encoding)
    except UnicodeEncodeError as err:
        err.reason += '. Check your system encoding configuration or use the --encoding option.'
        raise
3743
def get_encoding(self):
    """Return the 'encoding' param, falling back to the preferred encoding."""
    configured = self.params.get('encoding')
    if configured is not None:
        return configured
    return preferredencoding()
3749
def _write_info_json(self, label, ie_result, infofn, overwrite=None):
    """Write the info JSON of ``ie_result`` to ``infofn``.

    Returns True when the file was written or is already present, False
    when writing is skipped and None on error. ``overwrite`` defaults to
    the 'overwrites' param.
    """
    if overwrite is None:
        overwrite = self.params.get('overwrites', True)

    if not self.params.get('writeinfojson'):
        return False
    if not infofn:
        self.write_debug(f'Skipping writing {label} infojson')
        return False
    if not self._ensure_dir_exists(infofn):
        return None

    if not overwrite and os.path.exists(infofn):
        self.to_screen(f'[info] {label.title()} metadata is already present')
        return True

    self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
    try:
        sanitized = self.sanitize_info(ie_result, self.params.get('clean_infojson', True))
        write_json_file(sanitized, infofn)
    except OSError:
        self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
        return None
    return True
3771
def _write_description(self, label, ie_result, descfn):
    """Write the description of ``ie_result`` to ``descfn``.

    Returns True when the file was written or is already present, False
    when writing is skipped (including when there is no description) and
    None on error.
    """
    if not self.params.get('writedescription'):
        return False
    if not descfn:
        self.write_debug(f'Skipping writing {label} description')
        return False
    if not self._ensure_dir_exists(descfn):
        return None

    if not self.params.get('overwrites', True) and os.path.exists(descfn):
        self.to_screen(f'[info] {label.title()} description is already present')
        return True
    if ie_result.get('description') is None:
        self.report_warning(f'There\'s no {label} description to write')
        return False

    try:
        self.to_screen(f'[info] Writing {label} description to: {descfn}')
        with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
            descfile.write(ie_result['description'])
    except OSError:
        self.report_error(f'Cannot write {label} description file {descfn}')
        return None
    return True
3795
def _write_subtitles(self, info_dict, filename):
    """Write the requested subtitles next to ``filename``.

    Returns a list of ``(sub_filename, final_sub_filename)`` tuples, or
    None when a subtitle with inline data cannot be written. The list is
    empty when there is nothing to write. Each written or already present
    subtitle gets its 'filepath' set.
    """
    ret = []
    subtitles = info_dict.get('requested_subtitles')
    wanted = self.params.get('writesubtitles') or self.params.get('writeautomaticsub')
    if not (subtitles and wanted):
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        return ret

    final_base = self.prepare_filename(info_dict, 'subtitle')
    if not final_base:
        self.to_screen('[info] Skipping writing video subtitles')
        return ret

    for sub_lang, sub_info in subtitles.items():
        sub_format = sub_info['ext']
        video_ext = info_dict.get('ext')
        sub_filename = subtitles_filename(filename, sub_lang, sub_format, video_ext)
        sub_filename_final = subtitles_filename(final_base, sub_lang, sub_format, info_dict.get('ext'))

        existing_sub = self.existing_file((sub_filename_final, sub_filename))
        if existing_sub:
            self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
            sub_info['filepath'] = existing_sub
            ret.append((existing_sub, sub_filename_final))
            continue

        self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
        if sub_info.get('data') is not None:
            try:
                # Use newline='' to prevent conversion of newline characters
                # See https://github.com/ytdl-org/youtube-dl/issues/10268
                with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                    subfile.write(sub_info['data'])
            except OSError:
                self.report_error(f'Cannot write video subtitles file {sub_filename}')
                return None
            sub_info['filepath'] = sub_filename
            ret.append((sub_filename, sub_filename_final))
            continue

        try:
            download_info = sub_info.copy()
            download_info.setdefault('http_headers', info_dict.get('http_headers'))
            self.dl(sub_filename, download_info, subtitle=True)
        except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
            if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
                raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err)
            self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
        else:
            sub_info['filepath'] = sub_filename
            ret.append((sub_filename, sub_filename_final))
    return ret
3845
def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
    """Write thumbnails to file and return a list of (thumb_filename, final_thumb_filename).

    Nothing is written unless the 'writethumbnail' or 'write_all_thumbnails'
    param is set. Thumbnails are tried from the last to the first; unless
    all of them are requested, processing stops after the first one that is
    written or already present. Thumbnails that fail to download are
    removed from the info's thumbnail list and reported as a warning.

    ``thumb_filename_base`` is the base for the final file names and
    defaults to ``filename``.
    """
    write_all = self.params.get('write_all_thumbnails', False)
    thumbnails, ret = [], []
    if write_all or self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails') or []
    multiple = write_all and len(thumbnails) > 1

    if thumb_filename_base is None:
        thumb_filename_base = filename
    if thumbnails and not thumb_filename_base:
        self.write_debug(f'Skipping writing {label} thumbnail')
        return ret

    # Iterate over a reversed snapshot so that failed thumbnails can be
    # popped from `thumbnails` by index without disturbing the loop
    for idx, t in list(enumerate(thumbnails))[::-1]:
        thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
        thumb_display_id = f'{label} thumbnail {t["id"]}'
        thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
        thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

        existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
        if existing_thumb:
            self.to_screen('[info] %s is already present' % (
                thumb_display_id if multiple else f'{label} thumbnail').capitalize())
            t['filepath'] = existing_thumb
            ret.append((existing_thumb, thumb_filename_final))
        else:
            self.to_screen(f'[info] Downloading {thumb_display_id} ...')
            try:
                request = sanitized_Request(t['url'], headers=t.get('http_headers', {}))
                # Close the response once it is copied (or copying failed)
                # instead of leaving the connection open until garbage collection
                with contextlib.closing(self.urlopen(request)) as uf:
                    self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                ret.append((thumb_filename, thumb_filename_final))
                t['filepath'] = thumb_filename
            except network_exceptions as err:
                thumbnails.pop(idx)
                self.report_warning(f'Unable to download {thumb_display_id}: {err}')
        if ret and not write_all:
            break
    return ret