]> jfr.im git - yt-dlp.git/blob - yt_dlp/YoutubeDL.py
[outtmpl] Handle hard-coded file extension better
[yt-dlp.git] / yt_dlp / YoutubeDL.py
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import functools
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import sys
23 import tempfile
24 import time
25 import tokenize
26 import traceback
27 import random
28 import unicodedata
29
30 from enum import Enum
31 from string import ascii_letters
32
33 from .compat import (
34 compat_basestring,
35 compat_get_terminal_size,
36 compat_kwargs,
37 compat_numeric_types,
38 compat_os_name,
39 compat_pycrypto_AES,
40 compat_shlex_quote,
41 compat_str,
42 compat_tokenize_tokenize,
43 compat_urllib_error,
44 compat_urllib_request,
45 compat_urllib_request_DataHandler,
46 windows_enable_vt_mode,
47 )
48 from .cookies import load_cookies
49 from .utils import (
50 age_restricted,
51 args_to_str,
52 ContentTooShortError,
53 date_from_str,
54 DateRange,
55 DEFAULT_OUTTMPL,
56 determine_ext,
57 determine_protocol,
58 DownloadCancelled,
59 DownloadError,
60 encode_compat_str,
61 encodeFilename,
62 EntryNotInPlaylist,
63 error_to_compat_str,
64 ExistingVideoReached,
65 expand_path,
66 ExtractorError,
67 float_or_none,
68 format_bytes,
69 format_field,
70 format_decimal_suffix,
71 formatSeconds,
72 GeoRestrictedError,
73 get_domain,
74 HEADRequest,
75 InAdvancePagedList,
76 int_or_none,
77 iri_to_uri,
78 ISO3166Utils,
79 join_nonempty,
80 LazyList,
81 LINK_TEMPLATES,
82 locked_file,
83 make_dir,
84 make_HTTPS_handler,
85 MaxDownloadsReached,
86 network_exceptions,
87 number_of_digits,
88 orderedSet,
89 OUTTMPL_TYPES,
90 PagedList,
91 parse_filesize,
92 PerRequestProxyHandler,
93 platform_name,
94 Popen,
95 POSTPROCESS_WHEN,
96 PostProcessingError,
97 preferredencoding,
98 prepend_extension,
99 ReExtractInfo,
100 register_socks_protocols,
101 RejectedVideoReached,
102 remove_terminal_sequences,
103 render_table,
104 replace_extension,
105 SameFileError,
106 sanitize_filename,
107 sanitize_path,
108 sanitize_url,
109 sanitized_Request,
110 std_headers,
111 STR_FORMAT_RE_TMPL,
112 STR_FORMAT_TYPES,
113 str_or_none,
114 strftime_or_none,
115 subtitles_filename,
116 supports_terminal_sequences,
117 timetuple_from_msec,
118 to_high_limit_path,
119 traverse_obj,
120 try_get,
121 UnavailableVideoError,
122 url_basename,
123 variadic,
124 version_tuple,
125 write_json_file,
126 write_string,
127 YoutubeDLCookieProcessor,
128 YoutubeDLHandler,
129 YoutubeDLRedirectHandler,
130 )
131 from .cache import Cache
132 from .minicurses import format_text
133 from .extractor import (
134 gen_extractor_classes,
135 get_info_extractor,
136 _LAZY_LOADER,
137 _PLUGIN_CLASSES as plugin_extractors
138 )
139 from .extractor.openload import PhantomJSwrapper
140 from .downloader import (
141 FFmpegFD,
142 get_suitable_downloader,
143 shorten_protocol_name
144 )
145 from .downloader.rtmp import rtmpdump_version
146 from .postprocessor import (
147 get_postprocessor,
148 EmbedThumbnailPP,
149 FFmpegFixupDuplicateMoovPP,
150 FFmpegFixupDurationPP,
151 FFmpegFixupM3u8PP,
152 FFmpegFixupM4aPP,
153 FFmpegFixupStretchedPP,
154 FFmpegFixupTimestampPP,
155 FFmpegMergerPP,
156 FFmpegPostProcessor,
157 MoveFilesAfterDownloadPP,
158 _PLUGIN_CLASSES as plugin_postprocessors
159 )
160 from .update import detect_variant
161 from .version import __version__, RELEASE_GIT_HEAD
162
163 if compat_os_name == 'nt':
164 import ctypes
165
166
167 class YoutubeDL(object):
168 """YoutubeDL class.
169
170 YoutubeDL objects are the ones responsible for downloading the
171 actual video file and writing it to disk if the user has requested
172 it, among some other tasks. In most cases there should be one per
173 program. As, given a video URL, the downloader doesn't know how to
174 extract all the needed information, task that InfoExtractors do, it
175 has to pass the URL to one of them.
176
177 For this, YoutubeDL objects have a method that allows
178 InfoExtractors to be registered in a given order. When it is passed
179 a URL, the YoutubeDL object hands it to the first InfoExtractor it
180 finds that reports being able to handle it. The InfoExtractor extracts
181 all the information about the video or videos the URL refers to, and
182 YoutubeDL processes the extracted information, possibly using a File
183 Downloader to download the video.
184
185 YoutubeDL objects accept a lot of parameters. In order not to saturate
186 the object constructor with arguments, it receives a dictionary of
187 options instead. These options are available through the params
188 attribute for the InfoExtractors to use. The YoutubeDL also
189 registers itself as the downloader in charge for the InfoExtractors
190 that are added to it, so this is a "mutual registration".
191
192 Available options:
193
194 username: Username for authentication purposes.
195 password: Password for authentication purposes.
196 videopassword: Password for accessing a video.
197 ap_mso: Adobe Pass multiple-system operator identifier.
198 ap_username: Multiple-system operator account username.
199 ap_password: Multiple-system operator account password.
200 usenetrc: Use netrc for authentication instead.
201 verbose: Print additional info to stdout.
202 quiet: Do not print messages to stdout.
203 no_warnings: Do not print out anything for warnings.
204 forceprint: A dict with keys WHEN mapped to a list of templates to
205 print to stdout. The allowed keys are video or any of the
206 items in utils.POSTPROCESS_WHEN.
207 For compatibility, a single list is also accepted
208 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
209 a list of tuples with (template, filename)
210 forceurl: Force printing final URL. (Deprecated)
211 forcetitle: Force printing title. (Deprecated)
212 forceid: Force printing ID. (Deprecated)
213 forcethumbnail: Force printing thumbnail URL. (Deprecated)
214 forcedescription: Force printing description. (Deprecated)
215 forcefilename: Force printing final filename. (Deprecated)
216 forceduration: Force printing duration. (Deprecated)
217 forcejson: Force printing info_dict as JSON.
218 dump_single_json: Force printing the info_dict of the whole playlist
219 (or video) as a single JSON line.
220 force_write_download_archive: Force writing download archive regardless
221 of 'skip_download' or 'simulate'.
222 simulate: Do not download the video files. If unset (or None),
223 simulate only if listsubtitles, listformats or list_thumbnails is used
224 format: Video format code. see "FORMAT SELECTION" for more details.
225 You can also pass a function. The function takes 'ctx' as
226 argument and returns the formats to download.
227 See "build_format_selector" for an implementation
228 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
229 ignore_no_formats_error: Ignore "No video formats" error. Useful for
230 extracting metadata even if the video is not actually
231 available for download (experimental)
232 format_sort: A list of fields by which to sort the video formats.
233 See "Sorting Formats" for more details.
234 format_sort_force: Force the given format_sort. see "Sorting Formats"
235 for more details.
236 allow_multiple_video_streams: Allow multiple video streams to be merged
237 into a single file
238 allow_multiple_audio_streams: Allow multiple audio streams to be merged
239 into a single file
240 check_formats Whether to test if the formats are downloadable.
241 Can be True (check all), False (check none),
242 'selected' (check selected formats),
243 or None (check only if requested by extractor)
244 paths: Dictionary of output paths. The allowed keys are 'home'
245 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
246 outtmpl: Dictionary of templates for output names. Allowed keys
247 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
248 For compatibility with youtube-dl, a single string can also be used
249 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
250 restrictfilenames: Do not allow "&" and spaces in file names
251 trim_file_name: Limit length of filename (extension excluded)
252 windowsfilenames: Force the filenames to be windows compatible
253 ignoreerrors: Do not stop on download/postprocessing errors.
254 Can be 'only_download' to ignore only download errors.
255 Default is 'only_download' for CLI, but False for API
256 skip_playlist_after_errors: Number of allowed failures until the rest of
257 the playlist is skipped
258 force_generic_extractor: Force downloader to use the generic extractor
259 overwrites: Overwrite all video and metadata files if True,
260 overwrite only non-video files if None
261 and don't overwrite any file if False
262 For compatibility with youtube-dl,
263 "nooverwrites" may also be used instead
264 playliststart: Playlist item to start at.
265 playlistend: Playlist item to end at.
266 playlist_items: Specific indices of playlist to download.
267 playlistreverse: Download playlist items in reverse order.
268 playlistrandom: Download playlist items in random order.
269 matchtitle: Download only matching titles.
270 rejecttitle: Reject downloads for matching titles.
271 logger: Log messages to a logging.Logger instance.
272 logtostderr: Log messages to stderr instead of stdout.
273 consoletitle: Display progress in console window's titlebar.
274 writedescription: Write the video description to a .description file
275 writeinfojson: Write the video description to a .info.json file
276 clean_infojson: Remove private fields from the infojson
277 getcomments: Extract video comments. This will not be written to disk
278 unless writeinfojson is also given
279 writeannotations: Write the video annotations to a .annotations.xml file
280 writethumbnail: Write the thumbnail image to a file
281 allow_playlist_files: Whether to write playlists' description, infojson etc
282 also to disk when using the 'write*' options
283 write_all_thumbnails: Write all thumbnail formats to files
284 writelink: Write an internet shortcut file, depending on the
285 current platform (.url/.webloc/.desktop)
286 writeurllink: Write a Windows internet shortcut file (.url)
287 writewebloclink: Write a macOS internet shortcut file (.webloc)
288 writedesktoplink: Write a Linux internet shortcut file (.desktop)
289 writesubtitles: Write the video subtitles to a file
290 writeautomaticsub: Write the automatically generated subtitles to a file
291 allsubtitles: Deprecated - Use subtitleslangs = ['all']
292 Downloads all the subtitles of the video
293 (requires writesubtitles or writeautomaticsub)
294 listsubtitles: Lists all available subtitles for the video
295 subtitlesformat: The format code for subtitles
296 subtitleslangs: List of languages of the subtitles to download (can be regex).
297 The list may contain "all" to refer to all the available
298 subtitles. The language can be prefixed with a "-" to
299 exclude it from the requested languages. Eg: ['all', '-live_chat']
300 keepvideo: Keep the video file after post-processing
301 daterange: A DateRange object, download only if the upload_date is in the range.
302 skip_download: Skip the actual download of the video file
303 cachedir: Location of the cache files in the filesystem.
304 False to disable filesystem cache.
305 noplaylist: Download single video instead of a playlist if in doubt.
306 age_limit: An integer representing the user's age in years.
307 Unsuitable videos for the given age are skipped.
308 min_views: An integer representing the minimum view count the video
309 must have in order to not be skipped.
310 Videos without view count information are always
311 downloaded. None for no limit.
312 max_views: An integer representing the maximum view count.
313 Videos that are more popular than that are not
314 downloaded.
315 Videos without view count information are always
316 downloaded. None for no limit.
317 download_archive: File name of a file where all downloads are recorded.
318 Videos already present in the file are not downloaded
319 again.
320 break_on_existing: Stop the download process after attempting to download a
321 file that is in the archive.
322 break_on_reject: Stop the download process when encountering a video that
323 has been filtered out.
324 break_per_url: Whether break_on_reject and break_on_existing
325 should act on each input URL as opposed to for the entire queue
326 cookiefile: File name where cookies should be read from and dumped to
327 cookiesfrombrowser: A tuple containing the name of the browser, the profile
328 name/path from where cookies are loaded, and the name of the
329 keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
330 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
331 support RFC 5746 secure renegotiation
332 nocheckcertificate: Do not verify SSL certificates
333 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
334 At the moment, this is only supported by YouTube.
335 proxy: URL of the proxy server to use
336 geo_verification_proxy: URL of the proxy to use for IP address verification
337 on geo-restricted sites.
338 socket_timeout: Time to wait for unresponsive hosts, in seconds
339 bidi_workaround: Work around buggy terminals without bidirectional text
340 support, using fribidi
341 debug_printtraffic:Print out sent and received HTTP traffic
342 include_ads: Download ads as well (deprecated)
343 default_search: Prepend this string if an input url is not valid.
344 'auto' for elaborate guessing
345 encoding: Use this encoding instead of the system-specified.
346 extract_flat: Do not resolve URLs, return the immediate result.
347 Pass in 'in_playlist' to only show this behavior for
348 playlist items.
349 wait_for_video: If given, wait for scheduled streams to become available.
350 The value should be a tuple containing the range
351 (min_secs, max_secs) to wait between retries
352 postprocessors: A list of dictionaries, each with an entry
353 * key: The name of the postprocessor. See
354 yt_dlp/postprocessor/__init__.py for a list.
355 * when: When to run the postprocessor. Allowed values are
356 the entries of utils.POSTPROCESS_WHEN
357 Assumed to be 'post_process' if not given
358 post_hooks: Deprecated - Register a custom postprocessor instead
359 A list of functions that get called as the final step
360 for each video file, after all postprocessors have been
361 called. The filename will be passed as the only argument.
362 progress_hooks: A list of functions that get called on download
363 progress, with a dictionary with the entries
364 * status: One of "downloading", "error", or "finished".
365 Check this first and ignore unknown values.
366 * info_dict: The extracted info_dict
367
368 If status is one of "downloading", or "finished", the
369 following properties may also be present:
370 * filename: The final filename (always present)
371 * tmpfilename: The filename we're currently writing to
372 * downloaded_bytes: Bytes on disk
373 * total_bytes: Size of the whole file, None if unknown
374 * total_bytes_estimate: Guess of the eventual file size,
375 None if unavailable.
376 * elapsed: The number of seconds since download started.
377 * eta: The estimated time in seconds, None if unknown
378 * speed: The download speed in bytes/second, None if
379 unknown
380 * fragment_index: The counter of the currently
381 downloaded video fragment.
382 * fragment_count: The number of fragments (= individual
383 files that will be merged)
384
385 Progress hooks are guaranteed to be called at least once
386 (with status "finished") if the download is successful.
387 postprocessor_hooks: A list of functions that get called on postprocessing
388 progress, with a dictionary with the entries
389 * status: One of "started", "processing", or "finished".
390 Check this first and ignore unknown values.
391 * postprocessor: Name of the postprocessor
392 * info_dict: The extracted info_dict
393
394 Progress hooks are guaranteed to be called at least twice
395 (with status "started" and "finished") if the processing is successful.
396 merge_output_format: Extension to use when merging formats.
397 final_ext: Expected final extension; used to detect when the file was
398 already downloaded and converted
399 fixup: Automatically correct known faults of the file.
400 One of:
401 - "never": do nothing
402 - "warn": only emit a warning
403 - "detect_or_warn": check whether we can do anything
404 about it, warn otherwise (default)
405 source_address: Client-side IP address to bind to.
406 call_home: Boolean, true iff we are allowed to contact the
407 yt-dlp servers for debugging. (BROKEN)
408 sleep_interval_requests: Number of seconds to sleep between requests
409 during extraction
410 sleep_interval: Number of seconds to sleep before each download when
411 used alone or a lower bound of a range for randomized
412 sleep before each download (minimum possible number
413 of seconds to sleep) when used along with
414 max_sleep_interval.
415 max_sleep_interval:Upper bound of a range for randomized sleep before each
416 download (maximum possible number of seconds to sleep).
417 Must only be used along with sleep_interval.
418 Actual sleep time will be a random float from range
419 [sleep_interval; max_sleep_interval].
420 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
421 listformats: Print an overview of available video formats and exit.
422 list_thumbnails: Print a table of all thumbnails and exit.
423 match_filter: A function that gets called with the info_dict of
424 every video.
425 If it returns a message, the video is ignored.
426 If it returns None, the video is downloaded.
427 match_filter_func in utils.py is one example for this.
428 no_color: Do not emit color codes in output.
429 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
430 HTTP header
431 geo_bypass_country:
432 Two-letter ISO 3166-2 country code that will be used for
433 explicit geographic restriction bypassing via faking
434 X-Forwarded-For HTTP header
435 geo_bypass_ip_block:
436 IP range in CIDR notation that will be used similarly to
437 geo_bypass_country
438
439 The following options determine which downloader is picked:
440 external_downloader: A dictionary of protocol keys and the executable of the
441 external downloader to use for it. The allowed protocols
442 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
443 Set the value to 'native' to use the native downloader
444 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
445 or {'m3u8': 'ffmpeg'} instead.
446 Use the native HLS downloader instead of ffmpeg/avconv
447 if True, otherwise use ffmpeg/avconv if False, otherwise
448 use downloader suggested by extractor if None.
449 compat_opts: Compatibility options. See "Differences in default behavior".
450 The following options do not work when used through the API:
451 filename, abort-on-error, multistreams, no-live-chat, format-sort
452 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
453 Refer to __init__.py for their implementation
454 progress_template: Dictionary of templates for progress outputs.
455 Allowed keys are 'download', 'postprocess',
456 'download-title' (console title) and 'postprocess-title'.
457 The template is mapped on a dictionary with keys 'progress' and 'info'
458
459 The following parameters are not used by YoutubeDL itself, they are used by
460 the downloader (see yt_dlp/downloader/common.py):
461 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
462 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
463 continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
464 external_downloader_args, concurrent_fragment_downloads.
465
466 The following options are used by the post processors:
467 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
468 otherwise prefer ffmpeg. (avconv support is deprecated)
469 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
470 to the binary or its containing directory.
471 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
472 and a list of additional command-line arguments for the
473 postprocessor/executable. The dict can also have "PP+EXE" keys
474 which are used when the given exe is used by the given PP.
475 Use 'default' as the name for arguments to be passed to all PP
476 For compatibility with youtube-dl, a single list of args
477 can also be used
478
479 The following options are used by the extractors:
480 extractor_retries: Number of times to retry for known errors
481 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
482 hls_split_discontinuity: Split HLS playlists to different formats at
483 discontinuities such as ad breaks (default: False)
484 extractor_args: A dictionary of arguments to be passed to the extractors.
485 See "EXTRACTOR ARGUMENTS" for details.
486 Eg: {'youtube': {'skip': ['dash', 'hls']}}
487 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
488 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
489 If True (default), DASH manifests and related
490 data will be downloaded and processed by extractor.
491 You can reduce network I/O by disabling it if you don't
492 care about DASH. (only for youtube)
493 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
494 If True (default), HLS manifests and related
495 data will be downloaded and processed by extractor.
496 You can reduce network I/O by disabling it if you don't
497 care about HLS. (only for youtube)
498 """
499
# Names of info fields grouped as "numeric" (going by the constant's name;
# the code that consumes this set is outside this part of the file)
_NUMERIC_FIELDS = {
    'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
    'timestamp', 'release_timestamp',
    'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
    'average_rating', 'comment_count', 'age_limit',
    'start_time', 'end_time',
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number', 'release_year',
}

# File extensions grouped by media kind
_format_selection_exts = dict(
    audio={'m4a', 'mp3', 'ogg', 'aac'},
    video={'mp4', 'flv', 'webm', '3gp'},
    storyboards={'mhtml'},
)

# Class-level defaults. __init__ rebinds most of these on every instance
# (_ies, _pps, _printed_messages, _first_webpage_request, _download_retcode,
# _num_downloads and _screen_file)
params = None
_ies = {}
_pps = {when: [] for when in POSTPROCESS_WHEN}
_printed_messages = set()
_first_webpage_request = True
_download_retcode = None
_num_downloads = None
_playlist_level = 0
_playlist_urls = set()
_screen_file = None
526
def __init__(self, params=None, auto_init=True):
    """Create a FileDownloader object with the given options.

    @param params       Dictionary of options (see the class docstring).
                        An empty dict is used if None. The dict is kept as
                        self.params and is updated in place with
                        compatibility and default values
    @param auto_init    Whether to load the default extractors and print header (if verbose).
                        Set to 'no_verbose_header' to not print the header
    """
    if params is None:
        params = {}

    # Per-instance state (several of these rebind class-level defaults)
    self._ies = {}
    self._ies_instances = {}
    self._pps = {k: [] for k in POSTPROCESS_WHEN}
    self._printed_messages = set()
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._postprocessor_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._num_videos = 0
    # Decide by truthiness instead of using the option as a list index:
    # indexing fails with TypeError when 'logtostderr' is explicitly None
    # and with IndexError for integers outside the list's index range
    self._screen_file = sys.stderr if params.get('logtostderr') else sys.stdout
    self._err_file = sys.stderr
    self.params = params
    self.cache = Cache(self)

    windows_enable_vt_mode()
    self._allow_colors = {
        'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
        'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
    }

    if sys.version_info < (3, 6):
        self.report_warning(
            'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

    if self.params.get('allow_unplayable_formats'):
        self.report_warning(
            f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
            'This is a developer option intended for debugging. \n'
            ' If you experience any issues while using this option, '
            f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

    def check_deprecated(param, option, suggestion):
        """Warn if the deprecated param is set; return whether it was set"""
        if self.params.get(param) is not None:
            self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
            return True
        return False

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the old option over unless the new one was given explicitly
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    for msg in self.params.get('_warnings', []):
        self.report_warning(msg)
    for msg in self.params.get('_deprecation_warnings', []):
        self.deprecation_warning(msg)

    if 'list-formats' in self.params.get('compat_opts', []):
        self.params['listformats_table'] = False

    if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
        # nooverwrites was unnecessarily changed to overwrites
        # in 0c3d0f51778b153f65c21906031c2e091fcfb641
        # This ensures compatibility with both keys
        self.params['overwrites'] = not self.params['nooverwrites']
    elif self.params.get('overwrites') is None:
        # An explicit None is treated the same as the key being absent
        self.params.pop('overwrites', None)
    else:
        self.params['nooverwrites'] = not self.params['overwrites']

    params.setdefault('forceprint', {})
    params.setdefault('print_to_file', {})

    # Compatibility with older syntax
    if not isinstance(params['forceprint'], dict):
        params['forceprint'] = {'video': params['forceprint']}

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Try bidiv first and fall back to fribidi if it cannot be started
            try:
                self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
            except OSError:
                self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            # The helper writes to the slave end; the converted text is read
            # back from the master end (see _bidi_workaround)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning(
                    'Could not find fribidi executable, ignoring --bidi-workaround. '
                    'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    # Creating format selector here allows us to catch syntax errors before the extraction
    self.format_selector = (
        self.params.get('format') if self.params.get('format') in (None, '-')
        else self.params['format'] if callable(self.params['format'])
        else self.build_format_selector(self.params['format']))

    self._setup_opener()

    if auto_init:
        if auto_init != 'no_verbose_header':
            self.print_debug_header()
        self.add_default_info_extractors()

    # Register the hooks given through params with their respective adders
    hooks = {
        'post_hooks': self.add_post_hook,
        'progress_hooks': self.add_progress_hook,
        'postprocessor_hooks': self.add_postprocessor_hook,
    }
    for opt, fn in hooks.items():
        for ph in self.params.get(opt, []):
            fn(ph)

    for pp_def_raw in self.params.get('postprocessors', []):
        # Work on a copy so that the caller's definition is not modified by pop()
        pp_def = dict(pp_def_raw)
        when = pp_def.pop('when', 'post_process')
        self.add_post_processor(
            get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
            when=when)

    register_socks_protocols()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified"""
        if fn is None:
            return False
        self.write_debug(f'Loading archive file {fn!r}')
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file is not an error
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    # self.archive must exist before preloading since the helper adds to it
    self.archive = set()
    preload_download_archive(self.params.get('download_archive'))
692
def warn_if_short_id(self, argv):
    """Warn when an argument looks like a short YouTube ID starting with a dash.

    Such arguments ("-" followed by exactly 10 characters from [0-9A-Za-z_-])
    are moved behind a "--" separator in the command line suggested by the warning.
    Nothing is reported if no argument matches.
    """
    suspicious = []
    for pos, arg in enumerate(argv):
        if re.match(r'^-[0-9A-Za-z_-]{10}$', arg):
            suspicious.append(pos)
    if not suspicious:
        return

    remaining = [arg for pos, arg in enumerate(argv) if pos not in suspicious]
    moved = [argv[pos] for pos in suspicious]
    correct_argv = ['yt-dlp'] + remaining + ['--'] + moved
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s' %
        args_to_str(correct_argv))
708
def add_info_extractor(self, ie):
    """Register an InfoExtractor (class or instance) under its ie_key().

    Instances are additionally stored in _ies_instances and get this
    object set as their downloader; classes are only stored in _ies.
    """
    key = ie.ie_key()
    self._ies[key] = ie
    if isinstance(ie, type):
        # A class, not an instance: nothing more to do
        return
    self._ies_instances[key] = ie
    ie.set_downloader(self)
716
def _get_info_extractor_class(self, ie_key):
    """Return the entry registered in _ies for ie_key.

    If there is none, the extractor is looked up with the module-level
    get_info_extractor(), registered through add_info_extractor() and returned.
    """
    known = self._ies.get(ie_key)
    if known is not None:
        return known
    looked_up = get_info_extractor(ie_key)
    self.add_info_extractor(looked_up)
    return looked_up
723
def get_info_extractor(self, ie_key):
    """
    Return an IE instance for ie_key.

    An instance already stored in _ies_instances is reused; otherwise a new
    one is created from the class returned by the module-level
    get_info_extractor(), registered with add_info_extractor() and returned.
    """
    instance = self._ies_instances.get(ie_key)
    if instance is not None:
        return instance
    instance = get_info_extractor(ie_key)()
    self.add_info_extractor(instance)
    return instance
735
def add_default_info_extractors(self):
    """
    Register every InfoExtractor class yielded by gen_extractor_classes(),
    in the order they are yielded
    """
    for extractor_class in gen_extractor_classes():
        self.add_info_extractor(extractor_class)
742
def add_post_processor(self, pp, when='post_process'):
    """Append the PostProcessor pp to the chain for stage `when` and set
    this object as its downloader.

    `when` must be an existing key of _pps; a KeyError is raised otherwise.
    """
    chain = self._pps[when]
    chain.append(pp)
    pp.set_downloader(self)
747
def add_post_hook(self, ph):
    """Register ph as a post hook (appended after those already registered)"""
    registered = self._post_hooks
    registered.append(ph)
751
def add_progress_hook(self, ph):
    """Register ph as a download progress hook (appended after those already registered)"""
    registered = self._progress_hooks
    registered.append(ph)
755
def add_postprocessor_hook(self, ph):
    """Register ph as a postprocessing progress hook.

    The hook is remembered in _postprocessor_hooks and is also added to
    every postprocessor currently present in _pps.
    """
    self._postprocessor_hooks.append(ph)
    for postprocessor in itertools.chain.from_iterable(self._pps.values()):
        postprocessor.add_progress_hook(ph)
762
def _bidi_workaround(self, message):
    """Pass message through the external bidi helper process, if one was set up.

    Without an _output_channel attribute the message is returned unchanged.
    Otherwise the message is written (UTF-8, newline terminated) to the
    helper's stdin, as many lines as the message contains are read back
    from _output_channel, and the result is returned without its last character.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)
    expected_lines = message.count('\n') + 1
    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + '\n').encode('utf-8'))
    helper_stdin.flush()
    converted = []
    for _ in range(expected_lines):
        converted.append(self._output_channel.readline().decode('utf-8'))
    # Drop the final character (the newline that was appended before sending)
    return ''.join(converted)[:-len('\n')]
775
def _write_string(self, message, out=None, only_once=False):
    """Write message to out using the 'encoding' param.

    With only_once, a message that was already printed this way is
    skipped, and a new one is remembered in _printed_messages.
    """
    if only_once and message in self._printed_messages:
        return
    if only_once:
        self._printed_messages.add(message)
    write_string(message, out=out, encoding=self.params.get('encoding'))
782
def to_stdout(self, message, skip_eol=False, quiet=False):
    """Print message to stdout

    If a 'logger' param is set, the message goes to its debug() instead.
    Otherwise a quiet message is printed only in verbose mode, and then
    to the error file rather than the screen file. A newline is appended
    unless skip_eol is set.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if quiet and not self.params.get('verbose'):
        return
    line_end = '' if skip_eol else '\n'
    text = '%s%s' % (self._bidi_workaround(message), line_end)
    target = self._err_file if quiet else self._screen_file
    self._write_string(text, target)
791
def to_stderr(self, message, only_once=False):
    """
    Print message to stderr.

    A configured 'logger' receives the message at error level instead;
    otherwise a newline-terminated line is written to the error file,
    honouring only_once.
    """
    assert isinstance(message, compat_str)
    if self.params.get('logger'):
        self.params['logger'].error(message)
        return
    line = f'{self._bidi_workaround(message)}\n'
    self._write_string(line, self._err_file, only_once=only_once)
799
800 def to_console_title(self, message):
801 if not self.params.get('consoletitle', False):
802 return
803 message = remove_terminal_sequences(message)
804 if compat_os_name == 'nt':
805 if ctypes.windll.kernel32.GetConsoleWindow():
806 # c_wchar_p() might not be necessary if `message` is
807 # already of type unicode()
808 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
809 elif 'TERM' in os.environ:
810 self._write_string('\033]0;%s\007' % message, self._screen_file)
811
812 def save_console_title(self):
813 if not self.params.get('consoletitle', False):
814 return
815 if self.params.get('simulate'):
816 return
817 if compat_os_name != 'nt' and 'TERM' in os.environ:
818 # Save the title on stack
819 self._write_string('\033[22;0t', self._screen_file)
820
821 def restore_console_title(self):
822 if not self.params.get('consoletitle', False):
823 return
824 if self.params.get('simulate'):
825 return
826 if compat_os_name != 'nt' and 'TERM' in os.environ:
827 # Restore the title from stack
828 self._write_string('\033[23;0t', self._screen_file)
829
830 def __enter__(self):
831 self.save_console_title()
832 return self
833
834 def __exit__(self, *args):
835 self.restore_console_title()
836
837 if self.params.get('cookiefile') is not None:
838 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
839
840 def trouble(self, message=None, tb=None, is_error=True):
841 """Determine action to take when a download problem appears.
842
843 Depending on if the downloader has been configured to ignore
844 download errors or not, this method may throw an exception or
845 not when errors are found, after printing the message.
846
847 @param tb If given, is additional traceback information
848 @param is_error Whether to raise error according to ignorerrors
849 """
850 if message is not None:
851 self.to_stderr(message)
852 if self.params.get('verbose'):
853 if tb is None:
854 if sys.exc_info()[0]: # if .trouble has been called from an except block
855 tb = ''
856 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
857 tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
858 tb += encode_compat_str(traceback.format_exc())
859 else:
860 tb_data = traceback.format_list(traceback.extract_stack())
861 tb = ''.join(tb_data)
862 if tb:
863 self.to_stderr(tb)
864 if not is_error:
865 return
866 if not self.params.get('ignoreerrors'):
867 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
868 exc_info = sys.exc_info()[1].exc_info
869 else:
870 exc_info = sys.exc_info()
871 raise DownloadError(message, exc_info)
872 self._download_retcode = 1
873
874 def to_screen(self, message, skip_eol=False):
875 """Print message to stdout if not in quiet mode"""
876 self.to_stdout(
877 message, skip_eol, quiet=self.params.get('quiet', False))
878
class Styles(Enum):
    """Named text styles; each value is the format passed on to format_text()."""
    # NOTE: WARNING shares its value with HEADERS, so Enum treats it as an
    # alias of HEADERS. The member order therefore must not be changed.
    HEADERS = 'yellow'
    EMPHASIS = 'light blue'
    ID = 'green'
    DELIM = 'blue'
    ERROR = 'red'
    WARNING = 'yellow'
    SUPPRESS = 'light black'
887
888 def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
889 if test_encoding:
890 original_text = text
891 encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
892 text = text.encode(encoding, 'ignore').decode(encoding)
893 if fallback is not None and text != original_text:
894 text = fallback
895 if isinstance(f, self.Styles):
896 f = f.value
897 return format_text(text, f) if allow_colors else text if fallback is None else fallback
898
899 def _format_screen(self, *args, **kwargs):
900 return self._format_text(
901 self._screen_file, self._allow_colors['screen'], *args, **kwargs)
902
903 def _format_err(self, *args, **kwargs):
904 return self._format_text(
905 self._err_file, self._allow_colors['err'], *args, **kwargs)
906
907 def report_warning(self, message, only_once=False):
908 '''
909 Print the message to stderr, it will be prefixed with 'WARNING:'
910 If stderr is a tty file the 'WARNING:' will be colored
911 '''
912 if self.params.get('logger') is not None:
913 self.params['logger'].warning(message)
914 else:
915 if self.params.get('no_warnings'):
916 return
917 self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
918
919 def deprecation_warning(self, message):
920 if self.params.get('logger') is not None:
921 self.params['logger'].warning('DeprecationWarning: {message}')
922 else:
923 self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
924
def report_error(self, message, *args, **kwargs):
    '''
    Hand the message to trouble(), prefixed with 'ERROR:' (formatted with
    the ERROR style). Extra arguments are forwarded to trouble() unchanged.
    '''
    prefix = self._format_err("ERROR:", self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', *args, **kwargs)
931
932 def write_debug(self, message, only_once=False):
933 '''Log debug message or Print message to stderr'''
934 if not self.params.get('verbose', False):
935 return
936 message = '[debug] %s' % message
937 if self.params.get('logger'):
938 self.params['logger'].debug(message)
939 else:
940 self.to_stderr(message, only_once)
941
942 def report_file_already_downloaded(self, file_name):
943 """Report file has already been fully downloaded."""
944 try:
945 self.to_screen('[download] %s has already been downloaded' % file_name)
946 except UnicodeEncodeError:
947 self.to_screen('[download] The file has already been downloaded')
948
949 def report_file_delete(self, file_name):
950 """Report that existing file will be deleted."""
951 try:
952 self.to_screen('Deleting existing file %s' % file_name)
953 except UnicodeEncodeError:
954 self.to_screen('Deleting existing file')
955
956 def raise_no_formats(self, info, forced=False):
957 has_drm = info.get('__has_drm')
958 msg = 'This video is DRM protected' if has_drm else 'No video formats found!'
959 expected = self.params.get('ignore_no_formats_error')
960 if forced or not expected:
961 raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
962 expected=has_drm or expected)
963 else:
964 self.report_warning(msg)
965
def parse_outtmpl(self):
    """
    Build the dict of output templates from the 'outtmpl' param.

    A non-dict 'outtmpl' is treated as the 'default' template. Every key of
    DEFAULT_OUTTMPL that is missing or None is filled in with its default
    (with spaces replaced when 'restrictfilenames' is set). A dict given in
    the params is updated in place. A warning is reported for each template
    that is a bytes object.
    """
    templates = self.params.get('outtmpl', {})
    if not isinstance(templates, dict):
        templates = {'default': templates}

    restrict = self.params.get('restrictfilenames')

    def sanitize(template):
        # Remove spaces in the default template
        if restrict:
            return template.replace(' - ', ' ').replace(' ', '-')
        return template

    missing = {
        name: sanitize(template)
        for name, template in DEFAULT_OUTTMPL.items()
        if templates.get(name) is None}
    templates.update(missing)

    for template in templates.values():
        if not isinstance(template, bytes):
            continue
        self.report_warning(
            'Parameter outtmpl is bytes, but should be a unicode string. '
            'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
    return templates
984
def get_output_path(self, dir_type='', filename=None):
    """
    Join the configured 'home' path, the path configured for dir_type (if
    a dir_type is given) and filename, and return the sanitized result.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    path = os.path.join(home_dir, type_dir, filename or '')

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    on_py2_windows = sys.version_info < (3, 0) and sys.platform == 'win32'
    if on_py2_windows:
        path = encodeFilename(path, True).decode(preferredencoding())
    return sanitize_path(path, force=self.params.get('windowsfilenames'))
999
@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Run expand_path() on outtmpl while keeping '%%' and '$$' intact."""
    # expand_path translates '%%' into '%' and '$$' into '$'
    # correspondingly that is not what we want since we need to keep
    # '%%' intact for template dict substitution step. Working around
    # with boundary-alike separator hack: a random 32-letter separator is
    # wedged into each pair before expansion and removed again afterwards.
    sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
    guarded = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    expanded = expand_path(guarded)
    return expanded.replace(sep, '')
1014
@staticmethod
def escape_outtmpl(outtmpl):
    ''' Escape any remaining strings like %s, %abc% etc. '''

    def _escape(mobj):
        # Matches carrying a key are kept as they are; all others get an
        # extra '%' in front
        prefix = '' if mobj.group('has_key') else '%'
        return prefix + mobj.group(0)

    pattern = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')
    return re.sub(pattern, _escape, outtmpl)
1022
@classmethod
def validate_outtmpl(cls, outtmpl):
    '''
    Check that outtmpl can be used for %-substitution.
    @return None or Exception object
    '''
    # Replace the additional conversion types with a plain 's' so that the
    # built-in %-formatting can be used for the check
    custom_types_re = STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]')

    def _as_str_conversion(mobj):
        return f'{mobj.group(0)[:-1]}s'

    outtmpl = re.sub(custom_types_re, _as_str_conversion, cls._outtmpl_expandpath(outtmpl))
    try:
        cls.escape_outtmpl(outtmpl) % collections.defaultdict(int)
    except ValueError as err:
        return err
    else:
        return None
1035
1036 @staticmethod
1037 def _copy_infodict(info_dict):
1038 info_dict = dict(info_dict)
1039 for key in ('__original_infodict', '__postprocessors'):
1040 info_dict.pop(key, None)
1041 return info_dict
1042
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict

    Every %(key)X occurrence in outtmpl is evaluated against a copy of
    info_dict. The occurrence is rewritten to refer to a unique key, and
    the computed value is stored under that key in the returned dict.

    @param outtmpl     The output template
    @param info_dict   The info dict; only 'epoch' may be added to it,
                       everything else is done on a copy
    @param sanitize    Whether to sanitize the output as a filename.
                       For backward compatibility, a function can also be passed
    @return            Tuple of (rewritten template, dict of values for it)
    """

    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    info_dict = self._copy_infodict(info_dict)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
    info_dict['video_autonumber'] = self._num_videos
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
        'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
    # The decimal point must be escaped. An unescaped "." lets keys such as
    # "a+1 5" match INTERNAL_FORMAT_RE, which get_value() then cannot parse
    MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:\.\d+)?')
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{field})
        (?P<maths>(?:{math_op}{math_field})*)
        (?:>(?P<strf_format>.+?))?
        (?P<alternate>(?<!\\),[^|&)]+)?
        (?:&(?P<replacement>.*?))?
        (?:\|(?P<default>.*?))?
        $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

    def _traverse_infodict(k):
        # A leading "." (empty first key) is allowed and ignored
        k = k.split('.')
        if k[0] == '':
            k.pop(0)
        return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            operator = None
            # Alternately consume an operator and its operand
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if operator is None:
                    operator = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                # The operand is either a number or another field
                offset = float_or_none(item)
                if offset is None:
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = operator(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                operator = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        return value

    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
        return sanitize_filename(str(value), restricted=restricted,
                                 is_id=re.search(r'(^|[_.])id(\.|$)', key))

    sanitizer = sanitize if callable(sanitize) else filename_sanitizer
    sanitize = bool(sanitize)

    def _dumpjson_default(obj):
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        return repr(obj)

    def create_key(outer_mobj):
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        initial_field = mobj.group('fields') if mobj else ''
        value, replacement, default = None, None, na
        # Walk through the comma-separated alternates until one has a value
        while mobj:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else default
            value = get_value(mobj)
            replacement = mobj['replacement']
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
            else:
                break

        fmt = outer_mobj.group('format')
        if fmt == 's' and value is not None and key in field_size_compat_map.keys():
            fmt = '0{:d}d'.format(field_size_compat_map[key])

        value = default if value is None else value if replacement is None else replacement

        flags = outer_mobj.group('conversion') or ''
        str_fmt = f'{fmt[:-1]}s'
        if fmt[-1] == 'l':  # list
            delim = '\n' if '#' in flags else ', '
            value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
        elif fmt[-1] == 'j':  # json
            value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
        elif fmt[-1] == 'q':  # quoted
            value = map(str, variadic(value) if '#' in flags else [value])
            value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
        elif fmt[-1] == 'B':  # bytes
            value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif fmt[-1] == 'U':  # unicode normalized
            value, fmt = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                value), str_fmt
        elif fmt[-1] == 'D':  # decimal suffix
            num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
            value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                          factor=1024 if '#' in flags else 1000)
        elif fmt[-1] == 'S':  # filename sanitization
            value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
        elif fmt[-1] == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif fmt[-1] not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), str_fmt
            if fmt[-1] in 'csr':
                value = sanitizer(initial_field, value)

        # Build a key that is unique per (key, format) pair; '\0' cannot
        # occur in what the user typed
        key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1216
def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """
    Evaluate outtmpl against info_dict and return the resulting string.
    Extra arguments are forwarded to prepare_outtmpl().
    """
    prepared_tmpl, tmpl_values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    return self.escape_outtmpl(prepared_tmpl) % tmpl_values
1220
def _prepare_filename(self, info_dict, tmpl_type='default'):
    """
    Evaluate the output template of type tmpl_type into a file name.

    Returns None when the result is empty or when the template raises
    ValueError (which is reported as an error).
    """
    try:
        template = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
        filename = self.evaluate_outtmpl(self._outtmpl_expandpath(template), info_dict, True)
        if not filename:
            return None

        if tmpl_type not in ('default', 'temp'):
            force_ext = OUTTMPL_TYPES[tmpl_type]
            if force_ext:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))
        else:
            final_ext, ext = self.params.get('final_ext'), info_dict.get('ext')
            # The template hard-codes the final extension; use the actual one
            if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
                filename = replace_extension(filename, ext, final_ext)

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        max_length = self.params.get('trim_file_name', False)
        if max_length:
            stem, *suffixes = filename.rsplit('.', 2)
            filename = join_nonempty(stem[:max_length], *suffixes, delim='.')

        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
1247
1248 def prepare_filename(self, info_dict, dir_type='', warn=False):
1249 """Generate the output filename."""
1250
1251 filename = self._prepare_filename(info_dict, dir_type or 'default')
1252 if not filename and dir_type not in ('', 'temp'):
1253 return ''
1254
1255 if warn:
1256 if not self.params.get('paths'):
1257 pass
1258 elif filename == '-':
1259 self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
1260 elif os.path.isabs(filename):
1261 self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
1262 if filename == '-' or not filename:
1263 return filename
1264
1265 return self.get_output_path(dir_type, filename)
1266
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """
    Decide whether the video described by info_dict should be downloaded.

    Returns None if the file should be downloaded, else the reason why it
    is skipped. Unless silent, the reason is printed. When the matching
    'break_on_existing'/'break_on_reject' param is set, the corresponding
    exception is raised instead of returning.
    """

    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def check_filter():
        # The title can be missing when we're just evaluating the playlist
        if 'title' in info_dict:
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'

        upload_date = info_dict.get('upload_date')
        if upload_date is not None:
            allowed_dates = self.params.get('daterange', DateRange())
            if upload_date not in allowed_dates:
                return '%s upload date is not in range %s' % (date_from_str(upload_date).isoformat(), allowed_dates)

        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)

        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        match_filter = self.params.get('match_filter')
        if match_filter is None:
            return None
        try:
            verdict = match_filter(info_dict, incomplete=incomplete)
        except TypeError:
            # For backward compatibility
            verdict = None if incomplete else match_filter(info_dict)
        return verdict

    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached

    if reason is None:
        return None
    if not silent:
        self.to_screen('[download] ' + reason)
    if self.params.get(break_opt, False):
        raise break_err()
    return reason
1323
1324 @staticmethod
1325 def add_extra_info(info_dict, extra_info):
1326 '''Set the keys from extra_info in info dict if they are missing'''
1327 for key, value in extra_info.items():
1328 info_dict.setdefault(key, value)
1329
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract the information of url with the first suitable extractor.

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
    process -- whether to resolve all unresolved references (URLs, playlist items),
               must be True for download to work.
    force_generic_extractor -- force using the generic extractor

    Nothing is returned if the URL is already recorded in the download
    archive; an error is reported if no extractor is suitable.
    """

    extra_info = {} if extra_info is None else extra_info

    if force_generic_extractor and not ie_key:
        ie_key = 'Generic'

    candidates = {ie_key: self._get_info_extractor_class(ie_key)} if ie_key else self._ies

    for ie_key, ie in candidates.items():
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        temp_id = ie.get_temp_id(url)
        archived = temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key})
        if not archived:
            return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)

        self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
        if self.params.get('break_on_existing', False):
            raise ExistingVideoReached()
        break
    else:
        self.report_error('no suitable InfoExtractor for URL %s' % url)
1375
def __handle_extraction_exceptions(func):
    """
    Decorator handling the errors raised while running the method func.

    Cancellation and the IndexError variants of LazyList/PagedList are
    re-raised. ReExtractInfo makes the call run again. GeoRestrictedError
    and ExtractorError are reported through report_error(). Any other
    exception is reported if 'ignoreerrors' is set and re-raised otherwise.
    """
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        while True:
            try:
                return func(self, *args, **kwargs)
            except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                raise
            except ReExtractInfo as e:
                notice = f'{e}; Re-extracting data'
                if e.expected:
                    self.to_screen(notice)
                else:
                    self.to_stderr('\r')
                    self.report_warning(notice)
                continue
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(str(e), e.format_traceback())
            except Exception as e:
                if not self.params.get('ignoreerrors'):
                    raise
                self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
            # Reached only after an error was reported without raising
            break
    return wrapper
1407
1408 def _wait_for_video(self, ie_result):
1409 if (not self.params.get('wait_for_video')
1410 or ie_result.get('_type', 'video') != 'video'
1411 or ie_result.get('formats') or ie_result.get('url')):
1412 return
1413
1414 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1415 last_msg = ''
1416
1417 def progress(msg):
1418 nonlocal last_msg
1419 self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1420 last_msg = msg
1421
1422 min_wait, max_wait = self.params.get('wait_for_video')
1423 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1424 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1425 diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
1426 self.report_warning('Release time of video is not known')
1427 elif (diff or 0) <= 0:
1428 self.report_warning('Video should already be available according to extracted info')
1429 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1430 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1431
1432 wait_till = time.time() + diff
1433 try:
1434 while True:
1435 diff = wait_till - time.time()
1436 if diff <= 0:
1437 progress('')
1438 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1439 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1440 time.sleep(1)
1441 except KeyboardInterrupt:
1442 progress('')
1443 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1444 except BaseException as e:
1445 if not isinstance(e, ReExtractInfo):
1446 self.to_screen('')
1447 raise
1448
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """
    Run the extractor ie on url and, if process is set, process the result.

    Returns None if the extractor returns None; else the processed result,
    or the extractor's result with the default extra info added when
    process is false.
    """
    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return None
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}
    original_url = extra_info.get('original_url')
    if original_url:
        ie_result.setdefault('original_url', extra_info['original_url'])
    self.add_default_extra_info(ie_result, ie, url)
    if not process:
        return ie_result
    self._wait_for_video(ie_result)
    return self.process_ie_result(ie_result, download, extra_info)
1468
1469 def add_default_extra_info(self, ie_result, ie, url):
1470 if url is not None:
1471 self.add_extra_info(ie_result, {
1472 'webpage_url': url,
1473 'original_url': url,
1474 'webpage_url_basename': url_basename(url),
1475 'webpage_url_domain': get_domain(url),
1476 })
1477 if ie is not None:
1478 self.add_extra_info(ie_result, {
1479 'extractor': ie.IE_NAME,
1480 'extractor_key': ie.ie_key(),
1481 })
1482
1483 def process_ie_result(self, ie_result, download=True, extra_info=None):
1484 """
1485 Take the result of the ie(may be modified) and resolve all unresolved
1486 references (URLs, playlist items).
1487
1488 It will also download the videos if 'download'.
1489 Returns the resolved ie_result.
1490 """
1491 if extra_info is None:
1492 extra_info = {}
1493 result_type = ie_result.get('_type', 'video')
1494
1495 if result_type in ('url', 'url_transparent'):
1496 ie_result['url'] = sanitize_url(ie_result['url'])
1497 if ie_result.get('original_url'):
1498 extra_info.setdefault('original_url', ie_result['original_url'])
1499
1500 extract_flat = self.params.get('extract_flat', False)
1501 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
1502 or extract_flat is True):
1503 info_copy = ie_result.copy()
1504 ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
1505 if ie and not ie_result.get('id'):
1506 info_copy['id'] = ie.get_temp_id(ie_result['url'])
1507 self.add_default_extra_info(info_copy, ie, ie_result['url'])
1508 self.add_extra_info(info_copy, extra_info)
1509 info_copy, _ = self.pre_process(info_copy)
1510 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
1511 if self.params.get('force_write_download_archive', False):
1512 self.record_download_archive(info_copy)
1513 return ie_result
1514
1515 if result_type == 'video':
1516 self.add_extra_info(ie_result, extra_info)
1517 ie_result = self.process_video_result(ie_result, download=download)
1518 additional_urls = (ie_result or {}).get('additional_urls')
1519 if additional_urls:
1520 # TODO: Improve MetadataParserPP to allow setting a list
1521 if isinstance(additional_urls, compat_str):
1522 additional_urls = [additional_urls]
1523 self.to_screen(
1524 '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
1525 self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
1526 ie_result['additional_entries'] = [
1527 self.extract_info(
1528 url, download, extra_info=extra_info,
1529 force_generic_extractor=self.params.get('force_generic_extractor'))
1530 for url in additional_urls
1531 ]
1532 return ie_result
1533 elif result_type == 'url':
1534 # We have to add extra_info to the results because it may be
1535 # contained in a playlist
1536 return self.extract_info(
1537 ie_result['url'], download,
1538 ie_key=ie_result.get('ie_key'),
1539 extra_info=extra_info)
1540 elif result_type == 'url_transparent':
1541 # Use the information from the embedding page
1542 info = self.extract_info(
1543 ie_result['url'], ie_key=ie_result.get('ie_key'),
1544 extra_info=extra_info, download=False, process=False)
1545
1546 # extract_info may return None when ignoreerrors is enabled and
1547 # extraction failed with an error, don't crash and return early
1548 # in this case
1549 if not info:
1550 return info
1551
1552 force_properties = dict(
1553 (k, v) for k, v in ie_result.items() if v is not None)
1554 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
1555 if f in force_properties:
1556 del force_properties[f]
1557 new_result = info.copy()
1558 new_result.update(force_properties)
1559
1560 # Extracted info may not be a video result (i.e.
1561 # info.get('_type', 'video') != video) but rather an url or
1562 # url_transparent. In such cases outer metadata (from ie_result)
1563 # should be propagated to inner one (info). For this to happen
1564 # _type of info should be overridden with url_transparent. This
1565 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
1566 if new_result.get('_type') == 'url':
1567 new_result['_type'] = 'url_transparent'
1568
1569 return self.process_ie_result(
1570 new_result, download=download, extra_info=extra_info)
1571 elif result_type in ('playlist', 'multi_video'):
1572 # Protect from infinite recursion due to recursively nested playlists
1573 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
1574 webpage_url = ie_result['webpage_url']
1575 if webpage_url in self._playlist_urls:
1576 self.to_screen(
1577 '[download] Skipping already downloaded playlist: %s'
1578 % ie_result.get('title') or ie_result.get('id'))
1579 return
1580
1581 self._playlist_level += 1
1582 self._playlist_urls.add(webpage_url)
1583 self._sanitize_thumbnails(ie_result)
1584 try:
1585 return self.__process_playlist(ie_result, download)
1586 finally:
1587 self._playlist_level -= 1
1588 if not self._playlist_level:
1589 self._playlist_urls.clear()
1590 elif result_type == 'compat_list':
1591 self.report_warning(
1592 'Extractor %s returned a compat_list result. '
1593 'It needs to be updated.' % ie_result.get('extractor'))
1594
1595 def _fixup(r):
1596 self.add_extra_info(r, {
1597 'extractor': ie_result['extractor'],
1598 'webpage_url': ie_result['webpage_url'],
1599 'webpage_url_basename': url_basename(ie_result['webpage_url']),
1600 'webpage_url_domain': get_domain(ie_result['webpage_url']),
1601 'extractor_key': ie_result['extractor_key'],
1602 })
1603 return r
1604 ie_result['entries'] = [
1605 self.process_ie_result(_fixup(r), download, extra_info)
1606 for r in ie_result['entries']
1607 ]
1608 return ie_result
1609 else:
1610 raise Exception('Invalid result type: %s' % result_type)
1611
def _ensure_dir_exists(self, path):
    """Delegate to make_dir() for `path`, handing it self.report_error as the second argument.

    Returns whatever make_dir() returns.
    """
    error_callback = self.report_error
    return make_dir(path, error_callback)
1614
1615 @staticmethod
1616 def _playlist_infodict(ie_result, **kwargs):
1617 return {
1618 **ie_result,
1619 'playlist': ie_result.get('title') or ie_result.get('id'),
1620 'playlist_id': ie_result.get('id'),
1621 'playlist_title': ie_result.get('title'),
1622 'playlist_uploader': ie_result.get('uploader'),
1623 'playlist_uploader_id': ie_result.get('uploader_id'),
1624 'playlist_index': 0,
1625 **kwargs,
1626 }
1627
def __process_playlist(self, ie_result, download):
    """Select, filter and process the entries of a playlist result.

    Reads the 'playliststart', 'playlistend', 'playlist_items',
    'playlistreverse', 'playlistrandom', 'allow_playlist_files',
    'list_thumbnails', 'simulate', 'skip_playlist_after_errors' and
    'compat_opts' params, picks the requested entries out of
    ie_result['entries'], optionally writes the playlist-level files and
    then processes every selected entry through __process_iterable_entry.

    ie_result is updated in place ('entries', 'requested_entries' and
    possibly 'playlist_count').

    Returns the value of run_all_pps('playlist', ie_result), or None when
    writing the playlist info json / description reports failure (None).
    Raises EntryNotInPlaylist when there is no 'entries' key, or when a
    requested entry is missing from an incomplete entry list.
    """
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if 'entries' not in ie_result:
        raise EntryNotInPlaylist('There are no entries')

    # Unique sentinel marking positions for which no entry is available
    MissingEntry = object()
    incomplete_entries = bool(ie_result.get('requested_entries'))
    if incomplete_entries:
        def fill_missing_entries(entries, indices):
            # Put each entry at its 1-based index; every other slot keeps the sentinel
            ret = [MissingEntry] * max(indices)
            for i, entry in zip(indices, entries):
                ret[i - 1] = entry
            return ret
        ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

    playlist_results = []

    playliststart = self.params.get('playliststart', 1)
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(format):
            # Expand a comma-separated spec such as '1,3-5' into 1, 3, 4, 5
            for string_segment in format.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    for item in range(int(start), int(end) + 1):
                        yield int(item)
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']
    if isinstance(ie_entries, list):
        playlist_count = len(ie_entries)
        # The remaining %d placeholder is filled in with n_entries further below
        msg = f'Collected {playlist_count} videos; downloading %d of them'
        ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count

        def get_entry(i):
            # i is a 1-based playlist index
            return ie_entries[i - 1]
    else:
        msg = 'Downloading %d videos'
        if not isinstance(ie_entries, (PagedList, LazyList)):
            ie_entries = LazyList(ie_entries)
        elif isinstance(ie_entries, InAdvancePagedList):
            if ie_entries._pagesize == 1:
                # NOTE(review): this value is not read again in this branch
                playlist_count = ie_entries._pagecount

        def get_entry(i):
            # Same 1-based lookup as above, but routed through the
            # __handle_extraction_exceptions wrapper
            return YoutubeDL.__handle_extraction_exceptions(
                lambda self, i: ie_entries[i - 1]
            )(self, i)

    # `broken` records that the scan below stopped early on a
    # ExistingVideoReached / RejectedVideoReached exception
    entries, broken = [], False
    items = playlistitems if playlistitems is not None else itertools.count(playliststart)
    for i in items:
        if i == 0:
            continue
        if playlistitems is None and playlistend is not None and playlistend < i:
            break
        entry = None
        try:
            entry = get_entry(i)
            if entry is MissingEntry:
                raise EntryNotInPlaylist()
        except (IndexError, EntryNotInPlaylist):
            if incomplete_entries:
                raise EntryNotInPlaylist(f'Entry {i} cannot be found')
            elif not playlistitems:
                break
        # A None placeholder is appended for unavailable items so that positions
        # stay aligned with `playlistitems`; placeholders are dropped below
        entries.append(entry)
        try:
            if entry is not None:
                self._match_entry(entry, incomplete=True, silent=True)
        except (ExistingVideoReached, RejectedVideoReached):
            broken = True
            break
    ie_result['entries'] = entries

    # Save playlist_index before re-ordering
    entries = [
        ((playlistitems[i - 1] if playlistitems else i + playliststart - 1), entry)
        for i, entry in enumerate(entries, 1)
        if entry is not None]
    n_entries = len(entries)

    # Only record the count when the whole playlist was scanned without restrictions
    if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
        ie_result['playlist_count'] = n_entries

    if not playlistitems and (playliststart != 1 or playlistend):
        playlistitems = list(range(playliststart, playliststart + n_entries))
    ie_result['requested_entries'] = playlistitems

    _infojson_written = False
    write_playlist_files = self.params.get('allow_playlist_files', True)
    if write_playlist_files and self.params.get('list_thumbnails'):
        self.list_thumbnails(ie_result)
    if write_playlist_files and not self.params.get('simulate'):
        # ie_copy is only bound in this branch; it is reused after the entry loop
        # solely when _infojson_written is truthy, which also requires this branch
        ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
        _infojson_written = self._write_info_json(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
        if _infojson_written is None:
            return
        if self._write_description('playlist', ie_result,
                                   self.prepare_filename(ie_copy, 'pl_description')) is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]
    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
    failures = 0
    # A falsy 'skip_playlist_after_errors' means there is no failure limit
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    for i, entry_tuple in enumerate(entries, 1):
        playlist_index, entry = entry_tuple
        if 'playlist-index' in self.params.get('compat_opts', []):
            # With this compat option the index follows the (possibly re-ordered) position
            playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
            'playlist_count': ie_result.get('playlist_count'),
            'playlist_index': playlist_index,
            'playlist_autonumber': i,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'webpage_url_domain': get_domain(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        # Any non-None result from _match_entry means the entry is skipped
        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
            break
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results

    # Write the updated info to json
    if _infojson_written and self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
        return

    ie_result = self.run_all_pps('playlist', ie_result)
    self.to_screen(f'[download] Finished downloading playlist: {playlist}')
    return ie_result
1803
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Run a single playlist entry through process_ie_result().

    The call is wrapped by the __handle_extraction_exceptions decorator.
    """
    processed = self.process_ie_result(entry, download=download, extra_info=extra_info)
    return processed
1808
1809 def _build_format_filter(self, filter_spec):
1810 " Returns a function to filter the formats according to the filter_spec "
1811
1812 OPERATORS = {
1813 '<': operator.lt,
1814 '<=': operator.le,
1815 '>': operator.gt,
1816 '>=': operator.ge,
1817 '=': operator.eq,
1818 '!=': operator.ne,
1819 }
1820 operator_rex = re.compile(r'''(?x)\s*
1821 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1822 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1823 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1824 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1825 m = operator_rex.fullmatch(filter_spec)
1826 if m:
1827 try:
1828 comparison_value = int(m.group('value'))
1829 except ValueError:
1830 comparison_value = parse_filesize(m.group('value'))
1831 if comparison_value is None:
1832 comparison_value = parse_filesize(m.group('value') + 'B')
1833 if comparison_value is None:
1834 raise ValueError(
1835 'Invalid value %r in format specification %r' % (
1836 m.group('value'), filter_spec))
1837 op = OPERATORS[m.group('op')]
1838
1839 if not m:
1840 STR_OPERATORS = {
1841 '=': operator.eq,
1842 '^=': lambda attr, value: attr.startswith(value),
1843 '$=': lambda attr, value: attr.endswith(value),
1844 '*=': lambda attr, value: value in attr,
1845 }
1846 str_operator_rex = re.compile(r'''(?x)\s*
1847 (?P<key>[a-zA-Z0-9._-]+)\s*
1848 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1849 (?P<value>[a-zA-Z0-9._-]+)\s*
1850 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1851 m = str_operator_rex.fullmatch(filter_spec)
1852 if m:
1853 comparison_value = m.group('value')
1854 str_op = STR_OPERATORS[m.group('op')]
1855 if m.group('negation'):
1856 op = lambda attr, value: not str_op(attr, value)
1857 else:
1858 op = str_op
1859
1860 if not m:
1861 raise SyntaxError('Invalid filter specification %r' % filter_spec)
1862
1863 def _filter(f):
1864 actual_value = f.get(m.group('key'))
1865 if actual_value is None:
1866 return m.group('none_inclusive')
1867 return op(actual_value, comparison_value)
1868 return _filter
1869
1870 def _check_formats(self, formats):
1871 for f in formats:
1872 self.to_screen('[info] Testing format %s' % f['format_id'])
1873 path = self.get_output_path('temp')
1874 if not self._ensure_dir_exists(f'{path}/'):
1875 continue
1876 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1877 temp_file.close()
1878 try:
1879 success, _ = self.dl(temp_file.name, f, test=True)
1880 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1881 success = False
1882 finally:
1883 if os.path.exists(temp_file.name):
1884 try:
1885 os.remove(temp_file.name)
1886 except OSError:
1887 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1888 if success:
1889 yield f
1890 else:
1891 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1892
1893 def _default_format_spec(self, info_dict, download=True):
1894
1895 def can_merge():
1896 merger = FFmpegMergerPP(self)
1897 return merger.available and merger.can_merge()
1898
1899 prefer_best = (
1900 not self.params.get('simulate')
1901 and download
1902 and (
1903 not can_merge()
1904 or info_dict.get('is_live', False)
1905 or self.outtmpl_dict['default'] == '-'))
1906 compat = (
1907 prefer_best
1908 or self.params.get('allow_multiple_audio_streams', False)
1909 or 'format-spec' in self.params.get('compat_opts', []))
1910
1911 return (
1912 'best/bestvideo+bestaudio' if prefer_best
1913 else 'bestvideo*+bestaudio/best' if not compat
1914 else 'bestvideo+bestaudio/best')
1915
def build_format_selector(self, format_spec):
    """Compile a format specification string into a selector function.

    `format_spec` is tokenized with the Python tokenizer, parsed into a tree
    of FormatSelector tuples (atoms, "," lists, "/" alternatives, "+" merges,
    "(...)" groups, each with optional "[...]" filters) and then turned into
    a function.

    The returned function takes a `ctx` dict (reads ctx['formats'] and, for
    the plain best/worst selectors, ctx['incomplete_formats']) and returns an
    iterable of the selected format dicts.

    Raises SyntaxError when the specification cannot be parsed.
    """
    def syntax_error(note, start):
        # Build (not raise) a SyntaxError pointing at column start[1] of the spec
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    check_formats = self.params.get('check_formats') == 'selected'

    def _parse_filter(tokens):
        # Concatenate the token strings up to (not including) the closing ']'
        filter_parts = []
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
                        break
            elif type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive-descent parser; returns a list of FormatSelector tuples.
        # `tokens` must provide restore_last_token() so that a terminator can be
        # handed back to the enclosing call.
        selectors = []
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        # A bare filter applies to 'best'
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        # Combine two (possibly already merged) formats into one merged format dict
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            get_no_more = {'video': False, 'audio': False}
            # Build a new list instead of popping from the list being iterated:
            # removing items during enumerate() skips the element that follows
            # each removal, which let surplus streams slip through
            # (e.g. [v, a, v, a] kept both audio streams).
            kept_formats = []
            for fmt_info in formats_info:
                if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                    # Neither audio nor video: not part of a merge
                    continue
                for aud_vid in ['audio', 'video']:
                    if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
                        if get_no_more[aud_vid]:
                            # A stream of this kind was already taken; drop this format
                            break
                        get_no_more[aud_vid] = True
                else:
                    kept_formats.append(fmt_info)
            # If everything was dropped (e.g. only audio-less and video-less
            # formats were given), fall back to the last candidate so that a
            # usable format is still returned rather than an empty merge
            formats_info = kept_formats or formats_info[-1:]

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(filtered('format')),
            'format_id': '+'.join(filtered('format_id')),
            'ext': output_ext,
            'protocol': '+'.join(map(determine_protocol, formats_info)),
            'language': '+'.join(orderedSet(filtered('language'))) or None,
            'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
            'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
            'tbr': sum(filtered('tbr', 'vbr', 'abr')),
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'dynamic_range': the_only_video.get('dynamic_range'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
                'asr': the_only_audio.get('asr'),
            })

        return new_dict

    def _check_formats(formats):
        # Only test-download the candidates when check_formats == 'selected'
        if not check_formats:
            yield from formats
            return
        yield from self._check_formats(formats)

    def _build_selector_function(selector):
        # Turn a parsed selector (or list of selectors) into a function of ctx
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    yield from f(ctx)
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                # First alternative that yields anything wins
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                    yield _merge(pair)

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    yield from _check_formats(ctx['formats'][::-1])
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(_check_formats(ctx['formats']))
                    if not formats:
                        return
                    # Start from the last format and fold the others in, back to front
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, format_reverse, format_idx = False, True, 1
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    _filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else lambda f: True)  # b*, w*
                    # Formats with neither audio nor video are never picked by best/worst
                    filter_f = lambda f: _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                else:
                    if format_spec in self._format_selection_exts['audio']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                    elif format_spec in self._format_selection_exts['video']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                    elif format_spec in self._format_selection_exts['storyboards']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                    else:
                        filter_f = lambda f: f.get('format_id') == format_spec  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if format_fallback and ctx['incomplete_formats'] and not matches:
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        matches = formats
                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                    try:
                        yield matches[format_idx - 1]
                    except IndexError:
                        return

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply the "[...]" filters to a copy of ctx before selecting
            ctx_copy = dict(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Iterator over the token list that can step back by one token
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
2240
def _calc_headers(self, info_dict):
    """Return the HTTP headers to use for `info_dict`.

    Starts from a copy of std_headers, overlays info_dict['http_headers'],
    sets 'Cookie' when _calc_cookies() yields a value, and sets
    'X-Forwarded-For' from '__x_forwarded_for_ip' unless already present.
    """
    headers = std_headers.copy()

    extra_headers = info_dict.get('http_headers')
    if extra_headers:
        headers.update(extra_headers)

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    if 'X-Forwarded-For' in headers:
        return headers

    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip:
        headers['X-Forwarded-For'] = forwarded_ip
    return headers
2258
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header that self.cookiejar adds to a request for info_dict['url']."""
    request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(request)
    cookie_header = request.get_header('Cookie')
    return cookie_header
2263
2264 def _sort_thumbnails(self, thumbnails):
2265 thumbnails.sort(key=lambda t: (
2266 t.get('preference') if t.get('preference') is not None else -1,
2267 t.get('width') if t.get('width') is not None else -1,
2268 t.get('height') if t.get('height') is not None else -1,
2269 t.get('id') if t.get('id') is not None else '',
2270 t.get('url')))
2271
2272 def _sanitize_thumbnails(self, info_dict):
2273 thumbnails = info_dict.get('thumbnails')
2274 if thumbnails is None:
2275 thumbnail = info_dict.get('thumbnail')
2276 if thumbnail:
2277 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2278 if not thumbnails:
2279 return
2280
2281 def check_thumbnails(thumbnails):
2282 for t in thumbnails:
2283 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2284 try:
2285 self.urlopen(HEADRequest(t['url']))
2286 except network_exceptions as err:
2287 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2288 continue
2289 yield t
2290
2291 self._sort_thumbnails(thumbnails)
2292 for i, t in enumerate(thumbnails):
2293 if t.get('id') is None:
2294 t['id'] = '%d' % i
2295 if t.get('width') and t.get('height'):
2296 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2297 t['url'] = sanitize_url(t['url'])
2298
2299 if self.params.get('check_formats') is True:
2300 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2301 else:
2302 info_dict['thumbnails'] = thumbnails
2303
2304 def process_video_result(self, info_dict, download=True):
2305 assert info_dict.get('_type', 'video') == 'video'
2306 self._num_videos += 1
2307
2308 if 'id' not in info_dict:
2309 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2310 elif not info_dict.get('id'):
2311 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2312 if 'title' not in info_dict:
2313 raise ExtractorError('Missing "title" field in extractor result',
2314 video_id=info_dict['id'], ie=info_dict['extractor'])
2315 elif not info_dict.get('title'):
2316 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2317 info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
2318
2319 def report_force_conversion(field, field_not, conversion):
2320 self.report_warning(
2321 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2322 % (field, field_not, conversion))
2323
2324 def sanitize_string_field(info, string_field):
2325 field = info.get(string_field)
2326 if field is None or isinstance(field, compat_str):
2327 return
2328 report_force_conversion(string_field, 'a string', 'string')
2329 info[string_field] = compat_str(field)
2330
2331 def sanitize_numeric_fields(info):
2332 for numeric_field in self._NUMERIC_FIELDS:
2333 field = info.get(numeric_field)
2334 if field is None or isinstance(field, compat_numeric_types):
2335 continue
2336 report_force_conversion(numeric_field, 'numeric', 'int')
2337 info[numeric_field] = int_or_none(field)
2338
2339 sanitize_string_field(info_dict, 'id')
2340 sanitize_numeric_fields(info_dict)
2341
2342 if 'playlist' not in info_dict:
2343 # It isn't part of a playlist
2344 info_dict['playlist'] = None
2345 info_dict['playlist_index'] = None
2346
2347 self._sanitize_thumbnails(info_dict)
2348
2349 thumbnail = info_dict.get('thumbnail')
2350 thumbnails = info_dict.get('thumbnails')
2351 if thumbnail:
2352 info_dict['thumbnail'] = sanitize_url(thumbnail)
2353 elif thumbnails:
2354 info_dict['thumbnail'] = thumbnails[-1]['url']
2355
2356 if info_dict.get('display_id') is None and 'id' in info_dict:
2357 info_dict['display_id'] = info_dict['id']
2358
2359 if info_dict.get('duration') is not None:
2360 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2361
2362 for ts_key, date_key in (
2363 ('timestamp', 'upload_date'),
2364 ('release_timestamp', 'release_date'),
2365 ('modified_timestamp', 'modified_date'),
2366 ):
2367 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2368 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2369 # see http://bugs.python.org/issue1646728)
2370 try:
2371 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2372 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2373 except (ValueError, OverflowError, OSError):
2374 pass
2375
2376 live_keys = ('is_live', 'was_live')
2377 live_status = info_dict.get('live_status')
2378 if live_status is None:
2379 for key in live_keys:
2380 if info_dict.get(key) is False:
2381 continue
2382 if info_dict.get(key):
2383 live_status = key
2384 break
2385 if all(info_dict.get(key) is False for key in live_keys):
2386 live_status = 'not_live'
2387 if live_status:
2388 info_dict['live_status'] = live_status
2389 for key in live_keys:
2390 if info_dict.get(key) is None:
2391 info_dict[key] = (live_status == key)
2392
2393 # Auto generate title fields corresponding to the *_number fields when missing
2394 # in order to always have clean titles. This is very common for TV series.
2395 for field in ('chapter', 'season', 'episode'):
2396 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2397 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2398
2399 for cc_kind in ('subtitles', 'automatic_captions'):
2400 cc = info_dict.get(cc_kind)
2401 if cc:
2402 for _, subtitle in cc.items():
2403 for subtitle_format in subtitle:
2404 if subtitle_format.get('url'):
2405 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2406 if subtitle_format.get('ext') is None:
2407 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2408
2409 automatic_captions = info_dict.get('automatic_captions')
2410 subtitles = info_dict.get('subtitles')
2411
2412 info_dict['requested_subtitles'] = self.process_subtitles(
2413 info_dict['id'], subtitles, automatic_captions)
2414
2415 if info_dict.get('formats') is None:
2416 # There's only one format available
2417 formats = [info_dict]
2418 else:
2419 formats = info_dict['formats']
2420
2421 info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2422 if not self.params.get('allow_unplayable_formats'):
2423 formats = [f for f in formats if not f.get('has_drm')]
2424
2425 # backward compatibility
2426 info_dict['fulltitle'] = info_dict['title']
2427
2428 if info_dict.get('is_live'):
2429 get_from_start = bool(self.params.get('live_from_start'))
2430 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2431 if not get_from_start:
2432 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2433
2434 if not formats:
2435 self.raise_no_formats(info_dict)
2436
2437 def is_wellformed(f):
2438 url = f.get('url')
2439 if not url:
2440 self.report_warning(
2441 '"url" field is missing or empty - skipping format, '
2442 'there is an error in extractor')
2443 return False
2444 if isinstance(url, bytes):
2445 sanitize_string_field(f, 'url')
2446 return True
2447
2448 # Filter out malformed formats for better extraction robustness
2449 formats = list(filter(is_wellformed, formats))
2450
2451 formats_dict = {}
2452
2453 # We check that all the formats have the format and format_id fields
2454 for i, format in enumerate(formats):
2455 sanitize_string_field(format, 'format_id')
2456 sanitize_numeric_fields(format)
2457 format['url'] = sanitize_url(format['url'])
2458 if not format.get('format_id'):
2459 format['format_id'] = compat_str(i)
2460 else:
2461 # Sanitize format_id from characters used in format selector expression
2462 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2463 format_id = format['format_id']
2464 if format_id not in formats_dict:
2465 formats_dict[format_id] = []
2466 formats_dict[format_id].append(format)
2467
2468 # Make sure all formats have unique format_id
2469 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2470 for format_id, ambiguous_formats in formats_dict.items():
2471 ambigious_id = len(ambiguous_formats) > 1
2472 for i, format in enumerate(ambiguous_formats):
2473 if ambigious_id:
2474 format['format_id'] = '%s-%d' % (format_id, i)
2475 if format.get('ext') is None:
2476 format['ext'] = determine_ext(format['url']).lower()
2477 # Ensure there is no conflict between id and ext in format selection
2478 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2479 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2480 format['format_id'] = 'f%s' % format['format_id']
2481
2482 for i, format in enumerate(formats):
2483 if format.get('format') is None:
2484 format['format'] = '{id} - {res}{note}'.format(
2485 id=format['format_id'],
2486 res=self.format_resolution(format),
2487 note=format_field(format, 'format_note', ' (%s)'),
2488 )
2489 if format.get('protocol') is None:
2490 format['protocol'] = determine_protocol(format)
2491 if format.get('resolution') is None:
2492 format['resolution'] = self.format_resolution(format, default=None)
2493 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2494 format['dynamic_range'] = 'SDR'
2495 if (info_dict.get('duration') and format.get('tbr')
2496 and not format.get('filesize') and not format.get('filesize_approx')):
2497 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2498
2499 # Add HTTP headers, so that external programs can use them from the
2500 # json output
2501 full_format_info = info_dict.copy()
2502 full_format_info.update(format)
2503 format['http_headers'] = self._calc_headers(full_format_info)
2504 # Remove private housekeeping stuff
2505 if '__x_forwarded_for_ip' in info_dict:
2506 del info_dict['__x_forwarded_for_ip']
2507
2508 # TODO Central sorting goes here
2509
2510 if self.params.get('check_formats') is True:
2511 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2512
2513 if not formats or formats[0] is not info_dict:
2514 # only set the 'formats' fields if the original info_dict list them
2515 # otherwise we end up with a circular reference, the first (and unique)
2516 # element in the 'formats' field in info_dict is info_dict itself,
2517 # which can't be exported to json
2518 info_dict['formats'] = formats
2519
2520 info_dict, _ = self.pre_process(info_dict)
2521
2522 # The pre-processors may have modified the formats
2523 formats = info_dict.get('formats', [info_dict])
2524
2525 list_only = self.params.get('simulate') is None and (
2526 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2527 interactive_format_selection = not list_only and self.format_selector == '-'
2528 if self.params.get('list_thumbnails'):
2529 self.list_thumbnails(info_dict)
2530 if self.params.get('listsubtitles'):
2531 if 'automatic_captions' in info_dict:
2532 self.list_subtitles(
2533 info_dict['id'], automatic_captions, 'automatic captions')
2534 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2535 if self.params.get('listformats') or interactive_format_selection:
2536 self.list_formats(info_dict)
2537 if list_only:
2538 # Without this printing, -F --print-json will not work
2539 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2540 return
2541
2542 format_selector = self.format_selector
2543 if format_selector is None:
2544 req_format = self._default_format_spec(info_dict, download=download)
2545 self.write_debug('Default format spec: %s' % req_format)
2546 format_selector = self.build_format_selector(req_format)
2547
2548 while True:
2549 if interactive_format_selection:
2550 req_format = input(
2551 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2552 try:
2553 format_selector = self.build_format_selector(req_format)
2554 except SyntaxError as err:
2555 self.report_error(err, tb=False, is_error=False)
2556 continue
2557
2558 # While in format selection we may need to have an access to the original
2559 # format set in order to calculate some metrics or do some processing.
2560 # For now we need to be able to guess whether original formats provided
2561 # by extractor are incomplete or not (i.e. whether extractor provides only
2562 # video-only or audio-only formats) for proper formats selection for
2563 # extractors with such incomplete formats (see
2564 # https://github.com/ytdl-org/youtube-dl/pull/5556).
2565 # Since formats may be filtered during format selection and may not match
2566 # the original formats the results may be incorrect. Thus original formats
2567 # or pre-calculated metrics should be passed to format selection routines
2568 # as well.
2569 # We will pass a context object containing all necessary additional data
2570 # instead of just formats.
2571 # This fixes incorrect format selection issue (see
2572 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2573 incomplete_formats = (
2574 # All formats are video-only or
2575 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2576 # all formats are audio-only
2577 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2578
2579 ctx = {
2580 'formats': formats,
2581 'incomplete_formats': incomplete_formats,
2582 }
2583
2584 formats_to_download = list(format_selector(ctx))
2585 if interactive_format_selection and not formats_to_download:
2586 self.report_error('Requested format is not available', tb=False, is_error=False)
2587 continue
2588 break
2589
2590 if not formats_to_download:
2591 if not self.params.get('ignore_no_formats_error'):
2592 raise ExtractorError('Requested format is not available', expected=True,
2593 video_id=info_dict['id'], ie=info_dict['extractor'])
2594 self.report_warning('Requested format is not available')
2595 # Process what we can, even without any available formats.
2596 formats_to_download = [{}]
2597
2598 best_format = formats_to_download[-1]
2599 if download:
2600 if best_format:
2601 self.to_screen(
2602 f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2603 + ', '.join([f['format_id'] for f in formats_to_download]))
2604 max_downloads_reached = False
2605 for i, fmt in enumerate(formats_to_download):
2606 formats_to_download[i] = new_info = dict(info_dict)
2607 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2608 new_info.update(fmt)
2609 new_info['__original_infodict'] = info_dict
2610 try:
2611 self.process_info(new_info)
2612 except MaxDownloadsReached:
2613 max_downloads_reached = True
2614 new_info.pop('__original_infodict')
2615 # Remove copied info
2616 for key, val in tuple(new_info.items()):
2617 if info_dict.get(key) == val:
2618 new_info.pop(key)
2619 if max_downloads_reached:
2620 break
2621
2622 write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
2623 assert write_archive.issubset({True, False, 'ignore'})
2624 if True in write_archive and False not in write_archive:
2625 self.record_download_archive(info_dict)
2626
2627 info_dict['requested_downloads'] = formats_to_download
2628 info_dict = self.run_all_pps('after_video', info_dict)
2629 if max_downloads_reached:
2630 raise MaxDownloadsReached()
2631
2632 # We update the info dict with the selected best quality format (backwards compatibility)
2633 info_dict.update(best_format)
2634 return info_dict
2635
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Choose which subtitle languages to fetch and one format for each

    Returns a dict mapping every chosen language to the chosen format dict,
    or None when subtitle writing is not enabled or nothing is available
    """
    want_normal = self.params.get('writesubtitles')
    want_auto = self.params.get('writeautomaticsub')

    # Normal subtitles win over automatic captions of the same language
    available_subs = {}
    if normal_subtitles and want_normal:
        available_subs.update(normal_subtitles)
    if automatic_captions and want_auto:
        for lang, cap_info in automatic_captions.items():
            available_subs.setdefault(lang, cap_info)

    if not (want_normal or want_auto) or not available_subs:
        return None

    all_sub_langs = available_subs.keys()
    lang_patterns = self.params.get('subtitleslangs', False)
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif lang_patterns:
        # A list is used so that the order of languages will be the same as
        # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
        requested_langs = []
        for pattern in lang_patterns:
            if pattern == 'all':
                requested_langs.extend(all_sub_langs)
                continue
            # A leading "-" means: drop the matching languages selected so far
            discard = pattern[0] == '-'
            regex = re.compile((pattern[1:] if discard else pattern) + '$')
            matched = [lang for lang in all_sub_langs if regex.match(lang)]
            if discard:
                requested_langs = [lang for lang in requested_langs if lang not in matched]
            else:
                requested_langs.extend(matched)
        requested_langs = orderedSet(requested_langs)
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        # Nothing was asked for explicitly: fall back to the first available language
        requested_langs = [list(all_sub_langs)[0]]
    if requested_langs:
        self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        candidates = available_subs.get(lang)
        if candidates is None:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        # Walk the preference list; the last entry of a list is treated as the best one
        for ext in formats_preference:
            if ext == 'best':
                chosen = candidates[-1]
                break
            same_ext = [fmt for fmt in candidates if fmt['ext'] == ext]
            if same_ext:
                chosen = same_ext[-1]
                break
        else:
            chosen = candidates[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, chosen['ext']))
        subs[lang] = chosen
    return subs
2703
def _forceprint(self, key, info_dict):
    """Evaluate the 'forceprint' and 'print_to_file' templates registered under key

    Does nothing when info_dict is None
    """
    if info_dict is None:
        return

    # The rendered tables are exposed as extra fields on a copy only
    info_copy = info_dict.copy()
    info_copy.update({
        'formats_table': self.render_formats_table(info_dict),
        'thumbnails_table': self.render_thumbnails_table(info_dict),
        'subtitles_table': self.render_subtitles_table(
            info_dict.get('id'), info_dict.get('subtitles')),
        'automatic_captions_table': self.render_subtitles_table(
            info_dict.get('id'), info_dict.get('automatic_captions')),
    })

    def format_tmpl(tmpl):
        # A bare "field" prints its value; "field=" prints "field = <repr>";
        # anything else is taken to be a full output template already
        mobj = re.match(r'\w+(=?)$', tmpl)
        if not mobj:
            return tmpl
        if mobj.group(1):
            field = tmpl[:-1]
            return f'{field} = %({field})r'
        return f'%({tmpl})s'

    for tmpl in self.params['forceprint'].get(key, []):
        self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))

    for tmpl, file_tmpl in self.params['print_to_file'].get(key, []):
        # The target filename is evaluated against the original dict
        filename = self.evaluate_outtmpl(file_tmpl, info_dict)
        tmpl = format_tmpl(tmpl)
        self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
        with io.open(filename, 'a', encoding='utf-8') as f:
            f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
2730
def __forced_printings(self, info_dict, filename, incomplete):
    """Write the fields requested through the 'force*' params to stdout

    The passed info_dict is not modified; a shallow copy is extended with
    'filename' and 'urls' before printing
    """
    info = info_dict.copy()
    if filename is not None:
        info['filename'] = filename

    requested_formats = info.get('requested_formats')
    if requested_formats is not None:
        # For RTMP URLs, also include the playpath
        info['urls'] = '\n'.join(
            fmt['url'] + fmt.get('play_path', '') for fmt in requested_formats)
    elif 'url' in info:
        info['urls'] = info['url'] + info.get('play_path', '')

    def is_forced(field):
        return self.params.get('force%s' % field, False)

    def print_mandatory(field, actual_field=None):
        # When the info is known to be incomplete, a missing value is skipped;
        # otherwise the lookup is allowed to fail
        source = field if actual_field is None else actual_field
        if not is_forced(field):
            return
        if incomplete and info.get(source) is None:
            return
        self.to_stdout(info[source])

    def print_optional(field):
        if is_forced(field) and info.get(field) is not None:
            self.to_stdout(info[field])

    needs_post_extract = (
        self.params.get('forcejson')
        or self.params['forceprint'].get('video')
        or self.params['print_to_file'].get('video'))
    if needs_post_extract:
        self.post_extract(info)
    self._forceprint('video', info)

    for field in ('title', 'id'):
        print_mandatory(field)
    print_mandatory('url', 'urls')
    for field in ('thumbnail', 'description', 'filename'):
        print_optional(field)
    if self.params.get('forceduration') and info.get('duration') is not None:
        self.to_stdout(formatSeconds(info['duration']))
    print_mandatory('format')

    if self.params.get('forcejson'):
        self.to_stdout(json.dumps(self.sanitize_info(info)))
2771
def dl(self, name, info, subtitle=False, test=False):
    """Run a suitable downloader for info, writing to name

    With test=True a fixed, reduced set of downloader params is used instead
    of self.params and no progress hooks are attached.
    Returns whatever the downloader's download() returns
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    if not test:
        params = self.params
    else:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }

    downloader_cls = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_cls(self, params)
    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)

        def loggable(url):
            # Only the part of a data: URL before the first comma is logged
            return url.split(',')[0] + ',<data>' if url.startswith('data:') else url

        sources = info.get('requested_formats', []) or [info]
        urls = '", "'.join(loggable(f['url']) for f in sources)
        self.write_debug('Invoking downloader on "%s"' % urls)

    # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
    # But it may contain objects that are not deep-copyable
    new_info = self._copy_infodict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)
2806
def existing_file(self, filepaths, *, default_overwrite=True):
    """Return the first of filepaths that exists, unless overwriting is enabled

    When the 'overwrites' param (defaulting to default_overwrite) is truthy,
    every existing candidate is deleted instead and None is returned
    """
    found = [path for path in orderedSet(filepaths) if os.path.exists(path)]
    may_overwrite = self.params.get('overwrites', default_overwrite)
    if found and not may_overwrite:
        return found[0]

    for stale in found:
        self.report_file_delete(stale)
        os.remove(stale)
    return None
2816
def process_info(self, info_dict):
    """Process a single resolved IE result. (Modifies it in-place)

    Writes the requested side files (description, subtitles, thumbnails,
    info json, annotations, internet shortcuts), runs the 'before_dl'
    pre-processors, downloads the media unless 'simulate'/'skip_download'
    is set, and finally runs the fixups, post-processors and post hooks.

    The outcome is recorded in info_dict['__write_download_archive'].
    Most failures are reported and make the method return early.

    Raises UnavailableVideoError on OSError/IOError during the download and
    MaxDownloadsReached once the 'max_downloads' limit has been hit.
    """

    assert info_dict.get('_type', 'video') == 'video'
    # Kept to verify at the end that info_dict was never rebound
    original_infodict = info_dict

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    if self._match_entry(info_dict) is not None:
        info_dict['__write_download_archive'] = 'ignore'
        return

    self.post_extract(info_dict)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    if self.params.get('simulate'):
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
        return

    if full_filename is None:
        return
    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    # Each of the writers below signals a fatal error by returning None
    if self._write_description('video', info_dict,
                               self.prepare_filename(info_dict, 'description')) is None:
        return

    sub_files = self._write_subtitles(info_dict, temp_filename)
    if sub_files is None:
        return
    files_to_move.update(dict(sub_files))

    thumb_files = self._write_thumbnails(
        'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
    if thumb_files is None:
        return
    files_to_move.update(dict(thumb_files))

    infofn = self.prepare_filename(info_dict, 'infojson')
    _infojson_written = self._write_info_json('video', info_dict, infofn)
    if _infojson_written:
        info_dict['infojson_filename'] = infofn
        # For backward compatibility, even though it was a private field
        info_dict['__infojson_filename'] = infofn
    elif _infojson_written is None:
        return

    # Note: Annotations are deprecated
    annofn = None
    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
    if annofn:
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    # Write internet shortcut files
    def _write_link_file(link_type):
        # Returns False only on errors that must abort the processing
        if 'webpage_url' not in info_dict:
            self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
            return False
        linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
        if not self._ensure_dir_exists(encodeFilename(linkfn)):
            return False
        # An existing shortcut is kept only when overwriting is disabled,
        # consistent with how the annotations file is handled above
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
            return True
        try:
            self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
            with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                         newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
                if link_type == 'desktop':
                    # The filename without the ".desktop" suffix
                    template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
        except (OSError, IOError):
            self.report_error(f'Cannot write internet shortcut {linkfn}')
            return False
        return True

    write_links = {
        'url': self.params.get('writeurllink'),
        'webloc': self.params.get('writewebloclink'),
        'desktop': self.params.get('writedesktoplink'),
    }
    if self.params.get('writelink'):
        # 'writelink' selects the shortcut type from the current platform
        link_type = ('webloc' if sys.platform == 'darwin'
                     else 'desktop' if sys.platform.startswith('linux')
                     else 'url')
        write_links[link_type] = True

    if any(should_write and not _write_link_file(link_type)
           for link_type, should_write in write_links.items()):
        return

    def replace_info_dict(new_info):
        # Copy the contents of new_info into the very same dict object,
        # so that the caller's reference stays valid
        nonlocal info_dict
        if new_info == info_dict:
            return
        info_dict.clear()
        info_dict.update(new_info)

    try:
        new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
        replace_info_dict(new_info)
    except PostProcessingError as err:
        self.report_error('Preprocessing: %s' % str(err))
        return

    if self.params.get('skip_download'):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_video_file(*filepaths):
                # For every candidate, the name with the final extension is checked first.
                # Video files are not overwritten unless 'overwrites' is set
                ext = info_dict.get('ext')
                converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
                file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
                                          default_overwrite=False)
                if file:
                    info_dict['ext'] = os.path.splitext(file)[1][1:]
                return file

            success = True
            if info_dict.get('requested_formats') is not None:

                def compatible_formats(formats):
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(format.get('ext') for format in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv')
                    if (info_dict['ext'] == 'webm'
                            and info_dict.get('thumbnails')
                            # check with type instead of pp_key, __name__, or isinstance
                            # since we don't want any custom PPs to trigger this
                            and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    # Swap a trailing old/new extension for ext; otherwise append ext
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return '%s.%s' % (filename_wo_ext, ext)

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_video_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                downloaded = []
                merger = FFmpegMergerPP(self)

                fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif fd:
                    # A single downloader handles all the requested formats at once
                    for f in requested_formats if fd != FFmpegFD else []:
                        f['filepath'] = fname = prepend_extension(
                            correct_ext(temp_filename, info_dict['ext']),
                            'f%s' % f['format_id'], info_dict['ext'])
                        downloaded.append(fname)
                    info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        self.report_warning(
                            'You have requested merging of multiple formats but ffmpeg is not installed. '
                            'The formats won\'t be merged.')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                        fname = temp_filename
                    # Download every requested format separately
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            f['filepath'] = fname
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success

                if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
                    info_dict['__postprocessors'].append(merger)
                    info_dict['__files_to_merge'] = downloaded
                    # Even if there were no downloads, it is being merged only now
                    info_dict['__real_download'] = True
                else:
                    for file in downloaded:
                        files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_video_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and full_filename != '-':

            def fixup():
                # Queue the fixup post-processors that apply to this download,
                # or only warn about the problem, depending on the 'fixup' param
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = False
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    if not cndn:
                        return
                    if not do_fixup:
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(
                    stretched_ratio not in (1, None),
                    f'Non-uniform pixel ratio {stretched_ratio}',
                    FFmpegFixupStretchedPP)

                ffmpeg_fixup(
                    (info_dict.get('requested_formats') is None
                     and info_dict.get('container') == 'm4a_dash'
                     and info_dict.get('ext') == 'm4a'),
                    'writing DASH m4a. Only some players support this container',
                    FFmpegFixupM4aPP)

                downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                downloader = downloader.__name__ if downloader else None

                if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
                    ffmpeg_fixup(downloader == 'HlsFD',
                                 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                 FFmpegFixupM3u8PP)
                    ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
                                 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            info_dict['__write_download_archive'] = True

    if self.params.get('force_write_download_archive'):
        info_dict['__write_download_archive'] = True

    # Make sure the info_dict was modified in-place
    assert info_dict is original_infodict

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None and self._num_downloads >= int(max_downloads):
        raise MaxDownloadsReached()
3184
def __download_wrapper(self, func):
    """Wrap func with the error handling shared by the download entry points

    The wrapper itself returns nothing. When 'dump_single_json' is set, the
    sanitized result of a successful call is written to stdout as JSON
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            res = func(*args, **kwargs)
        except UnavailableVideoError as e:
            self.report_error(e)
            return
        except MaxDownloadsReached as e:
            self.to_screen(f'[info] {e}')
            raise
        except DownloadCancelled as e:
            self.to_screen(f'[info] {e}')
            # With 'break_per_url' the cancellation ends only the current URL
            if not self.params.get('break_per_url'):
                raise
            return

        # Reached only when func() completed without raising
        if self.params.get('dump_single_json', False):
            self.post_extract(res)
            self.to_stdout(json.dumps(self.sanitize_info(res)))
    return wrapper
3204
def download(self, url_list):
    """Download a given list of URLs."""
    url_list = variadic(url_list)  # Passing a single URL is a common mistake
    outtmpl = self.outtmpl_dict['default']

    # A template without any "%" field would give every URL the same filename
    several_urls = len(url_list) > 1
    if (several_urls and outtmpl != '-' and '%' not in outtmpl
            and self.params.get('max_downloads') != 1):
        raise SameFileError(outtmpl)

    for url in url_list:
        extract = self.__download_wrapper(self.extract_info)
        force_generic = self.params.get('force_generic_extractor', False)
        extract(url, force_generic_extractor=force_generic)

    return self._download_retcode
3220
def download_with_info_file(self, info_filename):
    """Process the info dict stored as JSON in info_filename and download it

    If that fails and the info has a 'webpage_url', the download is retried
    from that URL; otherwise the error is re-raised.
    Returns the download return code
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with reader as lines:
        # FileInput doesn't have a read method, we can't call json.load
        raw_info = json.loads('\n'.join(lines))
    info = self.sanitize_info(raw_info, self.params.get('clean_infojson', True))

    try:
        self.__download_wrapper(self.process_ie_result)(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
        if not isinstance(e, EntryNotInPlaylist):
            self.to_stderr('\r')
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
        return self.download([webpage_url])
    return self._download_retcode
3239
3240 @staticmethod
3241 def sanitize_info(info_dict, remove_private_keys=False):
3242 ''' Sanitize the infodict for converting to json '''
3243 if info_dict is None:
3244 return info_dict
3245 info_dict.setdefault('epoch', int(time.time()))
3246 info_dict.setdefault('_type', 'video')
3247 remove_keys = {'__original_infodict'} # Always remove this since this may contain a copy of the entire dict
3248 keep_keys = ['_type'] # Always keep this to facilitate load-info-json
3249 if remove_private_keys:
3250 remove_keys |= {
3251 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
3252 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
3253 }
3254 reject = lambda k, v: k not in keep_keys and (
3255 k.startswith('_') or k in remove_keys or v is None)
3256 else:
3257 reject = lambda k, v: k in remove_keys
3258
3259 def filter_fn(obj):
3260 if isinstance(obj, dict):
3261 return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
3262 elif isinstance(obj, (list, tuple, set, LazyList)):
3263 return list(map(filter_fn, obj))
3264 elif obj is None or isinstance(obj, (str, int, float, bool)):
3265 return obj
3266 else:
3267 return repr(obj)
3268
3269 return filter_fn(info_dict)
3270
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    """Alias of sanitize_info for backward compatibility.

    `actually_filter` is forwarded as sanitize_info's `remove_private_keys`.
    """
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)
3275
3276 @staticmethod
3277 def post_extract(info_dict):
3278 def actual_post_extract(info_dict):
3279 if info_dict.get('_type') in ('playlist', 'multi_video'):
3280 for video_dict in info_dict.get('entries', {}):
3281 actual_post_extract(video_dict or {})
3282 return
3283
3284 post_extractor = info_dict.get('__post_extractor') or (lambda: {})
3285 extra = post_extractor().items()
3286 info_dict.update(extra)
3287 info_dict.pop('__post_extractor', None)
3288
3289 original_infodict = info_dict.get('__original_infodict') or {}
3290 original_infodict.update(extra)
3291 original_infodict.pop('__post_extractor', None)
3292
3293 actual_post_extract(info_dict or {})
3294
def run_pp(self, pp, infodict):
    """Run one postprocessor and handle the files it asks to delete.

    A PostProcessingError is reported and the unmodified infodict returned
    only when the 'ignoreerrors' param is exactly True; otherwise it
    propagates. Files reported for deletion are either registered in
    '__files_to_move' (when 'keepvideo' is set) or removed from disk and
    dropped from '__files_to_move'. Returns the resulting infodict.
    """
    infodict.setdefault('__files_to_move', {})
    try:
        obsolete_files, infodict = pp.run(infodict)
    except PostProcessingError as err:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is True:
            self.report_error(err)
            return infodict
        raise

    if not obsolete_files:
        return infodict

    if self.params.get('keepvideo', False):
        for path in obsolete_files:
            infodict['__files_to_move'].setdefault(path, '')
        return infodict

    for path in set(obsolete_files):
        self.to_screen('Deleting original file %s (pass -k to keep)' % path)
        try:
            os.remove(encodeFilename(path))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded original file')
        # A deleted file must no longer be scheduled for moving
        infodict['__files_to_move'].pop(path, None)
    return infodict
3323
def run_all_pps(self, key, info, *, additional_pps=None):
    """Run every postprocessor registered under `key` and return the info.

    The forced printing for `key` happens first. `additional_pps`, when
    given, run before the registered postprocessors; each postprocessor
    receives the info returned by the previous one.
    """
    self._forceprint(key, info)
    pipeline = (additional_pps or []) + self._pps[key]
    return functools.reduce(lambda current, pp: self.run_pp(pp, current), pipeline, info)
3329
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors of stage `key` on a shallow copy of `ie_info`.

    Returns a tuple of the processed info (without '__files_to_move') and
    the files-to-move mapping that was popped from it.
    """
    working = dict(ie_info)
    working['__files_to_move'] = files_to_move if files_to_move else {}
    processed = self.run_all_pps(key, working)
    pending_moves = processed.pop('__files_to_move', None)
    return processed, pending_moves
3335
def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file.

    Runs the 'post_process' stage (preceded by any postprocessors listed in
    the info's '__postprocessors'), then moves the files, and finally runs
    the 'after_move' stage, whose result is returned.
    """
    info.update({'filepath': filename, '__files_to_move': files_to_move or {}})
    extra_pps = info.get('__postprocessors')
    info = self.run_all_pps('post_process', info, additional_pps=extra_pps)
    info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
    # The move bookkeeping is internal and must not reach the after_move stage
    info.pop('__files_to_move')
    return self.run_all_pps('after_move', info)
3344
3345 def _make_archive_id(self, info_dict):
3346 video_id = info_dict.get('id')
3347 if not video_id:
3348 return
3349 # Future-proof against any change in case
3350 # and backwards compatibility with prior versions
3351 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
3352 if extractor is None:
3353 url = str_or_none(info_dict.get('url'))
3354 if not url:
3355 return
3356 # Try to find matching extractor for the URL and take its ie_key
3357 for ie_key, ie in self._ies.items():
3358 if ie.suitable(url):
3359 extractor = ie_key
3360 break
3361 else:
3362 return
3363 return '%s %s' % (extractor.lower(), video_id)
3364
def in_download_archive(self, info_dict):
    """Return whether the video is already recorded in the download archive.

    Always False when no 'download_archive' param is set or when no archive
    id can be built from `info_dict`.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    # A missing id means incomplete video information
    return bool(archive_id) and archive_id in self.archive
3375
def record_download_archive(self, info_dict):
    """Append the video's archive id to the download archive.

    Does nothing when no 'download_archive' param is set. Otherwise the id
    is appended as one line to the archive file and added to the in-memory
    archive set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return

    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    self.write_debug(f'Adding to archive: {archive_id}')
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
3386
3387 @staticmethod
3388 def format_resolution(format, default='unknown'):
3389 if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
3390 return 'audio only'
3391 if format.get('resolution') is not None:
3392 return format['resolution']
3393 if format.get('width') and format.get('height'):
3394 return '%dx%d' % (format['width'], format['height'])
3395 elif format.get('height'):
3396 return '%sp' % format['height']
3397 elif format.get('width'):
3398 return '%dx?' % format['width']
3399 return default
3400
3401 def _list_format_headers(self, *headers):
3402 if self.params.get('listformats_table', True) is not False:
3403 return [self._format_screen(header, self.Styles.HEADERS) for header in headers]
3404 return headers
3405
3406 def _format_note(self, fdict):
3407 res = ''
3408 if fdict.get('ext') in ['f4f', 'f4m']:
3409 res += '(unsupported)'
3410 if fdict.get('language'):
3411 if res:
3412 res += ' '
3413 res += '[%s]' % fdict['language']
3414 if fdict.get('format_note') is not None:
3415 if res:
3416 res += ' '
3417 res += fdict['format_note']
3418 if fdict.get('tbr') is not None:
3419 if res:
3420 res += ', '
3421 res += '%4dk' % fdict['tbr']
3422 if fdict.get('container') is not None:
3423 if res:
3424 res += ', '
3425 res += '%s container' % fdict['container']
3426 if (fdict.get('vcodec') is not None
3427 and fdict.get('vcodec') != 'none'):
3428 if res:
3429 res += ', '
3430 res += fdict['vcodec']
3431 if fdict.get('vbr') is not None:
3432 res += '@'
3433 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
3434 res += 'video@'
3435 if fdict.get('vbr') is not None:
3436 res += '%4dk' % fdict['vbr']
3437 if fdict.get('fps') is not None:
3438 if res:
3439 res += ', '
3440 res += '%sfps' % fdict['fps']
3441 if fdict.get('acodec') is not None:
3442 if res:
3443 res += ', '
3444 if fdict['acodec'] == 'none':
3445 res += 'video only'
3446 else:
3447 res += '%-5s' % fdict['acodec']
3448 elif fdict.get('abr') is not None:
3449 if res:
3450 res += ', '
3451 res += 'audio'
3452 if fdict.get('abr') is not None:
3453 res += '@%3dk' % fdict['abr']
3454 if fdict.get('asr') is not None:
3455 res += ' (%5dHz)' % fdict['asr']
3456 if fdict.get('filesize') is not None:
3457 if res:
3458 res += ', '
3459 res += format_bytes(fdict['filesize'])
3460 elif fdict.get('filesize_approx') is not None:
3461 if res:
3462 res += ', '
3463 res += '~' + format_bytes(fdict['filesize_approx'])
3464 return res
3465
def render_formats_table(self, info_dict):
    """Render the table of available formats, or None if there is nothing to list.

    Nothing is listed when the info has neither 'formats' nor 'url'. Formats
    whose 'preference' is below -1000 are left out. When the
    'listformats_table' param is exactly False the legacy four-column layout
    is produced; otherwise the detailed, styled layout.
    """
    if not info_dict.get('formats') and not info_dict.get('url'):
        return None

    formats = info_dict.get('formats', [info_dict])

    def is_listed(f):
        return f.get('preference') is None or f['preference'] >= -1000

    if self.params.get('listformats_table', True) is False:
        legacy_rows = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f),
            ] for f in formats if is_listed(f)]
        return render_table(
            ['format code', 'extension', 'resolution', 'note'], legacy_rows, extra_gap=1)

    delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)

    def build_row(f):
        return [
            self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d'),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            delim,
            format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
            format_field(f, 'tbr', '\t%dk'),
            shorten_protocol_name(f.get('protocol', '')),
            delim,
            format_field(f, 'vcodec', default='unknown').replace(
                'none', 'images' if f.get('acodec') == 'none'
                        else self._format_screen('audio only', self.Styles.SUPPRESS)),
            format_field(f, 'vbr', '\t%dk'),
            format_field(f, 'acodec', default='unknown').replace(
                'none', '' if f.get('vcodec') == 'none'
                        else self._format_screen('video only', self.Styles.SUPPRESS)),
            format_field(f, 'abr', '\t%dk'),
            format_field(f, 'asr', '\t%dHz'),
            join_nonempty(
                self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
                format_field(f, 'language', '[%s]'),
                join_nonempty(format_field(f, 'format_note'),
                              format_field(f, 'container', ignore=(None, f.get('ext'))),
                              delim=', '),
                delim=' '),
        ]

    rows = [build_row(f) for f in formats if is_listed(f)]
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
        delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    return render_table(
        header_line, rows, hide_empty=True,
        delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3518
def render_thumbnails_table(self, info_dict):
    """Render the table of thumbnails, or None when the info has none.

    Each row holds the thumbnail's id, width, height and URL; a missing
    width or height is shown as 'unknown'.
    """
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        return None

    headers = self._list_format_headers('ID', 'Width', 'Height', 'URL')
    rows = [
        [thumb.get('id'), thumb.get('width', 'unknown'), thumb.get('height', 'unknown'), thumb['url']]
        for thumb in thumbnails]
    return render_table(headers, rows)
3526
def render_subtitles_table(self, video_id, subtitles):
    """Render the table of available subtitles, or None when there are none.

    One row per language with its name(s) and the available extensions,
    listed in reverse order of the language's format list. A name column
    entry is collapsed to a single value when all formats share it, and left
    empty when that shared name is unknown. `video_id` is not used here.
    """
    def _row(lang, formats):
        if not formats:
            # zip(*()) yields nothing to unpack; show the language with empty columns
            return [lang, '', '']
        exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats)))
        if len(set(names)) == 1:
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    if not subtitles:
        return None
    return render_table(
        self._list_format_headers('Language', 'Name', 'Formats'),
        [_row(lang, formats) for lang, formats in subtitles.items()],
        hide_empty=True)
3540
3541 def __list_table(self, video_id, name, func, *args):
3542 table = func(*args)
3543 if not table:
3544 self.to_screen(f'{video_id} has no {name}')
3545 return
3546 self.to_screen(f'[info] Available {name} for {video_id}:')
3547 self.to_stdout(table)
3548
def list_formats(self, info_dict):
    """Print the formats table of `info_dict`, or a notice when it has none."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'formats', self.render_formats_table, info_dict)
3551
def list_thumbnails(self, info_dict):
    """Print the thumbnails table of `info_dict`, or a notice when it has none."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'thumbnails', self.render_thumbnails_table, info_dict)
3554
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the subtitles table for `video_id`, or a notice when there are none.

    `name` is the label used in the printed messages.
    """
    render = self.render_subtitles_table
    self.__list_table(video_id, name, render, video_id, subtitles)
3557
def urlopen(self, req):
    """Start an HTTP download.

    `req` may be a URL string, which is first turned into a sanitized
    request, or a request object. The request is opened through the
    instance's opener with the configured socket timeout.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
3563
def print_debug_header(self):
    """Write the verbose-mode debug header.

    Does nothing unless the 'verbose' param is set. Otherwise reports, via
    the configured logger's debug() or (without a logger) the instance's own
    string writer: encodings, yt-dlp version and variant, lazy-loader and
    plugin state, compatibility options, the git HEAD (source checkouts
    only), Python and platform details, external executable versions,
    optional libraries and the proxy map.
    """
    if not self.params.get('verbose'):
        return

    def get_encoding(stream):
        ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
        if not supports_terminal_sequences(stream):
            from .compat import WINDOWS_VT_MODE
            ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
        return ret

    encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        get_encoding(self._screen_file), get_encoding(self._err_file),
        self.get_encoding())

    logger = self.params.get('logger')
    if logger:
        write_debug = lambda msg: logger.debug(f'[debug] {msg}')
        write_debug(encoding_str)
    else:
        # The encoding line itself is written without forcing an encoding
        write_string(f'[debug] {encoding_str}\n', encoding=None)
        write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    write_debug(join_nonempty(
        'yt-dlp version', __version__,
        f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        delim=' '))
    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if plugin_extractors or plugin_postprocessors:
        write_debug('Plugins: %s' % [
            '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
    if self.params.get('compat_opts'):
        write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))

    if source == 'source':
        # Best effort only: a missing git binary or a non-git checkout must
        # never break the debug header, so every failure is ignored
        with contextlib.suppress(Exception):
            sp = Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, _ = sp.communicate_or_kill()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                write_debug('Git HEAD: %s' % out)

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    write_debug('Python version %s (%s %s) - %s' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
    if ffmpeg_features:
        exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .downloader.websocket import has_websockets
    from .postprocessor.embedthumbnail import has_mutagen
    from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE

    lib_str = join_nonempty(
        compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
        SECRETSTORAGE_AVAILABLE and 'secretstorage',
        has_mutagen and 'mutagen',
        SQLITE_AVAILABLE and 'sqlite',
        has_websockets and 'websockets',
        delim=', ') or 'none'
    write_debug('Optional libraries: %s' % lib_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_debug(f'Proxy map: {proxy_map}')

    # Not implemented
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        write_debug('Public IP address: %s' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
3677
def _setup_opener(self):
    """Build the URL opener used for all requests and store it on the instance.

    Sets the socket timeout (20 seconds unless 'socket_timeout' is given),
    loads the cookie jar, resolves the proxies and installs the HTTP(S),
    cookie, redirect, data and (disabled) file handlers.
    """
    params = self.params
    configured_timeout = params.get('socket_timeout')
    if configured_timeout is None:
        self._socket_timeout = 20
    else:
        self._socket_timeout = float(configured_timeout)

    self.cookiejar = load_cookies(
        params.get('cookiefile'), params.get('cookiesfrombrowser'), self)
    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    proxy_url = params.get('proxy')
    if proxy_url is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_url == '':
        # An explicitly empty proxy option disables proxying altogether
        proxies = {}
    else:
        proxies = {'http': proxy_url, 'https': proxy_url}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def refuse_file_scheme(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
    file_handler.file_open = refuse_file_scheme

    opener = compat_urllib_request.build_opener(
        proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
3725
def encode(self, s):
    """Encode `s` with the configured encoding; bytes are returned unchanged.

    A UnicodeEncodeError is re-raised with a hint about the system encoding
    appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s  # Already encoded
3735
def get_encoding(self):
    """Return the 'encoding' param, or the preferred encoding when it is unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
3741
3742 def _write_info_json(self, label, ie_result, infofn, overwrite=None):
3743 ''' Write infojson and returns True = written, False = skip, None = error '''
3744 if overwrite is None:
3745 overwrite = self.params.get('overwrites', True)
3746 if not self.params.get('writeinfojson'):
3747 return False
3748 elif not infofn:
3749 self.write_debug(f'Skipping writing {label} infojson')
3750 return False
3751 elif not self._ensure_dir_exists(infofn):
3752 return None
3753 elif not overwrite and os.path.exists(infofn):
3754 self.to_screen(f'[info] {label.title()} metadata is already present')
3755 else:
3756 self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
3757 try:
3758 write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
3759 except (OSError, IOError):
3760 self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
3761 return None
3762 return True
3763
3764 def _write_description(self, label, ie_result, descfn):
3765 ''' Write description and returns True = written, False = skip, None = error '''
3766 if not self.params.get('writedescription'):
3767 return False
3768 elif not descfn:
3769 self.write_debug(f'Skipping writing {label} description')
3770 return False
3771 elif not self._ensure_dir_exists(descfn):
3772 return None
3773 elif not self.params.get('overwrites', True) and os.path.exists(descfn):
3774 self.to_screen(f'[info] {label.title()} description is already present')
3775 elif ie_result.get('description') is None:
3776 self.report_warning(f'There\'s no {label} description to write')
3777 return False
3778 else:
3779 try:
3780 self.to_screen(f'[info] Writing {label} description to: {descfn}')
3781 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
3782 descfile.write(ie_result['description'])
3783 except (OSError, IOError):
3784 self.report_error(f'Cannot write {label} description file {descfn}')
3785 return None
3786 return True
3787
3788 def _write_subtitles(self, info_dict, filename):
3789 ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error'''
3790 ret = []
3791 subtitles = info_dict.get('requested_subtitles')
3792 if not subtitles or not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')):
3793 # subtitles download errors are already managed as troubles in relevant IE
3794 # that way it will silently go on when used with unsupporting IE
3795 return ret
3796
3797 sub_filename_base = self.prepare_filename(info_dict, 'subtitle')
3798 if not sub_filename_base:
3799 self.to_screen('[info] Skipping writing video subtitles')
3800 return ret
3801 for sub_lang, sub_info in subtitles.items():
3802 sub_format = sub_info['ext']
3803 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
3804 sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext'))
3805 existing_sub = self.existing_file((sub_filename_final, sub_filename))
3806 if existing_sub:
3807 self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
3808 sub_info['filepath'] = existing_sub
3809 ret.append((existing_sub, sub_filename_final))
3810 continue
3811
3812 self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
3813 if sub_info.get('data') is not None:
3814 try:
3815 # Use newline='' to prevent conversion of newline characters
3816 # See https://github.com/ytdl-org/youtube-dl/issues/10268
3817 with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
3818 subfile.write(sub_info['data'])
3819 sub_info['filepath'] = sub_filename
3820 ret.append((sub_filename, sub_filename_final))
3821 continue
3822 except (OSError, IOError):
3823 self.report_error(f'Cannot write video subtitles file {sub_filename}')
3824 return None
3825
3826 try:
3827 sub_copy = sub_info.copy()
3828 sub_copy.setdefault('http_headers', info_dict.get('http_headers'))
3829 self.dl(sub_filename, sub_copy, subtitle=True)
3830 sub_info['filepath'] = sub_filename
3831 ret.append((sub_filename, sub_filename_final))
3832 except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
3833 if self.params.get('ignoreerrors') is not True: # False or 'only_download'
3834 raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err)
3835 self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
3836 return ret
3837
3838 def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
3839 ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename) '''
3840 write_all = self.params.get('write_all_thumbnails', False)
3841 thumbnails, ret = [], []
3842 if write_all or self.params.get('writethumbnail', False):
3843 thumbnails = info_dict.get('thumbnails') or []
3844 multiple = write_all and len(thumbnails) > 1
3845
3846 if thumb_filename_base is None:
3847 thumb_filename_base = filename
3848 if thumbnails and not thumb_filename_base:
3849 self.write_debug(f'Skipping writing {label} thumbnail')
3850 return ret
3851
3852 for idx, t in list(enumerate(thumbnails))[::-1]:
3853 thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
3854 thumb_display_id = f'{label} thumbnail {t["id"]}'
3855 thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
3856 thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
3857
3858 existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
3859 if existing_thumb:
3860 self.to_screen('[info] %s is already present' % (
3861 thumb_display_id if multiple else f'{label} thumbnail').capitalize())
3862 t['filepath'] = existing_thumb
3863 ret.append((existing_thumb, thumb_filename_final))
3864 else:
3865 self.to_screen(f'[info] Downloading {thumb_display_id} ...')
3866 try:
3867 uf = self.urlopen(t['url'])
3868 self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
3869 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
3870 shutil.copyfileobj(uf, thumbf)
3871 ret.append((thumb_filename, thumb_filename_final))
3872 t['filepath'] = thumb_filename
3873 except network_exceptions as err:
3874 thumbnails.pop(idx)
3875 self.report_warning(f'Unable to download {thumb_display_id}: {err}')
3876 if ret and not write_all:
3877 break
3878 return ret