]> jfr.im git - yt-dlp.git/blob - yt_dlp/YoutubeDL.py
[cleanup,docs] Minor fixes
[yt-dlp.git] / yt_dlp / YoutubeDL.py
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import datetime
9 import errno
10 import fileinput
11 import functools
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import sys
23 import tempfile
24 import time
25 import tokenize
26 import traceback
27 import random
28 import unicodedata
29
30 from enum import Enum
31 from string import ascii_letters
32
33 from .compat import (
34 compat_basestring,
35 compat_get_terminal_size,
36 compat_kwargs,
37 compat_numeric_types,
38 compat_os_name,
39 compat_pycrypto_AES,
40 compat_shlex_quote,
41 compat_str,
42 compat_tokenize_tokenize,
43 compat_urllib_error,
44 compat_urllib_request,
45 compat_urllib_request_DataHandler,
46 windows_enable_vt_mode,
47 )
48 from .cookies import load_cookies
49 from .utils import (
50 age_restricted,
51 args_to_str,
52 ContentTooShortError,
53 date_from_str,
54 DateRange,
55 DEFAULT_OUTTMPL,
56 determine_ext,
57 determine_protocol,
58 DownloadCancelled,
59 DownloadError,
60 encode_compat_str,
61 encodeFilename,
62 EntryNotInPlaylist,
63 error_to_compat_str,
64 ExistingVideoReached,
65 expand_path,
66 ExtractorError,
67 float_or_none,
68 format_bytes,
69 format_field,
70 format_decimal_suffix,
71 formatSeconds,
72 GeoRestrictedError,
73 get_domain,
74 HEADRequest,
75 InAdvancePagedList,
76 int_or_none,
77 iri_to_uri,
78 ISO3166Utils,
79 join_nonempty,
80 LazyList,
81 LINK_TEMPLATES,
82 locked_file,
83 make_dir,
84 make_HTTPS_handler,
85 MaxDownloadsReached,
86 network_exceptions,
87 number_of_digits,
88 orderedSet,
89 OUTTMPL_TYPES,
90 PagedList,
91 parse_filesize,
92 PerRequestProxyHandler,
93 platform_name,
94 Popen,
95 POSTPROCESS_WHEN,
96 PostProcessingError,
97 preferredencoding,
98 prepend_extension,
99 ReExtractInfo,
100 register_socks_protocols,
101 RejectedVideoReached,
102 remove_terminal_sequences,
103 render_table,
104 replace_extension,
105 SameFileError,
106 sanitize_filename,
107 sanitize_path,
108 sanitize_url,
109 sanitized_Request,
110 std_headers,
111 STR_FORMAT_RE_TMPL,
112 STR_FORMAT_TYPES,
113 str_or_none,
114 strftime_or_none,
115 subtitles_filename,
116 supports_terminal_sequences,
117 timetuple_from_msec,
118 to_high_limit_path,
119 traverse_obj,
120 try_get,
121 UnavailableVideoError,
122 url_basename,
123 variadic,
124 version_tuple,
125 write_json_file,
126 write_string,
127 YoutubeDLCookieProcessor,
128 YoutubeDLHandler,
129 YoutubeDLRedirectHandler,
130 )
131 from .cache import Cache
132 from .minicurses import format_text
133 from .extractor import (
134 gen_extractor_classes,
135 get_info_extractor,
136 _LAZY_LOADER,
137 _PLUGIN_CLASSES as plugin_extractors
138 )
139 from .extractor.openload import PhantomJSwrapper
140 from .downloader import (
141 FFmpegFD,
142 get_suitable_downloader,
143 shorten_protocol_name
144 )
145 from .downloader.rtmp import rtmpdump_version
146 from .postprocessor import (
147 get_postprocessor,
148 EmbedThumbnailPP,
149 FFmpegFixupDuplicateMoovPP,
150 FFmpegFixupDurationPP,
151 FFmpegFixupM3u8PP,
152 FFmpegFixupM4aPP,
153 FFmpegFixupStretchedPP,
154 FFmpegFixupTimestampPP,
155 FFmpegMergerPP,
156 FFmpegPostProcessor,
157 MoveFilesAfterDownloadPP,
158 _PLUGIN_CLASSES as plugin_postprocessors
159 )
160 from .update import detect_variant
161 from .version import __version__, RELEASE_GIT_HEAD
162
163 if compat_os_name == 'nt':
164 import ctypes
165
166
167 class YoutubeDL(object):
168 """YoutubeDL class.
169
170 YoutubeDL objects are the ones responsible of downloading the
171 actual video file and writing it to disk if the user has requested
172 it, among some other tasks. In most cases there should be one per
173 program. As, given a video URL, the downloader doesn't know how to
174 extract all the needed information, task that InfoExtractors do, it
175 has to pass the URL to one of them.
176
177 For this, YoutubeDL objects have a method that allows
178 InfoExtractors to be registered in a given order. When it is passed
179 a URL, the YoutubeDL object handles it to the first InfoExtractor it
180 finds that reports being able to handle it. The InfoExtractor extracts
181 all the information about the video or videos the URL refers to, and
182 YoutubeDL process the extracted information, possibly using a File
183 Downloader to download the video.
184
185 YoutubeDL objects accept a lot of parameters. In order not to saturate
186 the object constructor with arguments, it receives a dictionary of
187 options instead. These options are available through the params
188 attribute for the InfoExtractors to use. The YoutubeDL also
189 registers itself as the downloader in charge for the InfoExtractors
190 that are added to it, so this is a "mutual registration".
191
192 Available options:
193
194 username: Username for authentication purposes.
195 password: Password for authentication purposes.
196 videopassword: Password for accessing a video.
197 ap_mso: Adobe Pass multiple-system operator identifier.
198 ap_username: Multiple-system operator account username.
199 ap_password: Multiple-system operator account password.
200 usenetrc: Use netrc for authentication instead.
201 verbose: Print additional info to stdout.
202 quiet: Do not print messages to stdout.
203 no_warnings: Do not print out anything for warnings.
204 forceprint: A dict with keys WHEN mapped to a list of templates to
205 print to stdout. The allowed keys are video or any of the
206 items in utils.POSTPROCESS_WHEN.
207 For compatibility, a single list is also accepted
208 print_to_file: A dict with keys WHEN (same as forceprint) mapped to
209 a list of tuples with (template, filename)
210 forceurl: Force printing final URL. (Deprecated)
211 forcetitle: Force printing title. (Deprecated)
212 forceid: Force printing ID. (Deprecated)
213 forcethumbnail: Force printing thumbnail URL. (Deprecated)
214 forcedescription: Force printing description. (Deprecated)
215 forcefilename: Force printing final filename. (Deprecated)
216 forceduration: Force printing duration. (Deprecated)
217 forcejson: Force printing info_dict as JSON.
218 dump_single_json: Force printing the info_dict of the whole playlist
219 (or video) as a single JSON line.
220 force_write_download_archive: Force writing download archive regardless
221 of 'skip_download' or 'simulate'.
222 simulate: Do not download the video files. If unset (or None),
223 simulate only if listsubtitles, listformats or list_thumbnails is used
224 format: Video format code. see "FORMAT SELECTION" for more details.
225 You can also pass a function. The function takes 'ctx' as
226 argument and returns the formats to download.
227 See "build_format_selector" for an implementation
228 allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
229 ignore_no_formats_error: Ignore "No video formats" error. Useful for
230 extracting metadata even if the video is not actually
231 available for download (experimental)
232 format_sort: A list of fields by which to sort the video formats.
233 See "Sorting Formats" for more details.
234 format_sort_force: Force the given format_sort. see "Sorting Formats"
235 for more details.
236 allow_multiple_video_streams: Allow multiple video streams to be merged
237 into a single file
238 allow_multiple_audio_streams: Allow multiple audio streams to be merged
239 into a single file
240 check_formats Whether to test if the formats are downloadable.
241 Can be True (check all), False (check none),
242 'selected' (check selected formats),
243 or None (check only if requested by extractor)
244 paths: Dictionary of output paths. The allowed keys are 'home'
245 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
246 outtmpl: Dictionary of templates for output names. Allowed keys
247 are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
248 For compatibility with youtube-dl, a single string can also be used
249 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
250 restrictfilenames: Do not allow "&" and spaces in file names
251 trim_file_name: Limit length of filename (extension excluded)
252 windowsfilenames: Force the filenames to be windows compatible
253 ignoreerrors: Do not stop on download/postprocessing errors.
254 Can be 'only_download' to ignore only download errors.
255 Default is 'only_download' for CLI, but False for API
256 skip_playlist_after_errors: Number of allowed failures until the rest of
257 the playlist is skipped
258 force_generic_extractor: Force downloader to use the generic extractor
259 overwrites: Overwrite all video and metadata files if True,
260 overwrite only non-video files if None
261 and don't overwrite any file if False
262 For compatibility with youtube-dl,
263 "nooverwrites" may also be used instead
264 playliststart: Playlist item to start at.
265 playlistend: Playlist item to end at.
266 playlist_items: Specific indices of playlist to download.
267 playlistreverse: Download playlist items in reverse order.
268 playlistrandom: Download playlist items in random order.
269 matchtitle: Download only matching titles.
270 rejecttitle: Reject downloads for matching titles.
271 logger: Log messages to a logging.Logger instance.
272 logtostderr: Log messages to stderr instead of stdout.
273 consoletitle: Display progress in console window's titlebar.
274 writedescription: Write the video description to a .description file
275 writeinfojson: Write the video description to a .info.json file
276 clean_infojson: Remove private fields from the infojson
277 getcomments: Extract video comments. This will not be written to disk
278 unless writeinfojson is also given
279 writeannotations: Write the video annotations to a .annotations.xml file
280 writethumbnail: Write the thumbnail image to a file
281 allow_playlist_files: Whether to write playlists' description, infojson etc
282 also to disk when using the 'write*' options
283 write_all_thumbnails: Write all thumbnail formats to files
284 writelink: Write an internet shortcut file, depending on the
285 current platform (.url/.webloc/.desktop)
286 writeurllink: Write a Windows internet shortcut file (.url)
287 writewebloclink: Write a macOS internet shortcut file (.webloc)
288 writedesktoplink: Write a Linux internet shortcut file (.desktop)
289 writesubtitles: Write the video subtitles to a file
290 writeautomaticsub: Write the automatically generated subtitles to a file
291 allsubtitles: Deprecated - Use subtitleslangs = ['all']
292 Downloads all the subtitles of the video
293 (requires writesubtitles or writeautomaticsub)
294 listsubtitles: Lists all available subtitles for the video
295 subtitlesformat: The format code for subtitles
296 subtitleslangs: List of languages of the subtitles to download (can be regex).
297 The list may contain "all" to refer to all the available
298 subtitles. The language can be prefixed with a "-" to
299 exclude it from the requested languages. Eg: ['all', '-live_chat']
300 keepvideo: Keep the video file after post-processing
301 daterange: A DateRange object, download only if the upload_date is in the range.
302 skip_download: Skip the actual download of the video file
303 cachedir: Location of the cache files in the filesystem.
304 False to disable filesystem cache.
305 noplaylist: Download single video instead of a playlist if in doubt.
306 age_limit: An integer representing the user's age in years.
307 Unsuitable videos for the given age are skipped.
308 min_views: An integer representing the minimum view count the video
309 must have in order to not be skipped.
310 Videos without view count information are always
311 downloaded. None for no limit.
312 max_views: An integer representing the maximum view count.
313 Videos that are more popular than that are not
314 downloaded.
315 Videos without view count information are always
316 downloaded. None for no limit.
317 download_archive: File name of a file where all downloads are recorded.
318 Videos already present in the file are not downloaded
319 again.
320 break_on_existing: Stop the download process after attempting to download a
321 file that is in the archive.
322 break_on_reject: Stop the download process when encountering a video that
323 has been filtered out.
324 break_per_url: Whether break_on_reject and break_on_existing
325 should act on each input URL as opposed to for the entire queue
326 cookiefile: File name where cookies should be read from and dumped to
327 cookiesfrombrowser: A tuple containing the name of the browser, the profile
328 name/path from where cookies are loaded, and the name of the
329 keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT')
330 legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
331 support RFC 5746 secure renegotiation
332 nocheckcertificate: Do not verify SSL certificates
333 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
334 At the moment, this is only supported by YouTube.
335 proxy: URL of the proxy server to use
336 geo_verification_proxy: URL of the proxy to use for IP address verification
337 on geo-restricted sites.
338 socket_timeout: Time to wait for unresponsive hosts, in seconds
339 bidi_workaround: Work around buggy terminals without bidirectional text
340 support, using fribidi
341 debug_printtraffic:Print out sent and received HTTP traffic
342 include_ads: Download ads as well (deprecated)
343 default_search: Prepend this string if an input url is not valid.
344 'auto' for elaborate guessing
345 encoding: Use this encoding instead of the system-specified.
346 extract_flat: Do not resolve URLs, return the immediate result.
347 Pass in 'in_playlist' to only show this behavior for
348 playlist items.
349 wait_for_video: If given, wait for scheduled streams to become available.
350 The value should be a tuple containing the range
351 (min_secs, max_secs) to wait between retries
352 postprocessors: A list of dictionaries, each with an entry
353 * key: The name of the postprocessor. See
354 yt_dlp/postprocessor/__init__.py for a list.
355 * when: When to run the postprocessor. Allowed values are
356 the entries of utils.POSTPROCESS_WHEN
357 Assumed to be 'post_process' if not given
358 post_hooks: Deprecated - Register a custom postprocessor instead
359 A list of functions that get called as the final step
360 for each video file, after all postprocessors have been
361 called. The filename will be passed as the only argument.
362 progress_hooks: A list of functions that get called on download
363 progress, with a dictionary with the entries
364 * status: One of "downloading", "error", or "finished".
365 Check this first and ignore unknown values.
366 * info_dict: The extracted info_dict
367
368 If status is one of "downloading", or "finished", the
369 following properties may also be present:
370 * filename: The final filename (always present)
371 * tmpfilename: The filename we're currently writing to
372 * downloaded_bytes: Bytes on disk
373 * total_bytes: Size of the whole file, None if unknown
374 * total_bytes_estimate: Guess of the eventual file size,
375 None if unavailable.
376 * elapsed: The number of seconds since download started.
377 * eta: The estimated time in seconds, None if unknown
378 * speed: The download speed in bytes/second, None if
379 unknown
380 * fragment_index: The counter of the currently
381 downloaded video fragment.
382 * fragment_count: The number of fragments (= individual
383 files that will be merged)
384
385 Progress hooks are guaranteed to be called at least once
386 (with status "finished") if the download is successful.
387 postprocessor_hooks: A list of functions that get called on postprocessing
388 progress, with a dictionary with the entries
389 * status: One of "started", "processing", or "finished".
390 Check this first and ignore unknown values.
391 * postprocessor: Name of the postprocessor
392 * info_dict: The extracted info_dict
393
394 Progress hooks are guaranteed to be called at least twice
395 (with status "started" and "finished") if the processing is successful.
396 merge_output_format: Extension to use when merging formats.
397 final_ext: Expected final extension; used to detect when the file was
398 already downloaded and converted
399 fixup: Automatically correct known faults of the file.
400 One of:
401 - "never": do nothing
402 - "warn": only emit a warning
403 - "detect_or_warn": check whether we can do anything
404 about it, warn otherwise (default)
405 source_address: Client-side IP address to bind to.
406 call_home: Boolean, true iff we are allowed to contact the
407 yt-dlp servers for debugging. (BROKEN)
408 sleep_interval_requests: Number of seconds to sleep between requests
409 during extraction
410 sleep_interval: Number of seconds to sleep before each download when
411 used alone or a lower bound of a range for randomized
412 sleep before each download (minimum possible number
413 of seconds to sleep) when used along with
414 max_sleep_interval.
415 max_sleep_interval:Upper bound of a range for randomized sleep before each
416 download (maximum possible number of seconds to sleep).
417 Must only be used along with sleep_interval.
418 Actual sleep time will be a random float from range
419 [sleep_interval; max_sleep_interval].
420 sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
421 listformats: Print an overview of available video formats and exit.
422 list_thumbnails: Print a table of all thumbnails and exit.
423 match_filter: A function that gets called with the info_dict of
424 every video.
425 If it returns a message, the video is ignored.
426 If it returns None, the video is downloaded.
427 match_filter_func in utils.py is one example for this.
428 no_color: Do not emit color codes in output.
429 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
430 HTTP header
431 geo_bypass_country:
432 Two-letter ISO 3166-2 country code that will be used for
433 explicit geographic restriction bypassing via faking
434 X-Forwarded-For HTTP header
435 geo_bypass_ip_block:
436 IP range in CIDR notation that will be used similarly to
437 geo_bypass_country
438
439 The following options determine which downloader is picked:
440 external_downloader: A dictionary of protocol keys and the executable of the
441 external downloader to use for it. The allowed protocols
442 are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
443 Set the value to 'native' to use the native downloader
444 hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
445 or {'m3u8': 'ffmpeg'} instead.
446 Use the native HLS downloader instead of ffmpeg/avconv
447 if True, otherwise use ffmpeg/avconv if False, otherwise
448 use downloader suggested by extractor if None.
449 compat_opts: Compatibility options. See "Differences in default behavior".
450 The following options do not work when used through the API:
451 filename, abort-on-error, multistreams, no-live-chat, format-sort
452 no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
453 Refer __init__.py for their implementation
454 progress_template: Dictionary of templates for progress outputs.
455 Allowed keys are 'download', 'postprocess',
456 'download-title' (console title) and 'postprocess-title'.
457 The template is mapped on a dictionary with keys 'progress' and 'info'
458
459 The following parameters are not used by YoutubeDL itself, they are used by
460 the downloader (see yt_dlp/downloader/common.py):
461 nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
462 max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries,
463 continuedl, noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
464 external_downloader_args, concurrent_fragment_downloads.
465
466 The following options are used by the post processors:
467 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
468 otherwise prefer ffmpeg. (avconv support is deprecated)
469 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
470 to the binary or its containing directory.
471 postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
472 and a list of additional command-line arguments for the
473 postprocessor/executable. The dict can also have "PP+EXE" keys
474 which are used when the given exe is used by the given PP.
475 Use 'default' as the name for arguments to be passed to all PP
476 For compatibility with youtube-dl, a single list of args
477 can also be used
478
479 The following options are used by the extractors:
480 extractor_retries: Number of times to retry for known errors
481 dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
482 hls_split_discontinuity: Split HLS playlists to different formats at
483 discontinuities such as ad breaks (default: False)
484 extractor_args: A dictionary of arguments to be passed to the extractors.
485 See "EXTRACTOR ARGUMENTS" for details.
486 Eg: {'youtube': {'skip': ['dash', 'hls']}}
487 mark_watched: Mark videos watched (even with --simulate). Only for YouTube
488 youtube_include_dash_manifest: Deprecated - Use extractor_args instead.
489 If True (default), DASH manifests and related
490 data will be downloaded and processed by extractor.
491 You can reduce network I/O by disabling it if you don't
492 care about DASH. (only for youtube)
493 youtube_include_hls_manifest: Deprecated - Use extractor_args instead.
494 If True (default), HLS manifests and related
495 data will be downloaded and processed by extractor.
496 You can reduce network I/O by disabling it if you don't
497 care about HLS. (only for youtube)
498 """
499
# Names of info-dict fields listed as numeric (per the attribute name;
# the code that consumes this set is outside this view).
_NUMERIC_FIELDS = {
    # stream properties and sizes
    'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
    # time stamps
    'timestamp', 'release_timestamp',
    # duration and counters
    'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
    'average_rating', 'comment_count', 'age_limit',
    'start_time', 'end_time',
    # position within a series / album
    'chapter_number', 'season_number', 'episode_number',
    'track_number', 'disc_number', 'release_year',
}
509
# File extensions grouped by media kind.
# NOTE(review): presumably consulted by the format selector - confirm at the use sites.
_format_selection_exts = {
    'audio': {'m4a', 'mp3', 'ogg', 'aac'},
    'video': {'mp4', 'flv', 'webm', '3gp'},
    'storyboards': {'mhtml'},
}

# Class-level defaults. __init__ rebinds params, _ies, _pps, _printed_messages,
# _first_webpage_request, _download_retcode, _num_downloads and _screen_file
# on every instance.
params = None
_ies = {}
_pps = {k: [] for k in POSTPROCESS_WHEN}
_printed_messages = set()
_first_webpage_request = True
_download_retcode = None
_num_downloads = None
# Not rebound by __init__ in this view; the set below is a single object
# shared by all instances unless it is replaced elsewhere.
_playlist_level = 0
_playlist_urls = set()
_screen_file = None
526
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    @param params     Dictionary of options (see the class docstring for the
                      available keys). It is stored as self.params and is
                      modified in place while legacy options are normalized.
                      Defaults to an empty dict
    @param auto_init  Whether to load the default extractors and print header (if verbose).
                      Set to 'no_verbose_header' to not print the header
    """
    if params is None:
        params = {}
    self._ies = {}
    self._ies_instances = {}
    self._pps = {k: [] for k in POSTPROCESS_WHEN}
    self._printed_messages = set()
    self._first_webpage_request = True
    self._post_hooks = []
    self._progress_hooks = []
    self._postprocessor_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._num_videos = 0
    # A conditional (instead of indexing a two-item list with the option value)
    # keeps an explicit logtostderr=None from raising TypeError
    self._screen_file = sys.stderr if params.get('logtostderr') else sys.stdout
    self._err_file = sys.stderr
    self.params = params
    self.cache = Cache(self)

    windows_enable_vt_mode()
    self._allow_colors = {
        'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
        'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
    }

    if sys.version_info < (3, 6):
        self.report_warning(
            'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])

    if self.params.get('allow_unplayable_formats'):
        self.report_warning(
            f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. '
            'This is a developer option intended for debugging. \n'
            ' If you experience any issues while using this option, '
            f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report')

    def check_deprecated(param, option, suggestion):
        """Warn if the deprecated param is set; return whether it was set"""
        if self.params.get(param) is not None:
            self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
            return True
        return False

    if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
        # Carry the old value over only when the new option was not given
        if self.params.get('geo_verification_proxy') is None:
            self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

    check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
    check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
    check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')

    for msg in self.params.get('_warnings', []):
        self.report_warning(msg)
    for msg in self.params.get('_deprecation_warnings', []):
        self.deprecation_warning(msg)

    if 'list-formats' in self.params.get('compat_opts', []):
        self.params['listformats_table'] = False

    if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None:
        # nooverwrites was unnecessarily changed to overwrites
        # in 0c3d0f51778b153f65c21906031c2e091fcfb641
        # This ensures compatibility with both keys
        self.params['overwrites'] = not self.params['nooverwrites']
    elif self.params.get('overwrites') is None:
        self.params.pop('overwrites', None)
    else:
        self.params['nooverwrites'] = not self.params['overwrites']

    self.params.setdefault('forceprint', {})
    self.params.setdefault('print_to_file', {})

    # Compatibility with older syntax
    if not isinstance(self.params['forceprint'], dict):
        self.params['forceprint'] = {'video': self.params['forceprint']}

    if self.params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Try bidiv first and fall back to fribidi
            try:
                self._output_process = Popen(['bidiv'] + width_args, **sp_kwargs)
            except OSError:
                self._output_process = Popen(['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno == errno.ENOENT:
                self.report_warning(
                    'Could not find fribidi executable, ignoring --bidi-workaround. '
                    'Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not self.params.get('restrictfilenames', False)):
        # Unicode filesystem API will throw errors (#1474, #13027)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    self.outtmpl_dict = self.parse_outtmpl()

    # Creating format selector here allows us to catch syntax errors before the extraction
    self.format_selector = (
        self.params.get('format') if self.params.get('format') in (None, '-')
        else self.params['format'] if callable(self.params['format'])
        else self.build_format_selector(self.params['format']))

    self._setup_opener()

    if auto_init:
        if auto_init != 'no_verbose_header':
            self.print_debug_header()
        self.add_default_info_extractors()

    # Register the hooks given through params with the matching add_* method
    hooks = {
        'post_hooks': self.add_post_hook,
        'progress_hooks': self.add_progress_hook,
        'postprocessor_hooks': self.add_postprocessor_hook,
    }
    for opt, fn in hooks.items():
        for ph in self.params.get(opt, []):
            fn(ph)

    for pp_def_raw in self.params.get('postprocessors', []):
        # Work on a copy so that popping keys does not alter the caller's dict
        pp_def = dict(pp_def_raw)
        when = pp_def.pop('when', 'post_process')
        self.add_post_processor(
            get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
            when=when)

    register_socks_protocols()

    def preload_download_archive(fn):
        """Preload the archive, if any is specified"""
        if fn is None:
            return False
        self.write_debug(f'Loading archive file {fn!r}')
        try:
            with locked_file(fn, 'r', encoding='utf-8') as archive_file:
                for line in archive_file:
                    self.archive.add(line.strip())
        except IOError as ioe:
            # A missing archive file is not an error; anything else is re-raised
            if ioe.errno != errno.ENOENT:
                raise
            return False
        return True

    self.archive = set()
    preload_download_archive(self.params.get('download_archive'))
692
def warn_if_short_id(self, argv):
    """Warn when an argument looks like a short YouTube ID starting with a dash.

    Such an argument is a dash followed by exactly 10 ID characters. The
    warning shows a corrected command line in which these arguments are
    moved behind a "--" separator.
    """
    short_id_re = re.compile(r'^-[0-9A-Za-z_-]{10}$')
    suspects = [pos for pos, arg in enumerate(argv) if short_id_re.match(arg)]
    if not suspects:
        return

    kept = [arg for pos, arg in enumerate(argv) if pos not in suspects]
    moved = [argv[pos] for pos in suspects]
    correct_argv = ['yt-dlp'] + kept + ['--'] + moved
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s' %
        args_to_str(correct_argv))
708
def add_info_extractor(self, ie):
    """Register an InfoExtractor class or instance under its ie_key().

    Instances are also recorded in _ies_instances and are told that this
    object is their downloader; classes are only stored in _ies.
    """
    key = ie.ie_key()
    self._ies[key] = ie
    if isinstance(ie, type):
        # A class has no instance to bind yet
        return
    self._ies_instances[key] = ie
    ie.set_downloader(self)
716
def _get_info_extractor_class(self, ie_key):
    """Return the entry stored in _ies for ie_key, registering it on a miss.

    On a miss the extractor is looked up with the module-level
    get_info_extractor() and added through add_info_extractor().
    """
    known = self._ies.get(ie_key)
    if known is not None:
        return known
    looked_up = get_info_extractor(ie_key)
    self.add_info_extractor(looked_up)
    return looked_up
723
def get_info_extractor(self, ie_key):
    """
    Return an IE instance for ie_key. An instance already stored in
    _ies_instances is reused; otherwise a new one is created from the
    module-level get_info_extractor() and registered with
    add_info_extractor().
    """
    existing = self._ies_instances.get(ie_key)
    if existing is not None:
        return existing
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
735
def add_default_info_extractors(self):
    """
    Register every extractor class produced by gen_extractor_classes()
    """
    for extractor_cls in gen_extractor_classes():
        self.add_info_extractor(extractor_cls)
742
def add_post_processor(self, pp, when='post_process'):
    """Append pp to the postprocessor chain for stage `when` and bind it to this object."""
    stage_chain = self._pps[when]
    stage_chain.append(pp)
    pp.set_downloader(self)
747
def add_post_hook(self, ph):
    """Append ph to the list of registered post hooks"""
    registered = self._post_hooks
    registered.append(ph)
751
def add_progress_hook(self, ph):
    """Append ph to the list of registered download progress hooks"""
    registered = self._progress_hooks
    registered.append(ph)
755
def add_postprocessor_hook(self, ph):
    """Record the postprocessing progress hook and attach it to every postprocessor already in _pps"""
    self._postprocessor_hooks.append(ph)
    # Walk all stages' chains as one flat sequence
    for registered_pp in itertools.chain.from_iterable(self._pps.values()):
        registered_pp.add_progress_hook(ph)
762
def _bidi_workaround(self, message):
    """Pass message through the helper process set up for bidi_workaround.

    The message is returned unchanged when no _output_channel exists.
    Otherwise it is written to the process' stdin and one output line is
    read back per line of the message.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)
    expected_lines = 1 + message.count('\n')
    pipe = self._output_process.stdin
    pipe.write((message + '\n').encode('utf-8'))
    pipe.flush()
    decoded = []
    for _ in range(expected_lines):
        decoded.append(self._output_channel.readline().decode('utf-8'))
    # Drop the final character, i.e. the newline that was appended above
    return ''.join(decoded)[:-1]
775
def _write_string(self, message, out=None, only_once=False):
    """Write message to out using the 'encoding' param.

    With only_once set, a message that was already recorded in
    _printed_messages is skipped; otherwise it is recorded before writing.
    """
    if only_once and message in self._printed_messages:
        return
    if only_once:
        self._printed_messages.add(message)
    write_string(message, out=out, encoding=self.params.get('encoding'))
782
def to_stdout(self, message, skip_eol=False, quiet=False):
    """Print message to the screen file, or hand it to the 'logger' param if one is set.

    A quiet message is written only when 'verbose' is set, and then goes
    to the error file. A newline is appended unless skip_eol is given.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if quiet and not self.params.get('verbose'):
        return
    eol = '' if skip_eol else '\n'
    text = '%s%s' % (self._bidi_workaround(message), eol)
    target = self._err_file if quiet else self._screen_file
    self._write_string(text, target)
791
def to_stderr(self, message, only_once=False):
    """
    Print message to stderr.

    A configured 'logger' receives the message via error() instead;
    otherwise it is written, newline-terminated, to self._err_file.
    """
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    line = '%s\n' % self._bidi_workaround(message)
    self._write_string(line, self._err_file, only_once=only_once)
799
def to_console_title(self, message):
    """Set the console title to message when the 'consoletitle' param is enabled."""
    if not self.params.get('consoletitle', False):
        return
    message = remove_terminal_sequences(message)
    if compat_os_name != 'nt':
        if 'TERM' in os.environ:
            self._write_string('\033]0;%s\007' % message, self._screen_file)
        return
    kernel32 = ctypes.windll.kernel32
    if kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
811
def save_console_title(self):
    """
    Push the current console title on the terminal's title stack.

    Does nothing unless 'consoletitle' is enabled, 'simulate' is off,
    the OS is not 'nt' and TERM is set in the environment.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate'):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Save the title on stack
    self._write_string('\033[22;0t', self._screen_file)
820
def restore_console_title(self):
    """
    Pop the console title from the terminal's title stack.

    Does nothing unless 'consoletitle' is enabled, 'simulate' is off,
    the OS is not 'nt' and TERM is set in the environment.
    """
    if not self.params.get('consoletitle', False) or self.params.get('simulate'):
        return
    if compat_os_name == 'nt' or 'TERM' not in os.environ:
        return
    # Restore the title from stack
    self._write_string('\033[23;0t', self._screen_file)
829
830 def __enter__(self):
831 self.save_console_title()
832 return self
833
834 def __exit__(self, *args):
835 self.restore_console_title()
836
837 if self.params.get('cookiefile') is not None:
838 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
839
def trouble(self, message=None, tb=None, is_error=True):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    @param message   Text printed to stderr first (skipped when None)
    @param tb        If given, is additional traceback information
    @param is_error  Whether to raise error according to ignorerrors
    """
    if message is not None:
        self.to_stderr(message)
    # Traceback details are only printed in verbose mode
    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                tb = ''
                # An exception may carry the exc_info of an underlying error;
                # print that one first, followed by the current traceback
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += encode_compat_str(traceback.format_exc())
            else:
                # Not inside an except block: show the current call stack instead
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = ''.join(tb_data)
        if tb:
            self.to_stderr(tb)
    if not is_error:
        return
    if not self.params.get('ignoreerrors'):
        # Prefer the wrapped exc_info (if the active exception has one)
        # over the active exception itself
        if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
            exc_info = sys.exc_info()[1].exc_info
        else:
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    # Errors are being ignored: only record the failure in the return code
    self._download_retcode = 1
873
def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode"""
    is_quiet = self.params.get('quiet', False)
    self.to_stdout(message, skip_eol, quiet=is_quiet)
878
class Styles(Enum):
    # Named text styles; _format_text() passes the member's value on to
    # format_text() as the format specification.
    HEADERS = 'yellow'
    EMPHASIS = 'light blue'
    ID = 'green'
    DELIM = 'blue'
    ERROR = 'red'
    # Same value as HEADERS, so Enum treats WARNING as an alias of HEADERS
    WARNING = 'yellow'
    SUPPRESS = 'light black'
887
888 def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
889 if test_encoding:
890 original_text = text
891 encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
892 text = text.encode(encoding, 'ignore').decode(encoding)
893 if fallback is not None and text != original_text:
894 text = fallback
895 if isinstance(f, self.Styles):
896 f = f.value
897 return format_text(text, f) if allow_colors else text if fallback is None else fallback
898
899 def _format_screen(self, *args, **kwargs):
900 return self._format_text(
901 self._screen_file, self._allow_colors['screen'], *args, **kwargs)
902
903 def _format_err(self, *args, **kwargs):
904 return self._format_text(
905 self._err_file, self._allow_colors['err'], *args, **kwargs)
906
def report_warning(self, message, only_once=False):
    '''
    Print the message to stderr, prefixed with 'WARNING:' (styled through
    _format_err). A configured 'logger' gets the bare message via warning()
    instead; without a logger, 'no_warnings' suppresses the output.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return
    prefix = self._format_err("WARNING:", self.Styles.WARNING)
    self.to_stderr(f'{prefix} {message}', only_once)
918
def deprecation_warning(self, message):
    """
    Report a deprecation notice.

    A configured 'logger' receives 'DeprecationWarning: <message>' via
    warning(); otherwise the notice is printed to stderr once per
    distinct message, with a styled 'DeprecationWarning:' prefix.
    """
    if self.params.get('logger') is not None:
        # Must be an f-string, otherwise the literal '{message}' is logged
        self.params['logger'].warning(f'DeprecationWarning: {message}')
    else:
        self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
924
def report_error(self, message, *args, **kwargs):
    '''
    Forward to trouble() with the message prefixed by 'ERROR:'
    (styled through _format_err); extra arguments are passed along.
    '''
    prefix = self._format_err("ERROR:", self.Styles.ERROR)
    self.trouble(f'{prefix} {message}', *args, **kwargs)
931
def write_debug(self, message, only_once=False):
    '''Log a '[debug] '-prefixed message (logger or stderr); no-op unless 'verbose' is set'''
    if not self.params.get('verbose', False):
        return
    message = '[debug] %s' % message
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
    else:
        self.to_stderr(message, only_once)
941
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name was already fully downloaded."""
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        # Fall back to a message that does not contain the file name
        self.to_screen('[download] The file has already been downloaded')
948
def report_file_delete(self, file_name):
    """Tell the user that the existing file_name is about to be deleted."""
    try:
        notice = 'Deleting existing file %s' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        # Fall back to a message that does not contain the file name
        self.to_screen('Deleting existing file')
955
def raise_no_formats(self, info, forced=False):
    """
    Signal that info has no usable formats.

    Raises ExtractorError when forced or when 'ignore_no_formats_error'
    is not set; otherwise only a warning is reported.
    """
    has_drm = info.get('__has_drm')
    if has_drm:
        msg = 'This video is DRM protected'
    else:
        msg = 'No video formats found!'
    expected = self.params.get('ignore_no_formats_error')
    if expected and not forced:
        self.report_warning(msg)
        return
    raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'],
                         expected=has_drm or expected)
965
def parse_outtmpl(self):
    """
    Return the 'outtmpl' param as a dict keyed by template type.

    A non-dict value becomes the 'default' entry. Entries that are missing
    or None are filled in from DEFAULT_OUTTMPL, and a warning is reported
    for every template given as bytes.
    """
    outtmpl_dict = self.params.get('outtmpl', {})
    if not isinstance(outtmpl_dict, dict):
        outtmpl_dict = {'default': outtmpl_dict}

    # Remove spaces in the default template
    if self.params.get('restrictfilenames'):
        def sanitize(tmpl):
            return tmpl.replace(' - ', ' ').replace(' ', '-')
    else:
        def sanitize(tmpl):
            return tmpl

    missing = {
        name: sanitize(tmpl)
        for name, tmpl in DEFAULT_OUTTMPL.items()
        if outtmpl_dict.get(name) is None}
    outtmpl_dict.update(missing)

    for tmpl in outtmpl_dict.values():
        if isinstance(tmpl, bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
    return outtmpl_dict
984
def get_output_path(self, dir_type='', filename=None):
    """
    Join the 'home' path, the path configured for dir_type (if any) and
    filename, then sanitize the result.
    """
    paths = self.params.get('paths', {})
    assert isinstance(paths, dict)
    home_dir = expand_path(paths.get('home', '').strip())
    type_dir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
    path = os.path.join(home_dir, type_dir, filename or '')

    # Temporary fix for #4787
    # 'Treat' all problem characters by passing filename through preferredencoding
    # to workaround encoding issues with subprocess on python2 @ Windows
    if sys.version_info < (3, 0) and sys.platform == 'win32':
        path = encodeFilename(path, True).decode(preferredencoding())
    return sanitize_path(path, force=self.params.get('windowsfilenames'))
999
@staticmethod
def _outtmpl_expandpath(outtmpl):
    """Run expand_path() on outtmpl while keeping '%%' and '$$' intact."""
    # expand_path translates '%%' into '%' and '$$' into '$'
    # correspondingly that is not what we want since we need to keep
    # '%%' intact for template dict substitution step. Working around
    # with boundary-alike separator hack.
    sep = ''.join(random.choice(ascii_letters) for _ in range(32))
    protected = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$')

    # outtmpl should be expand_path'ed before template dict substitution
    # because meta fields may contain env variables we don't want to
    # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
    # title "Hello $PATH", we don't want `$PATH` to be expanded.
    expanded = expand_path(protected)
    return expanded.replace(sep, '')
1014
@staticmethod
def escape_outtmpl(outtmpl):
    ''' Escape any remaining strings like %s, %abc% etc. '''
    def _escape(mobj):
        # Matches without a key get an extra '%' in front
        lead = '' if mobj.group('has_key') else '%'
        return lead + mobj.group(0)

    pattern = STR_FORMAT_RE_TMPL.format('', '(?![%(\0])')
    return re.sub(pattern, _escape, outtmpl)
1022
@classmethod
def validate_outtmpl(cls, outtmpl):
    ''' @return None or Exception object '''
    def _to_str_conversion(mobj):
        # Swap the custom conversion character for a plain 's'
        return f'{mobj.group(0)[:-1]}s'

    custom_types_re = STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDS]')
    normalized = re.sub(custom_types_re, _to_str_conversion, cls._outtmpl_expandpath(outtmpl))
    try:
        cls.escape_outtmpl(normalized) % collections.defaultdict(int)
    except ValueError as err:
        return err
    return None
1035
1036 @staticmethod
1037 def _copy_infodict(info_dict):
1038 info_dict = dict(info_dict)
1039 for key in ('__original_infodict', '__postprocessors'):
1040 info_dict.pop(key, None)
1041 return info_dict
1042
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
    """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
    @param sanitize    Whether to sanitize the output as a filename.
                       For backward compatibility, a function can also be passed
    @return            Tuple (rewritten outtmpl, dict mapping the rewritten keys to their values)
    """

    # NOTE: this is the only mutation of the caller's dict; everything below works on a copy
    info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set

    info_dict = self._copy_infodict(info_dict)
    info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
        formatSeconds(info_dict['duration'], '-' if sanitize else ':')
        if info_dict.get('duration', None) is not None
        else None)
    info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
    info_dict['video_autonumber'] = self._num_videos
    if info_dict.get('resolution') is None:
        info_dict['resolution'] = self.format_resolution(info_dict, default=None)

    # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
    # of %(field)s to %(field)0Nd for backward compatibility
    field_size_compat_map = {
        'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
        'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
        'autonumber': self.params.get('autonumber_size') or 5,
    }

    # Filled by create_key(): rewritten key -> computed value
    TMPL_DICT = {}
    EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDS]'))
    MATH_FUNCTIONS = {
        '+': float.__add__,
        '-': float.__sub__,
    }
    # Field is of the form key1.key2...
    # where keys (except first) can be string, int or slice
    FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
    # NOTE(review): the '.' inside num is unescaped, so it matches any character,
    # not only a decimal point - confirm whether '\.' was intended
    MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
    MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
    # Grammar of the text between '%(' and ')':
    # [-]fields[maths][>strf_format][,alternate][&replacement][|default]
    INTERNAL_FORMAT_RE = re.compile(r'''(?x)
        (?P<negate>-)?
        (?P<fields>{field})
        (?P<maths>(?:{math_op}{math_field})*)
        (?:>(?P<strf_format>.+?))?
        (?P<alternate>(?<!\\),[^|&)]+)?
        (?:&(?P<replacement>.*?))?
        (?:\|(?P<default>.*?))?
        $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))

    def _traverse_infodict(k):
        # Look up a dotted key path in info_dict; a leading empty segment is dropped
        k = k.split('.')
        if k[0] == '':
            k.pop(0)
        return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)

    def get_value(mdict):
        # Evaluate one parsed field expression (a groupdict of INTERNAL_FORMAT_RE)
        # Object traversal
        value = _traverse_infodict(mdict['fields'])
        # Negative
        if mdict['negate']:
            value = float_or_none(value)
            if value is not None:
                value *= -1
        # Do maths
        offset_key = mdict['maths']
        if offset_key:
            value = float_or_none(value)
            operator = None
            # Consume the maths string alternately as operator, operand, operator, ...
            while offset_key:
                item = re.match(
                    MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
                    offset_key).group(0)
                offset_key = offset_key[len(item):]
                if operator is None:
                    operator = MATH_FUNCTIONS[item]
                    continue
                item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
                # The operand is either a literal number or another field
                offset = float_or_none(item)
                if offset is None:
                    offset = float_or_none(_traverse_infodict(item))
                try:
                    value = operator(value, multiplier * offset)
                except (TypeError, ZeroDivisionError):
                    return None
                operator = None
        # Datetime formatting
        if mdict['strf_format']:
            value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ','))

        return value

    na = self.params.get('outtmpl_na_placeholder', 'NA')

    def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
        return sanitize_filename(str(value), restricted=restricted,
                                 is_id=re.search(r'(^|[_.])id(\.|$)', key))

    # `sanitize` may be a callable (backward compatibility); from here on it is only a flag
    sanitizer = sanitize if callable(sanitize) else filename_sanitizer
    sanitize = bool(sanitize)

    def _dumpjson_default(obj):
        # json.dumps fallback: sets and LazyLists become lists, anything else its repr
        if isinstance(obj, (set, LazyList)):
            return list(obj)
        return repr(obj)

    def create_key(outer_mobj):
        # Substitution callback for EXTERNAL_FORMAT_RE: computes the value of one
        # template field, stores it in TMPL_DICT and returns the rewritten spec
        if not outer_mobj.group('has_key'):
            return outer_mobj.group(0)
        key = outer_mobj.group('key')
        mobj = re.match(INTERNAL_FORMAT_RE, key)
        initial_field = mobj.group('fields') if mobj else ''
        value, replacement, default = None, None, na
        # Walk the chain of ','-separated alternates until one yields a value
        while mobj:
            mobj = mobj.groupdict()
            default = mobj['default'] if mobj['default'] is not None else default
            value = get_value(mobj)
            replacement = mobj['replacement']
            if value is None and mobj['alternate']:
                mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
            else:
                break

        fmt = outer_mobj.group('format')
        if fmt == 's' and value is not None and key in field_size_compat_map.keys():
            fmt = '0{:d}d'.format(field_size_compat_map[key])

        # Missing value -> default; present value -> the replacement if one was given
        value = default if value is None else value if replacement is None else replacement

        flags = outer_mobj.group('conversion') or ''
        str_fmt = f'{fmt[:-1]}s'
        if fmt[-1] == 'l':  # list
            delim = '\n' if '#' in flags else ', '
            value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt
        elif fmt[-1] == 'j':  # json
            value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
        elif fmt[-1] == 'q':  # quoted
            value = map(str, variadic(value) if '#' in flags else [value])
            value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
        elif fmt[-1] == 'B':  # bytes
            value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
            value, fmt = value.decode('utf-8', 'ignore'), 's'
        elif fmt[-1] == 'U':  # unicode normalized
            value, fmt = unicodedata.normalize(
                # "+" = compatibility equivalence, "#" = NFD
                'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                value), str_fmt
        elif fmt[-1] == 'D':  # decimal suffix
            num_fmt, fmt = fmt[:-1].replace('#', ''), 's'
            value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s',
                                          factor=1024 if '#' in flags else 1000)
        elif fmt[-1] == 'S':  # filename sanitization
            value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
        elif fmt[-1] == 'c':
            if value:
                value = str(value)[0]
            else:
                fmt = str_fmt
        elif fmt[-1] not in 'rs':  # numeric
            value = float_or_none(value)
            if value is None:
                value, fmt = default, 's'

        if sanitize:
            if fmt[-1] == 'r':
                # If value is an object, sanitize might convert it to a string
                # So we convert it to repr first
                value, fmt = repr(value), str_fmt
            if fmt[-1] in 'csr':
                value = sanitizer(initial_field, value)

        # Build the dictionary key from the original key (with '%' followed by NUL)
        # and the original format, separated by NUL
        key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
        TMPL_DICT[key] = value
        return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix'))

    return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
1216
def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
    """Prepare outtmpl against info_dict and return the fully substituted string."""
    prepared_tmpl, values = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs)
    escaped_tmpl = self.escape_outtmpl(prepared_tmpl)
    return escaped_tmpl % values
1220
def _prepare_filename(self, info_dict, tmpl_type='default'):
    """
    Evaluate the output template of tmpl_type (falling back to 'default')
    for info_dict and return the resulting filename.

    Returns None when the result is empty or when the template raises a
    ValueError (which is reported through report_error).
    """
    try:
        template = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
        filename = self.evaluate_outtmpl(self._outtmpl_expandpath(template), info_dict, True)
        if not filename:
            return None

        if tmpl_type not in ('default', 'temp'):
            force_ext = OUTTMPL_TYPES[tmpl_type]
            if force_ext:
                filename = replace_extension(filename, force_ext, info_dict.get('ext'))
        else:
            final_ext = self.params.get('final_ext')
            ext = info_dict.get('ext')
            if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'):
                filename = replace_extension(filename, ext, final_ext)

        # https://github.com/blackjack4494/youtube-dlc/issues/85
        max_length = self.params.get('trim_file_name', False)
        if max_length:
            # Only the part before the last two dot-separated suffixes is trimmed
            stem, *suffixes = filename.rsplit('.', 2)
            filename = join_nonempty(stem[:max_length], *suffixes, delim='.')

        return filename
    except ValueError as err:
        self.report_error(f'Error in output template: {err} (encoding: {preferredencoding()!r})')
        return None
1247
def prepare_filename(self, info_dict, dir_type='', warn=False):
    """
    Generate the output filename.

    @param info_dict  Info dict the output template is evaluated against
    @param dir_type   Template/path type; '' means the default template
    @param warn       Whether to warn when the 'paths' param gets ignored
    @return           '' when a non-default, non-temp template yields nothing;
                      the raw template result when it is '-' or empty/None;
                      otherwise the result passed through get_output_path()
    """

    filename = self._prepare_filename(info_dict, dir_type or 'default')
    if not filename and dir_type not in ('', 'temp'):
        return ''

    if warn:
        if not self.params.get('paths'):
            pass
        elif filename == '-':
            self.report_warning('--paths is ignored when an outputting to stdout', only_once=True)
        # filename may be None here (empty or invalid template);
        # os.path.isabs(None) would raise TypeError
        elif filename and os.path.isabs(filename):
            self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True)
    if filename == '-' or not filename:
        return filename

    return self.get_output_path(dir_type, filename)
1266
def _match_entry(self, info_dict, incomplete=False, silent=False):
    """ Returns None if the file should be downloaded

    Otherwise returns the reason for skipping it. Unless silent, the reason
    is printed to the screen. If the matching 'break_on_existing' /
    'break_on_reject' param is set, ExistingVideoReached /
    RejectedVideoReached is raised instead of returning.

    @param incomplete  Passed on to the 'match_filter' callable
    @param silent      Do not print the skip reason
    """

    # Name used in messages: the title, else the id, else the literal 'video'
    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    def check_filter():
        # Returns the first reason for rejecting the entry, or None if all filters pass
        if 'title' in info_dict:
            # This can happen when we're just evaluating the playlist
            title = info_dict['title']
            matchtitle = self.params.get('matchtitle', False)
            if matchtitle:
                if not re.search(matchtitle, title, re.IGNORECASE):
                    return '"' + title + '" title did not match pattern "' + matchtitle + '"'
            rejecttitle = self.params.get('rejecttitle', False)
            if rejecttitle:
                if re.search(rejecttitle, title, re.IGNORECASE):
                    return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date')
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        view_count = info_dict.get('view_count')
        if view_count is not None:
            min_views = self.params.get('min_views')
            if min_views is not None and view_count < min_views:
                return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
            max_views = self.params.get('max_views')
            if max_views is not None and view_count > max_views:
                return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
        if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
            return 'Skipping "%s" because it is age restricted' % video_title

        match_filter = self.params.get('match_filter')
        if match_filter is not None:
            try:
                ret = match_filter(info_dict, incomplete=incomplete)
            except TypeError:
                # For backward compatibility
                # (filters that do not accept `incomplete` are skipped for incomplete info)
                ret = None if incomplete else match_filter(info_dict)
            if ret is not None:
                return ret
        return None

    # The archive check takes precedence; the filters only run for unarchived entries
    if self.in_download_archive(info_dict):
        reason = '%s has already been recorded in the archive' % video_title
        break_opt, break_err = 'break_on_existing', ExistingVideoReached
    else:
        reason = check_filter()
        break_opt, break_err = 'break_on_reject', RejectedVideoReached
    if reason is not None:
        if not silent:
            self.to_screen('[download] ' + reason)
        if self.params.get(break_opt, False):
            raise break_err()
    return reason
1323
1324 @staticmethod
1325 def add_extra_info(info_dict, extra_info):
1326 '''Set the keys from extra_info in info dict if they are missing'''
1327 for key, value in extra_info.items():
1328 info_dict.setdefault(key, value)
1329
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    """
    Extract url with the first suitable extractor and return its result.

    Returns None when the URL's temporary id is already recorded in the
    download archive, and also when no extractor is suitable (after
    reporting an error, which may raise depending on the configuration).

    Arguments:
    url -- URL to extract

    Keyword arguments:
    download -- whether to download videos during extraction
    ie_key -- extractor key hint
    extra_info -- dictionary containing the extra values to add to each result
    process -- whether to resolve all unresolved references (URLs, playlist items),
        must be True for download to work.
    force_generic_extractor -- force using the generic extractor
    """

    if extra_info is None:
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    # With a key, only that extractor is considered; otherwise all registered ones
    if ie_key:
        ies = {ie_key: self._get_info_extractor_class(ie_key)}
    else:
        ies = self._ies

    for ie_key, ie in ies.items():
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        # Check the archive before extracting, using the id derived from the URL alone
        temp_id = ie.get_temp_id(url)
        if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
            self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
            if self.params.get('break_on_existing', False):
                raise ExistingVideoReached()
            # `break` skips the for-else below, so None is returned without an error
            break
        return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
    else:
        # Only reached when the loop finished without break/return: nothing was suitable
        self.report_error('no suitable InfoExtractor for URL %s' % url)
1375
def __handle_extraction_exceptions(func):
    # Decorator for extraction methods: func is retried whenever it raises
    # ReExtractInfo; other errors are reported through report_error() or re-raised.
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        while True:
            try:
                return func(self, *args, **kwargs)
            except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                # These are passed through to the caller untouched
                raise
            except ReExtractInfo as e:
                if e.expected:
                    self.to_screen(f'{e}; Re-extracting data')
                else:
                    self.to_stderr('\r')
                    self.report_warning(f'{e}; Re-extracting data')
                # Run func again
                continue
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(str(e), e.format_traceback())
            except Exception as e:
                # Unexpected errors are only swallowed (and reported) with 'ignoreerrors'
                if self.params.get('ignoreerrors'):
                    self.report_error(str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
            # An error was reported without raising: leave the loop, wrapper returns None
            break
    return wrapper
1407
1408 def _wait_for_video(self, ie_result):
1409 if (not self.params.get('wait_for_video')
1410 or ie_result.get('_type', 'video') != 'video'
1411 or ie_result.get('formats') or ie_result.get('url')):
1412 return
1413
1414 format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
1415 last_msg = ''
1416
1417 def progress(msg):
1418 nonlocal last_msg
1419 self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
1420 last_msg = msg
1421
1422 min_wait, max_wait = self.params.get('wait_for_video')
1423 diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
1424 if diff is None and ie_result.get('live_status') == 'is_upcoming':
1425 diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
1426 self.report_warning('Release time of video is not known')
1427 elif (diff or 0) <= 0:
1428 self.report_warning('Video should already be available according to extracted info')
1429 diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
1430 self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
1431
1432 wait_till = time.time() + diff
1433 try:
1434 while True:
1435 diff = wait_till - time.time()
1436 if diff <= 0:
1437 progress('')
1438 raise ReExtractInfo('[wait] Wait period ended', expected=True)
1439 progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
1440 time.sleep(1)
1441 except KeyboardInterrupt:
1442 progress('')
1443 raise ReExtractInfo('[wait] Interrupted by user', expected=True)
1444 except BaseException as e:
1445 if not isinstance(e, ReExtractInfo):
1446 self.to_screen('')
1447 raise
1448
@__handle_extraction_exceptions
def __extract_info(self, url, ie, download, extra_info, process):
    """
    Run ie.extract(url) and, when process is set, resolve the result
    through process_ie_result(); otherwise return the raw result.
    """
    ie_result = ie.extract(url)
    if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
        return
    if isinstance(ie_result, list):
        # Backwards compatibility: old IE result format
        ie_result = {'_type': 'compat_list', 'entries': ie_result}
    original_url = extra_info.get('original_url')
    if original_url:
        ie_result.setdefault('original_url', original_url)
    self.add_default_extra_info(ie_result, ie, url)
    if not process:
        return ie_result
    self._wait_for_video(ie_result)
    return self.process_ie_result(ie_result, download, extra_info)
1468
def add_default_extra_info(self, ie_result, ie, url):
    """
    Fill in the URL-derived and extractor-derived fields of ie_result
    that are not already present. A None url or ie is skipped.
    """
    if url is not None:
        url_fields = {
            'webpage_url': url,
            'original_url': url,
            'webpage_url_basename': url_basename(url),
            'webpage_url_domain': get_domain(url),
        }
        self.add_extra_info(ie_result, url_fields)
    if ie is not None:
        extractor_fields = {
            'extractor': ie.IE_NAME,
            'extractor_key': ie.ie_key(),
        }
        self.add_extra_info(ie_result, extractor_fields)
1482
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    @param ie_result   Extractor result dict; its '_type' (default 'video')
                       selects how it is resolved
    @param download    Passed on to the video/playlist processing
    @param extra_info  Extra fields added to results that lack them
    @raises Exception  for an unknown '_type'
    """
    if extra_info is None:
        extra_info = {}
    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        ie_result['url'] = sanitize_url(ie_result['url'])
        if ie_result.get('original_url'):
            extra_info.setdefault('original_url', ie_result['original_url'])

        # Flat extraction: report the reference as-is instead of resolving it
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
                or extract_flat is True):
            info_copy = ie_result.copy()
            ie = try_get(ie_result.get('ie_key'), self.get_info_extractor)
            if ie and not ie_result.get('id'):
                info_copy['id'] = ie.get_temp_id(ie_result['url'])
            self.add_default_extra_info(info_copy, ie, ie_result['url'])
            self.add_extra_info(info_copy, extra_info)
            info_copy, _ = self.pre_process(info_copy)
            self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
            if self.params.get('force_write_download_archive', False):
                self.record_download_archive(info_copy)
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        ie_result = self.process_video_result(ie_result, download=download)
        additional_urls = (ie_result or {}).get('additional_urls')
        if additional_urls:
            # TODO: Improve MetadataParserPP to allow setting a list
            if isinstance(additional_urls, compat_str):
                additional_urls = [additional_urls]
            self.to_screen(
                '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
            self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
            ie_result['additional_entries'] = [
                self.extract_info(
                    url, download, extra_info=extra_info,
                    force_generic_extractor=self.params.get('force_generic_extractor'))
                for url in additional_urls
            ]
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(
            ie_result['url'], download,
            ie_key=ie_result.get('ie_key'),
            extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info may return None when ignoreerrors is enabled and
        # extraction failed with an error, don't crash and return early
        # in this case
        if not info:
            return info

        # Non-None outer fields override the inner result, except for the
        # fields that identify the inner result itself
        force_properties = {k: v for k, v in ie_result.items() if v is not None}
        for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
            force_properties.pop(f, None)
        new_result = info.copy()
        new_result.update(force_properties)

        # Extracted info may not be a video result (i.e.
        # info.get('_type', 'video') != video) but rather an url or
        # url_transparent. In such cases outer metadata (from ie_result)
        # should be propagated to inner one (info). For this to happen
        # _type of info should be overridden with url_transparent. This
        # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
        if new_result.get('_type') == 'url':
            new_result['_type'] = 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type in ('playlist', 'multi_video'):
        # Protect from infinite recursion due to recursively nested playlists
        # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
        webpage_url = ie_result['webpage_url']
        if webpage_url in self._playlist_urls:
            # The parentheses matter: '%' binds tighter than 'or', so without
            # them a missing title would be printed as "None"
            self.to_screen(
                '[download] Skipping already downloaded playlist: %s'
                % (ie_result.get('title') or ie_result.get('id')))
            return

        self._playlist_level += 1
        self._playlist_urls.add(webpage_url)
        self._sanitize_thumbnails(ie_result)
        try:
            return self.__process_playlist(ie_result, download)
        finally:
            self._playlist_level -= 1
            # Back at the outermost playlist: forget the URLs seen so far
            if not self._playlist_level:
                self._playlist_urls.clear()
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Give each entry the extractor/URL fields of the enclosing result
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
1611
def _ensure_dir_exists(self, path):
    """Call make_dir for `path`, passing self.report_error as its second argument

    The result of make_dir is returned unchanged; callers in this file
    treat a falsy result as "directory not available".
    """
    error_reporter = self.report_error
    return make_dir(path, error_reporter)
1614
1615 @staticmethod
1616 def _playlist_infodict(ie_result, **kwargs):
1617 return {
1618 **ie_result,
1619 'playlist': ie_result.get('title') or ie_result.get('id'),
1620 'playlist_id': ie_result.get('id'),
1621 'playlist_title': ie_result.get('title'),
1622 'playlist_uploader': ie_result.get('uploader'),
1623 'playlist_uploader_id': ie_result.get('uploader_id'),
1624 'playlist_index': 0,
1625 **kwargs,
1626 }
1627
def __process_playlist(self, ie_result, download):
    """Select, process and store the entries of a playlist result

    Entries are chosen according to the 'playliststart', 'playlistend' and
    'playlist_items' params. Depending on the params, the playlist info
    json, description and thumbnails are written first. Every selected
    entry is then processed through __process_iterable_entry, the processed
    results replace ie_result['entries'] and the 'playlist' postprocessors
    are run.

    ie_result must contain 'entries' (a list, or an iterable that gets
    wrapped in a LazyList unless it already is a PagedList/LazyList) as
    well as 'extractor', 'extractor_key' and 'webpage_url'.

    Returns the result of run_all_pps('playlist', ...), or None if
    _write_info_json or _write_description returned None.
    Raises EntryNotInPlaylist if there is no 'entries' key, or if a
    requested entry is unavailable in a result that carries
    'requested_entries'.
    """
    # We process each entry in the playlist
    playlist = ie_result.get('title') or ie_result.get('id')
    self.to_screen('[download] Downloading playlist: %s' % playlist)

    if 'entries' not in ie_result:
        raise EntryNotInPlaylist('There are no entries')

    # Unique placeholder for positions not covered by 'requested_entries'
    MissingEntry = object()
    incomplete_entries = bool(ie_result.get('requested_entries'))
    if incomplete_entries:
        def fill_missing_entries(entries, indices):
            # Put every entry back at its 1-based playlist position
            ret = [MissingEntry] * max(indices)
            for i, entry in zip(indices, entries):
                ret[i - 1] = entry
            return ret
        ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])

    playlist_results = []

    playliststart = self.params.get('playliststart', 1)
    playlistend = self.params.get('playlistend')
    # For backwards compatibility, interpret -1 as whole list
    if playlistend == -1:
        playlistend = None

    playlistitems_str = self.params.get('playlist_items')
    playlistitems = None
    if playlistitems_str is not None:
        def iter_playlistitems(spec):
            # "1,3-5" -> 1, 3, 4, 5
            for string_segment in spec.split(','):
                if '-' in string_segment:
                    start, end = string_segment.split('-')
                    yield from range(int(start), int(end) + 1)
                else:
                    yield int(string_segment)
        playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

    ie_entries = ie_result['entries']
    if isinstance(ie_entries, list):
        playlist_count = len(ie_entries)
        msg = f'Collected {playlist_count} videos; downloading %d of them'
        ie_result['playlist_count'] = ie_result.get('playlist_count') or playlist_count

        def get_entry(i):
            return ie_entries[i - 1]
    else:
        msg = 'Downloading %d videos'
        if not isinstance(ie_entries, (PagedList, LazyList)):
            ie_entries = LazyList(ie_entries)
        elif isinstance(ie_entries, InAdvancePagedList):
            if ie_entries._pagesize == 1:
                playlist_count = ie_entries._pagecount

        def get_entry(i):
            # Fetching a lazy entry may run extractor code, so it goes
            # through the same exception handling wrapper as extraction
            return YoutubeDL.__handle_extraction_exceptions(
                lambda self, i: ie_entries[i - 1]
            )(self, i)

    # entry_indices[k] is the real 1-based playlist position of entries[k].
    # It is recorded here because skipped indices (0) would otherwise make
    # the position in `entries` drift away from the requested index.
    entries, entry_indices, broken = [], [], False
    items = playlistitems if playlistitems is not None else itertools.count(playliststart)
    for i in items:
        if i == 0:
            continue
        if playlistitems is None and playlistend is not None and playlistend < i:
            break
        entry = None
        try:
            entry = get_entry(i)
            if entry is MissingEntry:
                raise EntryNotInPlaylist()
        except (IndexError, EntryNotInPlaylist):
            if incomplete_entries:
                raise EntryNotInPlaylist(f'Entry {i} cannot be found')
            elif not playlistitems:
                # Ran past the end of the playlist
                break
        entries.append(entry)
        entry_indices.append(i)
        try:
            if entry is not None:
                self._match_entry(entry, incomplete=True, silent=True)
        except (ExistingVideoReached, RejectedVideoReached):
            broken = True
            break
    ie_result['entries'] = entries

    # Save playlist_index before re-ordering
    entries = [
        (index, entry)
        for index, entry in zip(entry_indices, entries)
        if entry is not None]
    n_entries = len(entries)

    if not (ie_result.get('playlist_count') or broken or playlistitems or playlistend):
        ie_result['playlist_count'] = n_entries

    if not playlistitems and (playliststart != 1 or playlistend):
        playlistitems = list(range(playliststart, playliststart + n_entries))
    ie_result['requested_entries'] = playlistitems

    _infojson_written = False
    write_playlist_files = self.params.get('allow_playlist_files', True)
    if write_playlist_files and self.params.get('list_thumbnails'):
        self.list_thumbnails(ie_result)
    if write_playlist_files and not self.params.get('simulate'):
        ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
        _infojson_written = self._write_info_json(
            'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
        if _infojson_written is None:
            return
        if self._write_description('playlist', ie_result,
                                   self.prepare_filename(ie_copy, 'pl_description')) is None:
            return
        # TODO: This should be passed to ThumbnailsConvertor if necessary
        self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))

    if self.params.get('playlistreverse', False):
        entries = entries[::-1]
    if self.params.get('playlistrandom', False):
        random.shuffle(entries)

    x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

    self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
    failures = 0
    max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
    # Neither value changes while the entries are processed, so compute them once
    last_playlist_index = max(playlistitems) if playlistitems else (playlistend or n_entries)
    compat_playlist_index = 'playlist-index' in self.params.get('compat_opts', [])
    for i, entry_tuple in enumerate(entries, 1):
        playlist_index, entry = entry_tuple
        if compat_playlist_index:
            # Compat behaviour: number by position in the (re-ordered) list
            playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
        self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
        # This __x_forwarded_for_ip thing is a bit ugly but requires
        # minimal changes
        if x_forwarded_for:
            entry['__x_forwarded_for_ip'] = x_forwarded_for
        extra = {
            'n_entries': n_entries,
            '_last_playlist_index': last_playlist_index,
            'playlist_count': ie_result.get('playlist_count'),
            'playlist_index': playlist_index,
            'playlist_autonumber': i,
            'playlist': playlist,
            'playlist_id': ie_result.get('id'),
            'playlist_title': ie_result.get('title'),
            'playlist_uploader': ie_result.get('uploader'),
            'playlist_uploader_id': ie_result.get('uploader_id'),
            'extractor': ie_result['extractor'],
            'webpage_url': ie_result['webpage_url'],
            'webpage_url_basename': url_basename(ie_result['webpage_url']),
            'webpage_url_domain': get_domain(ie_result['webpage_url']),
            'extractor_key': ie_result['extractor_key'],
        }

        # _match_entry returns a non-None value for entries that are to be skipped
        if self._match_entry(entry, incomplete=True) is not None:
            continue

        entry_result = self.__process_iterable_entry(entry, download, extra)
        if not entry_result:
            failures += 1
        if failures >= max_failures:
            self.report_error(
                'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
            break
        playlist_results.append(entry_result)
    ie_result['entries'] = playlist_results

    # Write the updated info to json
    if _infojson_written and self._write_info_json(
            'updated playlist', ie_result,
            self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
        return

    ie_result = self.run_all_pps('playlist', ie_result)
    self.to_screen(f'[download] Finished downloading playlist: {playlist}')
    return ie_result
1803
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
    """Run a single playlist entry through process_ie_result

    Exists as a separate method so that the call is wrapped by
    __handle_extraction_exceptions.
    """
    processed = self.process_ie_result(
        entry, download=download, extra_info=extra_info)
    return processed
1808
1809 def _build_format_filter(self, filter_spec):
1810 " Returns a function to filter the formats according to the filter_spec "
1811
1812 OPERATORS = {
1813 '<': operator.lt,
1814 '<=': operator.le,
1815 '>': operator.gt,
1816 '>=': operator.ge,
1817 '=': operator.eq,
1818 '!=': operator.ne,
1819 }
1820 operator_rex = re.compile(r'''(?x)\s*
1821 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
1822 (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1823 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
1824 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1825 m = operator_rex.fullmatch(filter_spec)
1826 if m:
1827 try:
1828 comparison_value = int(m.group('value'))
1829 except ValueError:
1830 comparison_value = parse_filesize(m.group('value'))
1831 if comparison_value is None:
1832 comparison_value = parse_filesize(m.group('value') + 'B')
1833 if comparison_value is None:
1834 raise ValueError(
1835 'Invalid value %r in format specification %r' % (
1836 m.group('value'), filter_spec))
1837 op = OPERATORS[m.group('op')]
1838
1839 if not m:
1840 STR_OPERATORS = {
1841 '=': operator.eq,
1842 '^=': lambda attr, value: attr.startswith(value),
1843 '$=': lambda attr, value: attr.endswith(value),
1844 '*=': lambda attr, value: value in attr,
1845 }
1846 str_operator_rex = re.compile(r'''(?x)\s*
1847 (?P<key>[a-zA-Z0-9._-]+)\s*
1848 (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1849 (?P<value>[a-zA-Z0-9._-]+)\s*
1850 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1851 m = str_operator_rex.fullmatch(filter_spec)
1852 if m:
1853 comparison_value = m.group('value')
1854 str_op = STR_OPERATORS[m.group('op')]
1855 if m.group('negation'):
1856 op = lambda attr, value: not str_op(attr, value)
1857 else:
1858 op = str_op
1859
1860 if not m:
1861 raise SyntaxError('Invalid filter specification %r' % filter_spec)
1862
1863 def _filter(f):
1864 actual_value = f.get(m.group('key'))
1865 if actual_value is None:
1866 return m.group('none_inclusive')
1867 return op(actual_value, comparison_value)
1868 return _filter
1869
1870 def _check_formats(self, formats):
1871 for f in formats:
1872 self.to_screen('[info] Testing format %s' % f['format_id'])
1873 path = self.get_output_path('temp')
1874 if not self._ensure_dir_exists(f'{path}/'):
1875 continue
1876 temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
1877 temp_file.close()
1878 try:
1879 success, _ = self.dl(temp_file.name, f, test=True)
1880 except (DownloadError, IOError, OSError, ValueError) + network_exceptions:
1881 success = False
1882 finally:
1883 if os.path.exists(temp_file.name):
1884 try:
1885 os.remove(temp_file.name)
1886 except OSError:
1887 self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
1888 if success:
1889 yield f
1890 else:
1891 self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
1892
1893 def _default_format_spec(self, info_dict, download=True):
1894
1895 def can_merge():
1896 merger = FFmpegMergerPP(self)
1897 return merger.available and merger.can_merge()
1898
1899 prefer_best = (
1900 not self.params.get('simulate')
1901 and download
1902 and (
1903 not can_merge()
1904 or info_dict.get('is_live', False)
1905 or self.outtmpl_dict['default'] == '-'))
1906 compat = (
1907 prefer_best
1908 or self.params.get('allow_multiple_audio_streams', False)
1909 or 'format-spec' in self.params.get('compat_opts', []))
1910
1911 return (
1912 'best/bestvideo+bestaudio' if prefer_best
1913 else 'bestvideo*+bestaudio/best' if not compat
1914 else 'bestvideo+bestaudio/best')
1915
def build_format_selector(self, format_spec):
    """Compile a format specification string into a selector function

    The specification is tokenized with the Python tokenizer and parsed
    into a tree of selectors:
        a,b     every format selected by a, then every format selected by b
        a/b     the formats of the first alternative that selects anything
        a+b     each combination of a format from a and one from b, merged
        (...)   grouping
        x[...]  selector x restricted by a filter (see _build_format_filter);
                a filter without selector applies to 'best'

    The returned function takes a ctx dict with the keys 'formats' (list
    of format dicts) and 'incomplete_formats', and returns an iterable of
    the selected format dicts.

    Raises SyntaxError if format_spec cannot be parsed.
    """
    def syntax_error(note, start):
        # `start` is a (row, column) token position; the column is used to
        # place the "^" marker under the offending character
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                              'video': self.params.get('allow_multiple_video_streams', False)}

    check_formats = self.params.get('check_formats') == 'selected'

    def _parse_filter(tokens):
        # Collect the raw text up to the closing ']'
        filter_parts = []
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
                        break
            elif type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive descent parser; `tokens` must provide restore_last_token()
        # so that a nested call can hand a terminating operator back to its caller
        selectors = []
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    if not current_selector:
                        raise syntax_error('Unexpected "+"', start)
                    selector_1 = current_selector
                    selector_2 = _parse_format_selection(tokens, inside_merge=True)
                    if not selector_2:
                        raise syntax_error('Expected a selector', start)
                    current_selector = FormatSelector(MERGE, (selector_1, selector_2), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _merge(formats_pair):
        # Combine two (possibly already merged) formats into one format dict
        format_1, format_2 = formats_pair

        formats_info = []
        formats_info.extend(format_1.get('requested_formats', (format_1,)))
        formats_info.extend(format_2.get('requested_formats', (format_2,)))

        if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
            # Build a new list instead of popping from the list being
            # iterated: removing during iteration made the loop skip the
            # element following each removed one, so surplus streams
            # could slip through unexamined
            stream_taken = {'video': False, 'audio': False}
            kept_formats = []
            for fmt_info in formats_info:
                if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none':
                    # Neither audio nor video: nothing to merge
                    continue
                limited_kinds = [
                    aud_vid for aud_vid in ('audio', 'video')
                    if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none']
                if any(stream_taken[aud_vid] for aud_vid in limited_kinds):
                    continue
                # Only a format that is actually kept occupies its stream kinds
                for aud_vid in limited_kinds:
                    stream_taken[aud_vid] = True
                kept_formats.append(fmt_info)
            # If nothing at all is mergeable, keep the last candidate so
            # that the caller still receives a regular format dict
            formats_info = kept_formats or formats_info[-1:]

        if len(formats_info) == 1:
            return formats_info[0]

        video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
        audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']

        the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
        the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None

        output_ext = self.params.get('merge_output_format')
        if not output_ext:
            if the_only_video:
                output_ext = the_only_video['ext']
            elif the_only_audio and not video_fmts:
                output_ext = the_only_audio['ext']
            else:
                output_ext = 'mkv'

        filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info))

        new_dict = {
            'requested_formats': formats_info,
            'format': '+'.join(filtered('format')),
            'format_id': '+'.join(filtered('format_id')),
            'ext': output_ext,
            'protocol': '+'.join(map(determine_protocol, formats_info)),
            'language': '+'.join(orderedSet(filtered('language'))) or None,
            'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
            'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
            'tbr': sum(filtered('tbr', 'vbr', 'abr')),
        }

        if the_only_video:
            new_dict.update({
                'width': the_only_video.get('width'),
                'height': the_only_video.get('height'),
                'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
                'fps': the_only_video.get('fps'),
                'dynamic_range': the_only_video.get('dynamic_range'),
                'vcodec': the_only_video.get('vcodec'),
                'vbr': the_only_video.get('vbr'),
                'stretched_ratio': the_only_video.get('stretched_ratio'),
            })

        if the_only_audio:
            new_dict.update({
                'acodec': the_only_audio.get('acodec'),
                'abr': the_only_audio.get('abr'),
                'asr': the_only_audio.get('asr'),
            })

        return new_dict

    def _check_formats(formats):
        # Formats are only test-downloaded when check_formats == 'selected'
        if not check_formats:
            yield from formats
            return
        yield from self._check_formats(formats)

    def _build_selector_function(selector):
        if isinstance(selector, list):  # ,
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(ctx):
                for f in fs:
                    yield from f(ctx)
            return selector_function

        elif selector.type == GROUP:  # ()
            selector_function = _build_selector_function(selector.selector)

        elif selector.type == PICKFIRST:  # /
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(ctx):
                for f in fs:
                    picked_formats = list(f(ctx))
                    if picked_formats:
                        return picked_formats
                return []

        elif selector.type == MERGE:  # +
            selector_1, selector_2 = map(_build_selector_function, selector.selector)

            def selector_function(ctx):
                for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                    yield _merge(pair)

        elif selector.type == SINGLE:  # atom
            format_spec = selector.selector or 'best'

            # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
            if format_spec == 'all':
                def selector_function(ctx):
                    yield from _check_formats(ctx['formats'][::-1])
            elif format_spec == 'mergeall':
                def selector_function(ctx):
                    formats = list(_check_formats(ctx['formats']))
                    if not formats:
                        return
                    merged_format = formats[-1]
                    for f in formats[-2::-1]:
                        merged_format = _merge((merged_format, f))
                    yield merged_format

            else:
                format_fallback, format_reverse, format_idx = False, True, 1
                mobj = re.match(
                    r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                    format_spec)
                if mobj is not None:
                    format_idx = int_or_none(mobj.group('n'), default=1)
                    format_reverse = mobj.group('bw')[0] == 'b'
                    format_type = (mobj.group('type') or [None])[0]
                    not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                    format_modified = mobj.group('mod') is not None

                    format_fallback = not format_type and not format_modified  # for b, w
                    _filter_f = (
                        (lambda f: f.get('%scodec' % format_type) != 'none')
                        if format_type and format_modified  # bv*, ba*, wv*, wa*
                        else (lambda f: f.get('%scodec' % not_format_type) == 'none')
                        if format_type  # bv, ba, wv, wa
                        else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
                        if not format_modified  # b, w
                        else lambda f: True)  # b*, w*
                    # Formats with neither audio nor video are never picked by best/worst
                    filter_f = lambda f: _filter_f(f) and (
                        f.get('vcodec') != 'none' or f.get('acodec') != 'none')
                else:
                    if format_spec in self._format_selection_exts['audio']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none'
                    elif format_spec in self._format_selection_exts['video']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none'
                    elif format_spec in self._format_selection_exts['storyboards']:
                        filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none'
                    else:
                        filter_f = lambda f: f.get('format_id') == format_spec  # id

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    matches = list(filter(filter_f, formats)) if filter_f is not None else formats
                    if format_fallback and ctx['incomplete_formats'] and not matches:
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) best/worst will fallback to
                        # best/worst {video,audio}-only format
                        matches = formats
                    matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1]))
                    try:
                        yield matches[format_idx - 1]
                    except IndexError:
                        return

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(ctx):
            # Apply the [...] filters on a copy so the caller's ctx stays untouched
            ctx_copy = dict(ctx)
            for _filter in filters:
                ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
            return selector_function(ctx_copy)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Iterator over the token list that can step back by one token
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
2240
def _calc_headers(self, info_dict):
    """Build the HTTP headers for `info_dict`

    Starts from a copy of std_headers, overlays the dict's own
    'http_headers', sets 'Cookie' when _calc_cookies returns something,
    and adds 'X-Forwarded-For' from '__x_forwarded_for_ip' unless that
    header is already present.
    """
    headers = std_headers.copy()
    headers.update(info_dict.get('http_headers') or {})

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    if 'X-Forwarded-For' in headers:
        return headers

    forwarded_ip = info_dict.get('__x_forwarded_for_ip')
    if forwarded_ip:
        headers['X-Forwarded-For'] = forwarded_ip
    return headers
2255
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header that self.cookiejar adds to a request for info_dict['url']"""
    request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(request)
    return request.get_header('Cookie')
2260
2261 def _sort_thumbnails(self, thumbnails):
2262 thumbnails.sort(key=lambda t: (
2263 t.get('preference') if t.get('preference') is not None else -1,
2264 t.get('width') if t.get('width') is not None else -1,
2265 t.get('height') if t.get('height') is not None else -1,
2266 t.get('id') if t.get('id') is not None else '',
2267 t.get('url')))
2268
2269 def _sanitize_thumbnails(self, info_dict):
2270 thumbnails = info_dict.get('thumbnails')
2271 if thumbnails is None:
2272 thumbnail = info_dict.get('thumbnail')
2273 if thumbnail:
2274 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
2275 if not thumbnails:
2276 return
2277
2278 def check_thumbnails(thumbnails):
2279 for t in thumbnails:
2280 self.to_screen(f'[info] Testing thumbnail {t["id"]}')
2281 try:
2282 self.urlopen(HEADRequest(t['url']))
2283 except network_exceptions as err:
2284 self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...')
2285 continue
2286 yield t
2287
2288 self._sort_thumbnails(thumbnails)
2289 for i, t in enumerate(thumbnails):
2290 if t.get('id') is None:
2291 t['id'] = '%d' % i
2292 if t.get('width') and t.get('height'):
2293 t['resolution'] = '%dx%d' % (t['width'], t['height'])
2294 t['url'] = sanitize_url(t['url'])
2295
2296 if self.params.get('check_formats') is True:
2297 info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
2298 else:
2299 info_dict['thumbnails'] = thumbnails
2300
2301 def process_video_result(self, info_dict, download=True):
2302 assert info_dict.get('_type', 'video') == 'video'
2303 self._num_videos += 1
2304
2305 if 'id' not in info_dict:
2306 raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor'])
2307 elif not info_dict.get('id'):
2308 raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor'])
2309
2310 info_dict['fulltitle'] = info_dict.get('title')
2311 if 'title' not in info_dict:
2312 raise ExtractorError('Missing "title" field in extractor result',
2313 video_id=info_dict['id'], ie=info_dict['extractor'])
2314 elif not info_dict.get('title'):
2315 self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
2316 info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
2317
2318 def report_force_conversion(field, field_not, conversion):
2319 self.report_warning(
2320 '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
2321 % (field, field_not, conversion))
2322
2323 def sanitize_string_field(info, string_field):
2324 field = info.get(string_field)
2325 if field is None or isinstance(field, compat_str):
2326 return
2327 report_force_conversion(string_field, 'a string', 'string')
2328 info[string_field] = compat_str(field)
2329
2330 def sanitize_numeric_fields(info):
2331 for numeric_field in self._NUMERIC_FIELDS:
2332 field = info.get(numeric_field)
2333 if field is None or isinstance(field, compat_numeric_types):
2334 continue
2335 report_force_conversion(numeric_field, 'numeric', 'int')
2336 info[numeric_field] = int_or_none(field)
2337
2338 sanitize_string_field(info_dict, 'id')
2339 sanitize_numeric_fields(info_dict)
2340
2341 if 'playlist' not in info_dict:
2342 # It isn't part of a playlist
2343 info_dict['playlist'] = None
2344 info_dict['playlist_index'] = None
2345
2346 self._sanitize_thumbnails(info_dict)
2347
2348 thumbnail = info_dict.get('thumbnail')
2349 thumbnails = info_dict.get('thumbnails')
2350 if thumbnail:
2351 info_dict['thumbnail'] = sanitize_url(thumbnail)
2352 elif thumbnails:
2353 info_dict['thumbnail'] = thumbnails[-1]['url']
2354
2355 if info_dict.get('display_id') is None and 'id' in info_dict:
2356 info_dict['display_id'] = info_dict['id']
2357
2358 if info_dict.get('duration') is not None:
2359 info_dict['duration_string'] = formatSeconds(info_dict['duration'])
2360
2361 for ts_key, date_key in (
2362 ('timestamp', 'upload_date'),
2363 ('release_timestamp', 'release_date'),
2364 ('modified_timestamp', 'modified_date'),
2365 ):
2366 if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
2367 # Working around out-of-range timestamp values (e.g. negative ones on Windows,
2368 # see http://bugs.python.org/issue1646728)
2369 try:
2370 upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
2371 info_dict[date_key] = upload_date.strftime('%Y%m%d')
2372 except (ValueError, OverflowError, OSError):
2373 pass
2374
2375 live_keys = ('is_live', 'was_live')
2376 live_status = info_dict.get('live_status')
2377 if live_status is None:
2378 for key in live_keys:
2379 if info_dict.get(key) is False:
2380 continue
2381 if info_dict.get(key):
2382 live_status = key
2383 break
2384 if all(info_dict.get(key) is False for key in live_keys):
2385 live_status = 'not_live'
2386 if live_status:
2387 info_dict['live_status'] = live_status
2388 for key in live_keys:
2389 if info_dict.get(key) is None:
2390 info_dict[key] = (live_status == key)
2391
2392 # Auto generate title fields corresponding to the *_number fields when missing
2393 # in order to always have clean titles. This is very common for TV series.
2394 for field in ('chapter', 'season', 'episode'):
2395 if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
2396 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
2397
2398 for cc_kind in ('subtitles', 'automatic_captions'):
2399 cc = info_dict.get(cc_kind)
2400 if cc:
2401 for _, subtitle in cc.items():
2402 for subtitle_format in subtitle:
2403 if subtitle_format.get('url'):
2404 subtitle_format['url'] = sanitize_url(subtitle_format['url'])
2405 if subtitle_format.get('ext') is None:
2406 subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
2407
2408 automatic_captions = info_dict.get('automatic_captions')
2409 subtitles = info_dict.get('subtitles')
2410
2411 info_dict['requested_subtitles'] = self.process_subtitles(
2412 info_dict['id'], subtitles, automatic_captions)
2413
2414 if info_dict.get('formats') is None:
2415 # There's only one format available
2416 formats = [info_dict]
2417 else:
2418 formats = info_dict['formats']
2419
2420 info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
2421 if not self.params.get('allow_unplayable_formats'):
2422 formats = [f for f in formats if not f.get('has_drm')]
2423
2424 if info_dict.get('is_live'):
2425 get_from_start = bool(self.params.get('live_from_start'))
2426 formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
2427 if not get_from_start:
2428 info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
2429
2430 if not formats:
2431 self.raise_no_formats(info_dict)
2432
2433 def is_wellformed(f):
2434 url = f.get('url')
2435 if not url:
2436 self.report_warning(
2437 '"url" field is missing or empty - skipping format, '
2438 'there is an error in extractor')
2439 return False
2440 if isinstance(url, bytes):
2441 sanitize_string_field(f, 'url')
2442 return True
2443
2444 # Filter out malformed formats for better extraction robustness
2445 formats = list(filter(is_wellformed, formats))
2446
2447 formats_dict = {}
2448
2449 # We check that all the formats have the format and format_id fields
2450 for i, format in enumerate(formats):
2451 sanitize_string_field(format, 'format_id')
2452 sanitize_numeric_fields(format)
2453 format['url'] = sanitize_url(format['url'])
2454 if not format.get('format_id'):
2455 format['format_id'] = compat_str(i)
2456 else:
2457 # Sanitize format_id from characters used in format selector expression
2458 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
2459 format_id = format['format_id']
2460 if format_id not in formats_dict:
2461 formats_dict[format_id] = []
2462 formats_dict[format_id].append(format)
2463
2464 # Make sure all formats have unique format_id
2465 common_exts = set(itertools.chain(*self._format_selection_exts.values()))
2466 for format_id, ambiguous_formats in formats_dict.items():
2467 ambigious_id = len(ambiguous_formats) > 1
2468 for i, format in enumerate(ambiguous_formats):
2469 if ambigious_id:
2470 format['format_id'] = '%s-%d' % (format_id, i)
2471 if format.get('ext') is None:
2472 format['ext'] = determine_ext(format['url']).lower()
2473 # Ensure there is no conflict between id and ext in format selection
2474 # See https://github.com/yt-dlp/yt-dlp/issues/1282
2475 if format['format_id'] != format['ext'] and format['format_id'] in common_exts:
2476 format['format_id'] = 'f%s' % format['format_id']
2477
2478 for i, format in enumerate(formats):
2479 if format.get('format') is None:
2480 format['format'] = '{id} - {res}{note}'.format(
2481 id=format['format_id'],
2482 res=self.format_resolution(format),
2483 note=format_field(format, 'format_note', ' (%s)'),
2484 )
2485 if format.get('protocol') is None:
2486 format['protocol'] = determine_protocol(format)
2487 if format.get('resolution') is None:
2488 format['resolution'] = self.format_resolution(format, default=None)
2489 if format.get('dynamic_range') is None and format.get('vcodec') != 'none':
2490 format['dynamic_range'] = 'SDR'
2491 if (info_dict.get('duration') and format.get('tbr')
2492 and not format.get('filesize') and not format.get('filesize_approx')):
2493 format['filesize_approx'] = info_dict['duration'] * format['tbr'] * (1024 / 8)
2494
2495 # Add HTTP headers, so that external programs can use them from the
2496 # json output
2497 full_format_info = info_dict.copy()
2498 full_format_info.update(format)
2499 format['http_headers'] = self._calc_headers(full_format_info)
2500 # Remove private housekeeping stuff
2501 if '__x_forwarded_for_ip' in info_dict:
2502 del info_dict['__x_forwarded_for_ip']
2503
2504 # TODO Central sorting goes here
2505
2506 if self.params.get('check_formats') is True:
2507 formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
2508
2509 if not formats or formats[0] is not info_dict:
2510 # only set the 'formats' fields if the original info_dict list them
2511 # otherwise we end up with a circular reference, the first (and unique)
2512 # element in the 'formats' field in info_dict is info_dict itself,
2513 # which can't be exported to json
2514 info_dict['formats'] = formats
2515
2516 info_dict, _ = self.pre_process(info_dict)
2517
2518 # The pre-processors may have modified the formats
2519 formats = info_dict.get('formats', [info_dict])
2520
2521 list_only = self.params.get('simulate') is None and (
2522 self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
2523 interactive_format_selection = not list_only and self.format_selector == '-'
2524 if self.params.get('list_thumbnails'):
2525 self.list_thumbnails(info_dict)
2526 if self.params.get('listsubtitles'):
2527 if 'automatic_captions' in info_dict:
2528 self.list_subtitles(
2529 info_dict['id'], automatic_captions, 'automatic captions')
2530 self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
2531 if self.params.get('listformats') or interactive_format_selection:
2532 self.list_formats(info_dict)
2533 if list_only:
2534 # Without this printing, -F --print-json will not work
2535 self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True)
2536 return
2537
2538 format_selector = self.format_selector
2539 if format_selector is None:
2540 req_format = self._default_format_spec(info_dict, download=download)
2541 self.write_debug('Default format spec: %s' % req_format)
2542 format_selector = self.build_format_selector(req_format)
2543
2544 while True:
2545 if interactive_format_selection:
2546 req_format = input(
2547 self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS))
2548 try:
2549 format_selector = self.build_format_selector(req_format)
2550 except SyntaxError as err:
2551 self.report_error(err, tb=False, is_error=False)
2552 continue
2553
2554 # While in format selection we may need to have an access to the original
2555 # format set in order to calculate some metrics or do some processing.
2556 # For now we need to be able to guess whether original formats provided
2557 # by extractor are incomplete or not (i.e. whether extractor provides only
2558 # video-only or audio-only formats) for proper formats selection for
2559 # extractors with such incomplete formats (see
2560 # https://github.com/ytdl-org/youtube-dl/pull/5556).
2561 # Since formats may be filtered during format selection and may not match
2562 # the original formats the results may be incorrect. Thus original formats
2563 # or pre-calculated metrics should be passed to format selection routines
2564 # as well.
2565 # We will pass a context object containing all necessary additional data
2566 # instead of just formats.
2567 # This fixes incorrect format selection issue (see
2568 # https://github.com/ytdl-org/youtube-dl/issues/10083).
2569 incomplete_formats = (
2570 # All formats are video-only or
2571 all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
2572 # all formats are audio-only
2573 or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
2574
2575 ctx = {
2576 'formats': formats,
2577 'incomplete_formats': incomplete_formats,
2578 }
2579
2580 formats_to_download = list(format_selector(ctx))
2581 if interactive_format_selection and not formats_to_download:
2582 self.report_error('Requested format is not available', tb=False, is_error=False)
2583 continue
2584 break
2585
2586 if not formats_to_download:
2587 if not self.params.get('ignore_no_formats_error'):
2588 raise ExtractorError('Requested format is not available', expected=True,
2589 video_id=info_dict['id'], ie=info_dict['extractor'])
2590 self.report_warning('Requested format is not available')
2591 # Process what we can, even without any available formats.
2592 formats_to_download = [{}]
2593
2594 best_format = formats_to_download[-1]
2595 if download:
2596 if best_format:
2597 self.to_screen(
2598 f'[info] {info_dict["id"]}: Downloading {len(formats_to_download)} format(s): '
2599 + ', '.join([f['format_id'] for f in formats_to_download]))
2600 max_downloads_reached = False
2601 for i, fmt in enumerate(formats_to_download):
2602 formats_to_download[i] = new_info = dict(info_dict)
2603 # Save a reference to the original info_dict so that it can be modified in process_info if needed
2604 new_info.update(fmt)
2605 new_info['__original_infodict'] = info_dict
2606 try:
2607 self.process_info(new_info)
2608 except MaxDownloadsReached:
2609 max_downloads_reached = True
2610 new_info.pop('__original_infodict')
2611 # Remove copied info
2612 for key, val in tuple(new_info.items()):
2613 if info_dict.get(key) == val:
2614 new_info.pop(key)
2615 if max_downloads_reached:
2616 break
2617
2618 write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download)
2619 assert write_archive.issubset({True, False, 'ignore'})
2620 if True in write_archive and False not in write_archive:
2621 self.record_download_archive(info_dict)
2622
2623 info_dict['requested_downloads'] = formats_to_download
2624 info_dict = self.run_all_pps('after_video', info_dict)
2625 if max_downloads_reached:
2626 raise MaxDownloadsReached()
2627
2628 # We update the info dict with the selected best quality format (backwards compatibility)
2629 info_dict.update(best_format)
2630 return info_dict
2631
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    Normal subtitles are only considered when the 'writesubtitles' param is
    set and automatic captions only when 'writeautomaticsub' is set. When a
    language is present in both, the normal subtitles are used.

    video_id is only used in warning messages.

    Returns a dict mapping each selected language to the chosen subtitle
    format dict, or None if there is nothing to choose from.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            # Never let an automatic caption replace a normal subtitle
            available_subs.setdefault(lang, cap_info)

    # This is also the case when neither subtitle param is set,
    # since nothing gets collected above then
    if not available_subs:
        return None

    all_sub_langs = available_subs.keys()
    if self.params.get('allsubtitles', False):
        requested_langs = all_sub_langs
    elif self.params.get('subtitleslangs', False):
        # A list is used so that the order of languages will be the same as
        # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
        requested_langs = []
        for lang_re in self.params.get('subtitleslangs'):
            if lang_re == 'all':
                requested_langs.extend(all_sub_langs)
                continue
            # startswith (rather than indexing) so that an empty entry
            # does not raise IndexError; it simply matches no language
            discard = lang_re.startswith('-')
            if discard:
                lang_re = lang_re[1:]
            current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
            if discard:
                for lang in current_langs:
                    while lang in requested_langs:
                        requested_langs.remove(lang)
            else:
                requested_langs.extend(current_langs)
        requested_langs = orderedSet(requested_langs)
    elif 'en' in available_subs:
        requested_langs = ['en']
    else:
        # Fall back to the first available language
        requested_langs = [next(iter(all_sub_langs))]
    if requested_langs:
        self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # An empty list of formats is as unusable as a missing language;
        # indexing it below would raise IndexError
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                f = formats[-1]
                break
            matches = [sub_format for sub_format in formats if sub_format['ext'] == ext]
            if matches:
                f = matches[-1]
                break
        else:
            # None of the preferred extensions matched; use the last format
            f = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, f['ext']))
        subs[lang] = f
    return subs
2699
def _forceprint(self, key, info_dict):
    """Print the 'forceprint' and 'print_to_file' templates registered for key

    Does nothing when info_dict is None. The templates are evaluated against
    a copy of info_dict that additionally holds the rendered formats,
    thumbnails, subtitles and automatic captions tables.
    """
    if info_dict is None:
        return

    printable = info_dict.copy()
    printable['formats_table'] = self.render_formats_table(info_dict)
    printable['thumbnails_table'] = self.render_thumbnails_table(info_dict)
    for sub_kind in ('subtitles', 'automatic_captions'):
        printable[f'{sub_kind}_table'] = self.render_subtitles_table(
            info_dict.get('id'), info_dict.get(sub_kind))

    def as_template(spec):
        # A bare field name is shorthand for "%(field)s" and
        # "field=" for "field = %(field)r"; anything else is used as-is
        shorthand = re.match(r'\w+(=?)$', spec)
        if not shorthand:
            return spec
        if shorthand.group(1):
            field = spec[:-1]
            return f'{field} = %({field})r'
        return f'%({spec})s'

    for spec in self.params['forceprint'].get(key, []):
        self.to_stdout(self.evaluate_outtmpl(as_template(spec), printable))

    for spec, file_tmpl in self.params['print_to_file'].get(key, []):
        # The filename is evaluated against the original dict, without the tables
        filename = self.evaluate_outtmpl(file_tmpl, info_dict)
        template = as_template(spec)
        self.to_screen(f'[info] Writing {template!r} to: {filename}')
        with io.open(filename, 'a', encoding='utf-8') as outfile:
            outfile.write(self.evaluate_outtmpl(template, printable) + '\n')
2726
def __forced_printings(self, info_dict, filename, incomplete):
    """Print the fields requested through the force* params to stdout

    Works on a copy of info_dict, to which 'filename' (when given) and
    'urls' are added. When incomplete is true, the title, id, url and
    format fields are skipped if missing instead of raising KeyError.
    """
    info = info_dict.copy()
    if filename is not None:
        info['filename'] = filename
    requested_formats = info.get('requested_formats')
    if requested_formats is not None:
        # For RTMP URLs, also include the playpath
        info['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in requested_formats)
    elif 'url' in info:
        info['urls'] = info['url'] + info.get('play_path', '')

    def is_forced(field):
        return self.params.get(f'force{field}', False)

    def print_mandatory(field, actual_field=None):
        source = field if actual_field is None else actual_field
        if not is_forced(field):
            return
        if incomplete and info.get(source) is None:
            return
        self.to_stdout(info[source])

    def print_optional(field):
        if is_forced(field) and info.get(field) is not None:
            self.to_stdout(info[field])

    if (self.params.get('forcejson')
            or self.params['forceprint'].get('video')
            or self.params['print_to_file'].get('video')):
        self.post_extract(info)
    self._forceprint('video', info)

    print_mandatory('title')
    print_mandatory('id')
    print_mandatory('url', 'urls')
    for field in ('thumbnail', 'description', 'filename'):
        print_optional(field)
    if self.params.get('forceduration') and info.get('duration') is not None:
        self.to_stdout(formatSeconds(info['duration']))
    print_mandatory('format')

    if self.params.get('forcejson'):
        self.to_stdout(json.dumps(self.sanitize_info(info)))
2767
def dl(self, name, info, subtitle=False, test=False):
    """Run a suitable downloader for info, writing to name

    With test=True a fixed set of downloader params is used instead of
    self.params, and neither the progress hooks nor the debug message are
    set up. Returns whatever the downloader's download() returns.
    """
    if not info.get('url'):
        self.raise_no_formats(info, True)

    if test:
        verbose = self.params.get('verbose')
        params = {
            'test': True,
            'quiet': self.params.get('quiet') or not verbose,
            'verbose': verbose,
            'noprogress': not verbose,
            'nopart': True,
            'skip_unavailable_fragments': False,
            'keep_fragments': False,
            'overwrites': True,
            '_no_ytdl_file': True,
        }
    else:
        params = self.params

    downloader_class = get_suitable_downloader(info, params, to_stdout=(name == '-'))
    fd = downloader_class(self, params)
    if not test:
        for hook in self._progress_hooks:
            fd.add_progress_hook(hook)

        def loggable_url(fmt):
            # Only keep the part of a data: URL up to the first comma
            url = fmt['url']
            if url.startswith('data:'):
                return url.split(',')[0] + ',<data>'
            return url

        targets = info.get('requested_formats', []) or [info]
        urls = '", "'.join(map(loggable_url, targets))
        self.write_debug('Invoking downloader on "%s"' % urls)

    # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
    # But it may contain objects that are not deep-copyable
    new_info = self._copy_infodict(info)
    if new_info.get('http_headers') is None:
        new_info['http_headers'] = self._calc_headers(new_info)
    return fd.download(name, new_info, subtitle)
2802
def existing_file(self, filepaths, *, default_overwrite=True):
    """Return the first of filepaths that exists, unless overwriting

    When the 'overwrites' param (defaulting to default_overwrite) is set,
    every existing file among filepaths is deleted and None is returned.
    None is also returned when none of the files exist.
    """
    found = [path for path in orderedSet(filepaths) if os.path.exists(path)]
    if found and not self.params.get('overwrites', default_overwrite):
        return found[0]

    for stale in found:
        self.report_file_delete(stale)
        os.remove(stale)
    return None
2812
def process_info(self, info_dict):
    """Process a single resolved IE result. (Modifies it in-place)

    In order: skips the entry if _match_entry rejects it, does the forced
    printings, writes the description, subtitles, thumbnails, info json,
    annotations and internet shortcut files, runs the 'before_dl'
    pre-processors, downloads the requested format(s) unless the
    'skip_download' param is set, and finally runs the post-processors
    and the post hooks.

    info_dict['__write_download_archive'] is set to 'ignore' for an entry
    rejected by _match_entry, to True after a successful download (or when
    the 'force_write_download_archive' param is set), and to the value of
    that param when simulating or skipping the download.

    Returns None. Most failures are reported through report_error/
    report_warning followed by an early return.
    Raises UnavailableVideoError for an OSError during the download and
    MaxDownloadsReached once the 'max_downloads' limit has been reached.
    """

    assert info_dict.get('_type', 'video') == 'video'
    original_infodict = info_dict

    if 'format' not in info_dict and 'ext' in info_dict:
        info_dict['format'] = info_dict['ext']

    if self._match_entry(info_dict) is not None:
        info_dict['__write_download_archive'] = 'ignore'
        return

    self.post_extract(info_dict)
    self._num_downloads += 1

    # info_dict['_filename'] needs to be set for backward compatibility
    info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
    temp_filename = self.prepare_filename(info_dict, 'temp')
    files_to_move = {}

    # Forced printings
    self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict))

    if self.params.get('simulate'):
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
        return

    if full_filename is None:
        return
    if not self._ensure_dir_exists(encodeFilename(full_filename)):
        return
    if not self._ensure_dir_exists(encodeFilename(temp_filename)):
        return

    if self._write_description('video', info_dict,
                               self.prepare_filename(info_dict, 'description')) is None:
        return

    sub_files = self._write_subtitles(info_dict, temp_filename)
    if sub_files is None:
        return
    files_to_move.update(dict(sub_files))

    thumb_files = self._write_thumbnails(
        'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail'))
    if thumb_files is None:
        return
    files_to_move.update(dict(thumb_files))

    infofn = self.prepare_filename(info_dict, 'infojson')
    _infojson_written = self._write_info_json('video', info_dict, infofn)
    if _infojson_written:
        info_dict['infojson_filename'] = infofn
        # For backward compatibility, even though it was a private field
        info_dict['__infojson_filename'] = infofn
    elif _infojson_written is None:
        return

    # Note: Annotations are deprecated
    annofn = None
    if self.params.get('writeannotations', False):
        annofn = self.prepare_filename(info_dict, 'annotation')
    if annofn:
        if not self._ensure_dir_exists(encodeFilename(annofn)):
            return
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif not info_dict.get('annotations'):
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    # Write internet shortcut files
    def _write_link_file(link_type):
        # Returns False only on an error that must abort the processing
        if 'webpage_url' not in info_dict:
            self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
            return False
        linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
        if not self._ensure_dir_exists(encodeFilename(linkfn)):
            return False
        # Keep an existing shortcut only when overwriting is disabled,
        # the same way as it is done for the annotations file above.
        # (The check used to be inverted and skipped the write when overwriting was enabled)
        if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
            self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
            return True
        try:
            self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
            with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                         newline='\r\n' if link_type == 'url' else '\n') as linkfile:
                template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
                if link_type == 'desktop':
                    # The filename without the ".desktop" extension
                    template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
        except (OSError, IOError):
            self.report_error(f'Cannot write internet shortcut {linkfn}')
            return False
        return True

    write_links = {
        'url': self.params.get('writeurllink'),
        'webloc': self.params.get('writewebloclink'),
        'desktop': self.params.get('writedesktoplink'),
    }
    if self.params.get('writelink'):
        # 'writelink' selects the shortcut type based on the platform
        link_type = ('webloc' if sys.platform == 'darwin'
                     else 'desktop' if sys.platform.startswith('linux')
                     else 'url')
        write_links[link_type] = True

    if any(should_write and not _write_link_file(link_type)
           for link_type, should_write in write_links.items()):
        return

    def replace_info_dict(new_info):
        # Copy new_info into the existing dict object instead of rebinding
        # the name, so that the caller's info_dict is what gets modified
        nonlocal info_dict
        if new_info == info_dict:
            return
        info_dict.clear()
        info_dict.update(new_info)

    try:
        new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
        replace_info_dict(new_info)
    except PostProcessingError as err:
        self.report_error('Preprocessing: %s' % str(err))
        return

    if self.params.get('skip_download'):
        info_dict['filepath'] = temp_filename
        info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
        info_dict['__files_to_move'] = files_to_move
        replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict))
        info_dict['__write_download_archive'] = self.params.get('force_write_download_archive')
    else:
        # Download
        info_dict.setdefault('__postprocessors', [])
        try:

            def existing_video_file(*filepaths):
                # Also look for each file with the extension it would have after conversion
                ext = info_dict.get('ext')
                converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext)
                file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)),
                                          default_overwrite=False)
                if file:
                    info_dict['ext'] = os.path.splitext(file)[1][1:]
                return file

            success = True
            if info_dict.get('requested_formats') is not None:

                def compatible_formats(formats):
                    # TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
                    video_formats = [format for format in formats if format.get('vcodec') != 'none']
                    audio_formats = [format for format in formats if format.get('acodec') != 'none']
                    if len(video_formats) > 2 or len(audio_formats) > 2:
                        return False

                    # Check extension
                    exts = set(format.get('ext') for format in formats)
                    COMPATIBLE_EXTS = (
                        set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')),
                        set(('webm',)),
                    )
                    for ext_sets in COMPATIBLE_EXTS:
                        if ext_sets.issuperset(exts):
                            return True
                    # TODO: Check acodec/vcodec
                    return False

                requested_formats = info_dict['requested_formats']
                old_ext = info_dict['ext']
                if self.params.get('merge_output_format') is None:
                    if not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv')
                    if (info_dict['ext'] == 'webm'
                            and info_dict.get('thumbnails')
                            # check with type instead of pp_key, __name__, or isinstance
                            # since we dont want any custom PPs to trigger this
                            and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'webm doesn\'t support embedding a thumbnail, mkv will be used')
                new_ext = info_dict['ext']

                def correct_ext(filename, ext=new_ext):
                    if filename == '-':
                        return filename
                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    # Only strip the current extension if it is the old or the new one
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext in (old_ext, new_ext)
                        else filename)
                    return '%s.%s' % (filename_wo_ext, ext)

                # Ensure filename always has a correct extension for successful merge
                full_filename = correct_ext(full_filename)
                temp_filename = correct_ext(temp_filename)
                dl_filename = existing_video_file(full_filename, temp_filename)
                info_dict['__real_download'] = False

                downloaded = []
                merger = FFmpegMergerPP(self)

                fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                if dl_filename is not None:
                    self.report_file_already_downloaded(dl_filename)
                elif fd:
                    # A single downloader handles all the requested formats at once
                    for f in requested_formats if fd != FFmpegFD else []:
                        f['filepath'] = fname = prepend_extension(
                            correct_ext(temp_filename, info_dict['ext']),
                            'f%s' % f['format_id'], info_dict['ext'])
                        downloaded.append(fname)
                    info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    if self.params.get('allow_unplayable_formats'):
                        self.report_warning(
                            'You have requested merging of multiple formats '
                            'while also allowing unplayable formats to be downloaded. '
                            'The formats won\'t be merged to prevent data corruption.')
                    elif not merger.available:
                        self.report_warning(
                            'You have requested merging of multiple formats but ffmpeg is not installed. '
                            'The formats won\'t be merged.')

                    if temp_filename == '-':
                        reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                                  else 'but the formats are incompatible for simultaneous download' if merger.available
                                  else 'but ffmpeg is not installed')
                        self.report_warning(
                            f'You have requested downloading multiple formats to stdout {reason}. '
                            'The formats will be streamed one after the other')
                        fname = temp_filename
                    # Download each of the requested formats separately
                    for f in requested_formats:
                        new_info = dict(info_dict)
                        del new_info['requested_formats']
                        new_info.update(f)
                        if temp_filename != '-':
                            fname = prepend_extension(
                                correct_ext(temp_filename, new_info['ext']),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not self._ensure_dir_exists(fname):
                                return
                            f['filepath'] = fname
                            downloaded.append(fname)
                        partial_success, real_download = self.dl(fname, new_info)
                        info_dict['__real_download'] = info_dict['__real_download'] or real_download
                        success = success and partial_success

                if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
                    info_dict['__postprocessors'].append(merger)
                    info_dict['__files_to_merge'] = downloaded
                    # Even if there were no downloads, it is being merged only now
                    info_dict['__real_download'] = True
                else:
                    for file in downloaded:
                        files_to_move[file] = None
            else:
                # Just a single file
                dl_filename = existing_video_file(full_filename, temp_filename)
                if dl_filename is None or dl_filename == temp_filename:
                    # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
                    # So we should try to resume the download
                    success, real_download = self.dl(temp_filename, info_dict)
                    info_dict['__real_download'] = real_download
                else:
                    self.report_file_already_downloaded(dl_filename)

            dl_filename = dl_filename or temp_filename
            info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))

        except network_exceptions as err:
            self.report_error('unable to download video data: %s' % error_to_compat_str(err))
            return
        except (OSError, IOError) as err:
            raise UnavailableVideoError(err)
        except (ContentTooShortError, ) as err:
            self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            return

        if success and full_filename != '-':

            def fixup():
                do_fixup = True
                fixup_policy = self.params.get('fixup')
                vid = info_dict['id']

                if fixup_policy in ('ignore', 'never'):
                    return
                elif fixup_policy == 'warn':
                    do_fixup = False
                elif fixup_policy != 'force':
                    assert fixup_policy in ('detect_or_warn', None)
                    # Without 'force', only fix files that were downloaded just now
                    if not info_dict.get('__real_download'):
                        do_fixup = False

                def ffmpeg_fixup(cndn, msg, cls):
                    # Queue the post-processor cls if cndn holds, or just warn
                    # when fixing is disabled or the post-processor is unavailable
                    if not cndn:
                        return
                    if not do_fixup:
                        self.report_warning(f'{vid}: {msg}')
                        return
                    pp = cls(self)
                    if pp.available:
                        info_dict['__postprocessors'].append(pp)
                    else:
                        self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically')

                stretched_ratio = info_dict.get('stretched_ratio')
                ffmpeg_fixup(
                    stretched_ratio not in (1, None),
                    f'Non-uniform pixel ratio {stretched_ratio}',
                    FFmpegFixupStretchedPP)

                ffmpeg_fixup(
                    (info_dict.get('requested_formats') is None
                     and info_dict.get('container') == 'm4a_dash'
                     and info_dict.get('ext') == 'm4a'),
                    'writing DASH m4a. Only some players support this container',
                    FFmpegFixupM4aPP)

                downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                downloader = downloader.__name__ if downloader else None

                if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
                    ffmpeg_fixup(downloader == 'HlsFD',
                                 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                                 FFmpegFixupM3u8PP)
                    ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
                                 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)

            fixup()
            try:
                replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move))
            except PostProcessingError as err:
                self.report_error('Postprocessing: %s' % str(err))
                return
            try:
                for ph in self._post_hooks:
                    ph(info_dict['filepath'])
            except Exception as err:
                self.report_error('post hooks: %s' % str(err))
                return
            info_dict['__write_download_archive'] = True

    if self.params.get('force_write_download_archive'):
        info_dict['__write_download_archive'] = True

    # Make sure the info_dict was modified in-place
    assert info_dict is original_infodict

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None and self._num_downloads >= int(max_downloads):
        raise MaxDownloadsReached()
3180
def __download_wrapper(self, func):
    """Wrap func with the error handling common to all download entry points

    The returned wrapper always returns None. UnavailableVideoError is
    reported as an error, MaxDownloadsReached is re-raised, and
    DownloadCancelled is re-raised unless the 'break_per_url' param is set.
    If func succeeds and the 'dump_single_json' param is set, its result is
    written to stdout as JSON.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except UnavailableVideoError as err:
            self.report_error(err)
            return
        except MaxDownloadsReached as err:
            self.to_screen(f'[info] {err}')
            raise
        except DownloadCancelled as err:
            self.to_screen(f'[info] {err}')
            if not self.params.get('break_per_url'):
                raise
            return

        if not self.params.get('dump_single_json', False):
            return
        self.post_extract(result)
        self.to_stdout(json.dumps(self.sanitize_info(result)))
    return wrapper
3200
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one fixed
    filename. Returns the download return code.
    """
    urls = variadic(url_list)  # Passing a single URL is a common mistake
    outtmpl = self.outtmpl_dict['default']
    # A template without any "%" gives every video the same filename
    fixed_filename = outtmpl != '-' and '%' not in outtmpl
    if len(urls) > 1 and fixed_filename and self.params.get('max_downloads') != 1:
        raise SameFileError(outtmpl)

    for url in urls:
        extract = self.__download_wrapper(self.extract_info)
        extract(url, force_generic_extractor=self.params.get('force_generic_extractor', False))

    return self._download_retcode
3216
def download_with_info_file(self, info_filename):
    """Download using the info json stored in info_filename

    If processing the stored info fails and it has a 'webpage_url', the
    download is retried from that URL; otherwise the error is re-raised.
    Returns the download return code.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        stored_info = json.loads('\n'.join(lines))
        info = self.sanitize_info(stored_info, self.params.get('clean_infojson', True))

    try:
        self.__download_wrapper(self.process_ie_result)(info, download=True)
    except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as err:
        if not isinstance(err, EntryNotInPlaylist):
            self.to_stderr('\r')
        webpage_url = info.get('webpage_url')
        if webpage_url is None:
            raise
        self.report_warning(f'The info failed to download: {err}; trying with URL {webpage_url}')
        return self.download([webpage_url])
    return self._download_retcode
3235
@staticmethod
def sanitize_info(info_dict, remove_private_keys=False):
    ''' Sanitize the infodict for converting to json

    Returns a filtered copy in which tuples, sets and LazyLists become
    lists and anything that is not a dict, sequence, str, number, bool or
    None is replaced by its repr(). '__original_infodict' is always
    dropped. With remove_private_keys, keys starting with "_" (except
    '_type'), the internal bookkeeping keys listed below and None values
    are dropped too, at every nesting level.

    Note that 'epoch' and '_type' defaults are set on the dict passed in.
    None is returned unchanged.
    '''
    if info_dict is None:
        return info_dict
    info_dict.setdefault('epoch', int(time.time()))
    info_dict.setdefault('_type', 'video')

    # Always removed, since this may contain a copy of the entire dict
    dropped = {'__original_infodict'}
    # Always kept, to facilitate load-info-json
    retained = ['_type']

    if remove_private_keys:
        dropped |= {
            'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
            'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
        }

        def reject(key, value):
            if key in retained:
                return False
            return key.startswith('_') or key in dropped or value is None
    else:
        def reject(key, value):
            return key in dropped

    def filter_fn(obj):
        if isinstance(obj, dict):
            return {key: filter_fn(value) for key, value in obj.items() if not reject(key, value)}
        if isinstance(obj, (list, tuple, set, LazyList)):
            return [filter_fn(item) for item in obj]
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        return repr(obj)

    return filter_fn(info_dict)
3266
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
    ''' Alias of sanitize_info for backward compatibility

    `actually_filter` is forwarded as sanitize_info's remove_private_keys.
    '''
    return YoutubeDL.sanitize_info(info_dict, remove_private_keys=actually_filter)
3271
@staticmethod
def post_extract(info_dict):
    ''' Run the deferred '__post_extractor' callbacks of an infodict, in place

    For 'playlist' and 'multi_video' results every entry is handled
    recursively. For anything else the callable stored under
    '__post_extractor' (if any) is called and the fields it returns are
    merged into both the dict and its '__original_infodict' (if any); the
    callback key is then removed from both. None is accepted and ignored.
    '''
    def resolve(entry):
        if entry.get('_type') in ('playlist', 'multi_video'):
            for child in entry.get('entries', {}):
                resolve(child or {})
            return

        callback = entry.get('__post_extractor')
        extra_fields = (callback() if callback else {}).items()
        entry.update(extra_fields)
        entry.pop('__post_extractor', None)

        pristine = entry.get('__original_infodict') or {}
        pristine.update(extra_fields)
        pristine.pop('__post_extractor', None)

    resolve(info_dict or {})
3290
def run_pp(self, pp, infodict):
    """Run a single postprocessor and deal with the files it wants deleted.

    `pp.run(infodict)` must return (files_to_delete, infodict). A
    PostProcessingError is reported and swallowed only when the
    'ignoreerrors' param is exactly True; otherwise it propagates. With
    the 'keepvideo' param the files are registered in
    infodict['__files_to_move'] instead of being removed. Returns the
    infodict handed back by the postprocessor (or the input one when the
    error was ignored).
    """
    infodict.setdefault('__files_to_move', {})
    try:
        obsolete_files, infodict = pp.run(infodict)
    except PostProcessingError as err:
        # Must be True and not 'only_download'
        if self.params.get('ignoreerrors') is not True:
            raise
        self.report_error(err)
        return infodict

    if not obsolete_files:
        return infodict

    if self.params.get('keepvideo', False):
        for kept in obsolete_files:
            infodict['__files_to_move'].setdefault(kept, '')
        return infodict

    for old_filename in set(obsolete_files):
        self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
        try:
            os.remove(encodeFilename(old_filename))
        except OSError:
            self.report_warning('Unable to remove downloaded original file')
        # A deleted file no longer needs to be moved
        infodict['__files_to_move'].pop(old_filename, None)
    return infodict
3319
def run_all_pps(self, key, info, *, additional_pps=None):
    """Run every postprocessor registered for stage `key` and return the final info.

    self._forceprint(key, info) is called first. `additional_pps`, when
    given, run before the registered ones; each postprocessor receives the
    info returned by the previous one.
    """
    self._forceprint(key, info)
    processors = (additional_pps or []) + self._pps[key]
    return functools.reduce(lambda current, pp: self.run_pp(pp, current), processors, info)
3325
def pre_process(self, ie_info, key='pre_process', files_to_move=None):
    """Run the postprocessors of stage `key` on a shallow copy of `ie_info`.

    Returns a tuple (info, files_to_move) where files_to_move is the
    '__files_to_move' entry taken back out of the processed info.
    """
    working = dict(ie_info)
    working['__files_to_move'] = files_to_move or {}
    processed = self.run_all_pps(key, working)
    pending_moves = processed.pop('__files_to_move', None)
    return processed, pending_moves
3331
def post_process(self, filename, info, files_to_move=None):
    """Run all the postprocessors on the given file.

    `info` is updated in place with 'filepath' and '__files_to_move'. The
    'post_process' stage runs first (preceded by any postprocessors stored
    under info['__postprocessors']), then MoveFilesAfterDownloadPP, and
    finally the 'after_move' stage, whose result is returned.
    """
    info.update({
        'filepath': filename,
        '__files_to_move': files_to_move or {},
    })
    processed = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors'))
    moved = self.run_pp(MoveFilesAfterDownloadPP(self), processed)
    del moved['__files_to_move']
    return self.run_all_pps('after_move', moved)
3340
def _make_archive_id(self, info_dict):
    """Return the download-archive id '<extractor> <video id>', or None.

    The extractor name is lower-cased. It is taken from 'extractor_key' or
    'ie_key'; failing that, the first extractor in self._ies that is
    suitable for info_dict['url'] is used. None is returned when there is
    no video id, no usable URL, or no extractor matches.
    """
    video_id = info_dict.get('id')
    if not video_id:
        return
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        url = str_or_none(info_dict.get('url'))
        if not url:
            return
        # Try to find matching extractor for the URL and take its ie_key
        extractor = next((ie_key for ie_key, ie in self._ies.items() if ie.suitable(url)), None)
        if extractor is None:
            return
    return '%s %s' % (extractor.lower(), video_id)
3360
def in_download_archive(self, info_dict):
    """Tell whether the video described by `info_dict` is already in the archive.

    Always False when no 'download_archive' param is set or when no
    archive id can be built for the video.
    """
    if self.params.get('download_archive') is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    # An empty id means the video information is incomplete
    return bool(archive_id) and archive_id in self.archive
3371
def record_download_archive(self, info_dict):
    """Append the video's archive id to the archive file and to self.archive.

    Does nothing when no 'download_archive' param is set. The id is
    expected to be computable (asserted).
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return

    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    self.write_debug(f'Adding to archive: {archive_id}')
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + '\n')
    self.archive.add(archive_id)
3382
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short, human-readable resolution for a format dict.

    In order of precedence: 'audio only' for formats without video but
    with audio, the explicit 'resolution' field, 'WIDTHxHEIGHT',
    'HEIGHTp', 'WIDTHx?', and finally `default`.
    """
    if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
        return 'audio only'

    explicit = format.get('resolution')
    if explicit is not None:
        return explicit

    width, height = format.get('width'), format.get('height')
    if width and height:
        return '%dx%d' % (width, height)
    if height:
        return '%sp' % height
    if width:
        return '%dx?' % width
    return default
3396
def _list_format_headers(self, *headers):
    """Return the table headers, styled unless the 'listformats_table' param is False.

    With styling, a list of headers passed through self._format_screen()
    is returned; without it, the `headers` tuple is returned untouched.
    """
    if self.params.get('listformats_table', True) is False:
        return headers
    return [self._format_screen(title, self.Styles.HEADERS) for title in headers]
3401
def _format_note(self, fdict):
    """Build the free-text 'note' column used by the legacy formats table.

    The note is assembled from, in order: an '(unsupported)' marker for
    f4f/f4m, language, format_note, tbr, container, vcodec/vbr, fps,
    acodec/abr/asr and the (approximate) file size. Fields that are
    missing are skipped.
    """
    note = ''

    def add(text, sep=', '):
        # Append `text`, preceded by `sep` only if something was written before
        nonlocal note
        if note:
            note += sep
        note += text

    if fdict.get('ext') in ['f4f', 'f4m']:
        note += '(unsupported)'
    if fdict.get('language'):
        add('[%s]' % fdict['language'], ' ')
    if fdict.get('format_note') is not None:
        add(fdict['format_note'], ' ')
    if fdict.get('tbr') is not None:
        add('%4dk' % fdict['tbr'])
    if fdict.get('container') is not None:
        add('%s container' % fdict['container'])

    vcodec, vbr, abr = fdict.get('vcodec'), fdict.get('vbr'), fdict.get('abr')
    if vcodec is not None and vcodec != 'none':
        add(vcodec)
        if vbr is not None:
            note += '@'
    elif vbr is not None and abr is not None:
        # No separator here: the label is glued to whatever precedes it
        note += 'video@'
    if vbr is not None:
        note += '%4dk' % vbr

    if fdict.get('fps') is not None:
        add('%sfps' % fdict['fps'])

    acodec = fdict.get('acodec')
    if acodec is not None:
        add('video only' if acodec == 'none' else '%-5s' % acodec)
    elif abr is not None:
        add('audio')
    if abr is not None:
        note += '@%3dk' % abr
    if fdict.get('asr') is not None:
        note += ' (%5dHz)' % fdict['asr']

    if fdict.get('filesize') is not None:
        add(format_bytes(fdict['filesize']))
    elif fdict.get('filesize_approx') is not None:
        add('~' + format_bytes(fdict['filesize_approx']))
    return note
3461
def render_formats_table(self, info_dict):
    """Render the table of available formats, or return None if there are none.

    When `info_dict` has no 'formats' but has a 'url', the dict itself is
    treated as the only format. Formats whose 'preference' is below -1000
    are left out. The legacy four-column layout is produced only when the
    'listformats_table' param is exactly False.
    """
    if not (info_dict.get('formats') or info_dict.get('url')):
        return None

    formats = info_dict.get('formats', [info_dict])

    def listed(f):
        return f.get('preference') is None or f['preference'] >= -1000

    if self.params.get('listformats_table', True) is False:
        legacy_rows = [
            [
                format_field(f, 'format_id'),
                format_field(f, 'ext'),
                self.format_resolution(f),
                self._format_note(f)
            ] for f in formats if listed(f)]
        return render_table(['format code', 'extension', 'resolution', 'note'], legacy_rows, extra_gap=1)

    delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)

    def describe(f):
        # One row; the cells line up with the header tuple further down
        return [
            self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
            format_field(f, 'ext'),
            format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
            format_field(f, 'fps', '\t%d'),
            format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
            delim,
            format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
            format_field(f, 'tbr', '\t%dk'),
            shorten_protocol_name(f.get('protocol', '')),
            delim,
            format_field(f, 'vcodec', default='unknown').replace(
                'none', 'images' if f.get('acodec') == 'none'
                        else self._format_screen('audio only', self.Styles.SUPPRESS)),
            format_field(f, 'vbr', '\t%dk'),
            format_field(f, 'acodec', default='unknown').replace(
                'none', '' if f.get('vcodec') == 'none'
                        else self._format_screen('video only', self.Styles.SUPPRESS)),
            format_field(f, 'abr', '\t%dk'),
            format_field(f, 'asr', '\t%dHz'),
            join_nonempty(
                self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
                format_field(f, 'language', '[%s]'),
                join_nonempty(format_field(f, 'format_note'),
                              format_field(f, 'container', ignore=(None, f.get('ext'))),
                              delim=', '),
                delim=' '),
        ]

    rows = [describe(f) for f in formats if listed(f)]
    header_line = self._list_format_headers(
        'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
        delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')

    return render_table(
        header_line, rows, hide_empty=True,
        delim=self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True))
3514
def render_thumbnails_table(self, info_dict):
    """Render the table of available thumbnails, or return None if there are none.

    Missing widths and heights are shown as 'unknown'; every thumbnail
    must have a 'url'.
    """
    thumbnails = list(info_dict.get('thumbnails') or [])
    if not thumbnails:
        return None

    headers = self._list_format_headers('ID', 'Width', 'Height', 'URL')
    rows = [
        [thumb.get('id'), thumb.get('width', 'unknown'), thumb.get('height', 'unknown'), thumb['url']]
        for thumb in thumbnails]
    return render_table(headers, rows)
3522
def render_subtitles_table(self, video_id, subtitles):
    """Render the table of available subtitles, or return None if there are none.

    `subtitles` maps a language code to a list of format dicts, each with
    an 'ext' and optionally a 'name'. Formats are listed in reverse order.
    The name column is collapsed to a single name when all formats of a
    language share it, and left empty when none of them is named.
    `video_id` is accepted but not used here.
    """
    def _row(lang, formats):
        # Two plain lists instead of unpacking zip(*pairs): unpacking raises
        # ValueError when a language comes with an empty list of formats
        ordered = list(reversed(formats))
        exts = [f['ext'] for f in ordered]
        names = [f.get('name') or 'unknown' for f in ordered]
        if len(set(names)) == 1:
            names = [] if names[0] == 'unknown' else names[:1]
        return [lang, ', '.join(names), ', '.join(exts)]

    if not subtitles:
        return None
    return render_table(
        self._list_format_headers('Language', 'Name', 'Formats'),
        [_row(lang, formats) for lang, formats in subtitles.items()],
        hide_empty=True)
3536
def __list_table(self, video_id, name, func, *args):
    """Print the table produced by func(*args), or a notice when it is empty.

    `name` is the plural noun used in the messages (e.g. 'formats').
    """
    table = func(*args)
    if table:
        self.to_screen(f'[info] Available {name} for {video_id}:')
        self.to_stdout(table)
    else:
        self.to_screen(f'{video_id} has no {name}')
3544
def list_formats(self, info_dict):
    """Print the formats table of `info_dict`, or a notice when it has no formats."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'formats', self.render_formats_table, info_dict)
3547
def list_thumbnails(self, info_dict):
    """Print the thumbnails table of `info_dict`, or a notice when it has no thumbnails."""
    video_id = info_dict['id']
    self.__list_table(video_id, 'thumbnails', self.render_thumbnails_table, info_dict)
3550
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the table of `subtitles` for `video_id`, or a notice when there are none.

    `name` is the noun used in the printed messages.
    """
    renderer = self.render_subtitles_table
    self.__list_table(video_id, name, renderer, video_id, subtitles)
3553
def urlopen(self, req):
    """ Start an HTTP download

    `req` is either a request object or a URL string; strings are turned
    into a request with sanitized_Request(). The request is opened through
    self._opener using the configured socket timeout.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
3559
def print_debug_header(self):
    """Write the diagnostic header shown in verbose mode.

    Does nothing unless the 'verbose' param is set. Otherwise reports, in
    order: the encodings in use, the yt-dlp version and variant, the lazy
    loading / plugin / compat-option state, the git HEAD (only when
    running from source), the Python version, the versions of external
    executables, the optional libraries available and the proxy map.

    Messages go to the 'logger' param when one is configured; otherwise
    the encodings line is written with write_string() and all later lines
    with self._write_string().
    """
    if not self.params.get('verbose'):
        return

    def get_encoding(stream):
        # The `encoding` attribute may exist but be None; str() keeps the
        # concatenation below from raising TypeError in that case
        ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
        if not supports_terminal_sequences(stream):
            from .compat import WINDOWS_VT_MODE
            ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
        return ret

    encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        get_encoding(self._screen_file), get_encoding(self._err_file),
        self.get_encoding())

    logger = self.params.get('logger')
    if logger:
        write_debug = lambda msg: logger.debug(f'[debug] {msg}')
        write_debug(encoding_str)
    else:
        write_string(f'[debug] {encoding_str}\n', encoding=None)
        write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')

    source = detect_variant()
    write_debug(join_nonempty(
        'yt-dlp version', __version__,
        f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
        '' if source == 'unknown' else f'({source})',
        delim=' '))
    if not _LAZY_LOADER:
        if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
            write_debug('Lazy loading extractors is forcibly disabled')
        else:
            write_debug('Lazy loading extractors is disabled')
    if plugin_extractors or plugin_postprocessors:
        write_debug('Plugins: %s' % [
            '%s%s' % (klass.__name__, '' if klass.__name__ == name else f' as {name}')
            for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
    if self.params.get('compat_opts'):
        write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))

    if source == 'source':
        # Deliberately best effort: any failure while asking git for the
        # current revision is ignored and the line is simply not printed
        with contextlib.suppress(Exception):
            sp = Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, _ = sp.communicate_or_kill()
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                write_debug('Git HEAD: %s' % out)

    def python_implementation():
        impl_name = platform.python_implementation()
        if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
            return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
        return impl_name

    write_debug('Python version %s (%s %s) - %s' % (
        platform.python_version(),
        python_implementation(),
        platform.architecture()[0],
        platform_name()))

    exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
    # Keep only the names of the features that are enabled
    ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
    if ffmpeg_features:
        exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)

    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_versions['phantomjs'] = PhantomJSwrapper._version()
    exe_str = ', '.join(
        f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v
    ) or 'none'
    write_debug('exe versions: %s' % exe_str)

    from .downloader.websocket import has_websockets
    from .postprocessor.embedthumbnail import has_mutagen
    from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE

    lib_str = join_nonempty(
        compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
        SECRETSTORAGE_AVAILABLE and 'secretstorage',
        has_mutagen and 'mutagen',
        SQLITE_AVAILABLE and 'sqlite',
        has_websockets and 'websockets',
        delim=', ') or 'none'
    write_debug('Optional libraries: %s' % lib_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_debug(f'Proxy map: {proxy_map}')

    # Not implemented
    if False and self.params.get('call_home'):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        write_debug('Public IP address: %s' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
3673
def _setup_opener(self):
    """Build the urllib opener used for requests and store it in self._opener.

    Also sets self._socket_timeout (the 'socket_timeout' param as a float,
    20 when unset) and self.cookiejar (loaded from the 'cookiefile' and
    'cookiesfrombrowser' params). Proxies come from the 'proxy' param: an
    empty string disables proxies, an unset param falls back to the
    proxies reported by getproxies().
    """
    configured_timeout = self.params.get('socket_timeout')
    self._socket_timeout = float(configured_timeout) if configured_timeout is not None else 20

    browser_spec = self.params.get('cookiesfrombrowser')
    cookie_file = self.params.get('cookiefile')
    proxy_opt = self.params.get('proxy')

    self.cookiejar = load_cookies(cookie_file, browser_spec, self)
    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)

    if proxy_opt is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_opt == '':
        proxies = {}
    else:
        proxies = {'http': proxy_opt, 'https': proxy_opt}
    proxy_handler = PerRequestProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    redirect_handler = YoutubeDLRedirectHandler()
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/ytdl-org/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in yt-dlp for security reasons')
    file_handler.file_open = file_open

    handlers = (
        proxy_handler, https_handler, cookie_processor, ydlh,
        redirect_handler, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
3721
def encode(self, s):
    """Encode `s` with the configured encoding; bytes are returned unchanged.

    On UnicodeEncodeError the exception is re-raised with a hint about the
    system encoding / --encoding option appended to its reason.
    """
    if isinstance(s, bytes):
        return s  # Already encoded

    try:
        encoded = s.encode(self.get_encoding())
    except UnicodeEncodeError as err:
        err.reason += '. Check your system encoding configuration or use the --encoding option.'
        raise
    return encoded
3731
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is not set (None)."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
3737
def _write_info_json(self, label, ie_result, infofn, overwrite=None):
    ''' Write the infojson for `ie_result` to `infofn`

    Returns True when the file was written or is already present and may
    not be overwritten, False when writing was skipped ('writeinfojson'
    is off or there is no file name), and None on error.
    `overwrite` defaults to the 'overwrites' param (True when unset).
    `label` is only used in messages.
    '''
    if overwrite is None:
        overwrite = self.params.get('overwrites', True)

    if not self.params.get('writeinfojson'):
        return False
    if not infofn:
        self.write_debug(f'Skipping writing {label} infojson')
        return False
    if not self._ensure_dir_exists(infofn):
        return None

    if not overwrite and os.path.exists(infofn):
        self.to_screen(f'[info] {label.title()} metadata is already present')
        return True

    self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
    try:
        write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
    except OSError:
        self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
        return None
    return True
3759
def _write_description(self, label, ie_result, descfn):
    ''' Write the description of `ie_result` to `descfn`

    Returns True when the file was written or is already present and
    'overwrites' is off, False when writing was skipped ('writedescription'
    is off, there is no file name, or there is no description), and None
    on error. `label` is only used in messages.
    '''
    if not self.params.get('writedescription'):
        return False
    if not descfn:
        self.write_debug(f'Skipping writing {label} description')
        return False
    if not self._ensure_dir_exists(descfn):
        return None

    if not self.params.get('overwrites', True) and os.path.exists(descfn):
        self.to_screen(f'[info] {label.title()} description is already present')
        return True
    if ie_result.get('description') is None:
        self.report_warning(f'There\'s no {label} description to write')
        return False

    try:
        self.to_screen(f'[info] Writing {label} description to: {descfn}')
        with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
            descfile.write(ie_result['description'])
    except OSError:
        self.report_error(f'Cannot write {label} description file {descfn}')
        return None
    return True
3783
def _write_subtitles(self, info_dict, filename):
    ''' Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error

    Nothing is written (and an empty list is returned) when there are no
    'requested_subtitles', when neither 'writesubtitles' nor
    'writeautomaticsub' is set, or when no subtitle file name can be
    prepared. Each written or pre-existing subtitle gets its 'filepath'
    set in the corresponding entry of info_dict['requested_subtitles'].
    A failed subtitle download raises DownloadError unless the
    'ignoreerrors' param is exactly True, in which case it only warns.
    '''
    written = []
    subtitles = info_dict.get('requested_subtitles')
    wanted = self.params.get('writesubtitles') or self.params.get('writeautomaticsub')
    if not subtitles or not wanted:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        return written

    final_base = self.prepare_filename(info_dict, 'subtitle')
    if not final_base:
        self.to_screen('[info] Skipping writing video subtitles')
        return written

    for sub_lang, sub_info in subtitles.items():
        sub_format = sub_info['ext']
        sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
        sub_filename_final = subtitles_filename(final_base, sub_lang, sub_format, info_dict.get('ext'))

        existing_sub = self.existing_file((sub_filename_final, sub_filename))
        if existing_sub:
            self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present')
            sub_info['filepath'] = existing_sub
            written.append((existing_sub, sub_filename_final))
            continue

        self.to_screen(f'[info] Writing video subtitles to: {sub_filename}')
        if sub_info.get('data') is not None:
            # The subtitle text is already at hand; no download needed
            try:
                # Use newline='' to prevent conversion of newline characters
                # See https://github.com/ytdl-org/youtube-dl/issues/10268
                with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile:
                    subfile.write(sub_info['data'])
            except OSError:
                self.report_error(f'Cannot write video subtitles file {sub_filename}')
                return None
            sub_info['filepath'] = sub_filename
            written.append((sub_filename, sub_filename_final))
            continue

        try:
            download_info = sub_info.copy()
            download_info.setdefault('http_headers', info_dict.get('http_headers'))
            self.dl(sub_filename, download_info, subtitle=True)
            sub_info['filepath'] = sub_filename
            written.append((sub_filename, sub_filename_final))
        except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
            message = f'Unable to download video subtitles for {sub_lang!r}: {err}'
            if self.params.get('ignoreerrors') is not True:  # False or 'only_download'
                raise DownloadError(message, err)
            self.report_warning(message)
    return written
3833
def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
    ''' Write thumbnails to file and return list of (thumb_filename, final_thumb_filename)

    Thumbnails are only considered when 'writethumbnail' or
    'write_all_thumbnails' is set, and are tried from the last list entry
    to the first. Unless all thumbnails were requested, processing stops
    after the first one that is written or already present. A thumbnail
    that fails to download is removed from info_dict['thumbnails'] and a
    warning is issued. `thumb_filename_base` defaults to `filename`.
    '''
    write_all = self.params.get('write_all_thumbnails', False)
    thumbnails, written = [], []
    if write_all or self.params.get('writethumbnail', False):
        thumbnails = info_dict.get('thumbnails') or []
    multiple = write_all and len(thumbnails) > 1

    if thumb_filename_base is None:
        thumb_filename_base = filename
    if thumbnails and not thumb_filename_base:
        self.write_debug(f'Skipping writing {label} thumbnail')
        return written

    # Indices are kept so that failed entries can be popped from the list
    for idx, thumb in reversed(list(enumerate(thumbnails))):
        id_prefix = f'{thumb["id"]}.' if multiple else ''
        thumb_ext = id_prefix + determine_ext(thumb['url'], 'jpg')
        thumb_display_id = f'{label} thumbnail {thumb["id"]}'
        thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
        thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))

        existing_thumb = self.existing_file((thumb_filename_final, thumb_filename))
        if existing_thumb:
            shown_name = thumb_display_id if multiple else f'{label} thumbnail'
            self.to_screen('[info] %s is already present' % shown_name.capitalize())
            thumb['filepath'] = existing_thumb
            written.append((existing_thumb, thumb_filename_final))
        else:
            self.to_screen(f'[info] Downloading {thumb_display_id} ...')
            try:
                response = self.urlopen(thumb['url'])
                self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}')
                with open(encodeFilename(thumb_filename), 'wb') as thumb_file:
                    shutil.copyfileobj(response, thumb_file)
                written.append((thumb_filename, thumb_filename_final))
                thumb['filepath'] = thumb_filename
            except network_exceptions as err:
                thumbnails.pop(idx)
                self.report_warning(f'Unable to download {thumb_display_id}: {err}')

        if written and not write_all:
            break
    return written