Add support for SSL client certificate authentication (#3435)

[yt-dlp.git] / yt_dlp / YoutubeDL.py
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py

index 56f0346dccc3a1291561404f9e6d330e8555a17a..1766ff37996c500474591dbeba1889833a59d8eb 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -12,6 +12,7 @@
  import operator
  import os
  import platform
+import random
  import re
  import shutil
  import subprocess
@@ -20,18 +21,14 @@
  import time
  import tokenize
  import traceback
-import random
  import unicodedata
  import urllib.request
-
-from enum import Enum
  from string import ascii_letters
  
+from .cache import Cache
  from .compat import (
-    compat_brotli,
      compat_get_terminal_size,
      compat_os_name,
-    compat_pycrypto_AES,
      compat_shlex_quote,
      compat_str,
      compat_urllib_error,
@@ -39,74 +36,101 @@
      windows_enable_vt_mode,
  )
  from .cookies import load_cookies
+from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name
+from .downloader.rtmp import rtmpdump_version
+from .extractor import _LAZY_LOADER
+from .extractor import _PLUGIN_CLASSES as plugin_extractors
+from .extractor import gen_extractor_classes, get_info_extractor
+from .extractor.openload import PhantomJSwrapper
+from .minicurses import format_text
+from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors
+from .postprocessor import (
+    EmbedThumbnailPP,
+    FFmpegFixupDuplicateMoovPP,
+    FFmpegFixupDurationPP,
+    FFmpegFixupM3u8PP,
+    FFmpegFixupM4aPP,
+    FFmpegFixupStretchedPP,
+    FFmpegFixupTimestampPP,
+    FFmpegMergerPP,
+    FFmpegPostProcessor,
+    MoveFilesAfterDownloadPP,
+    get_postprocessor,
+)
+from .update import detect_variant
  from .utils import (
+    DEFAULT_OUTTMPL,
+    LINK_TEMPLATES,
+    NO_DEFAULT,
+    NUMBER_RE,
+    OUTTMPL_TYPES,
+    POSTPROCESS_WHEN,
+    STR_FORMAT_RE_TMPL,
+    STR_FORMAT_TYPES,
+    ContentTooShortError,
+    DateRange,
+    DownloadCancelled,
+    DownloadError,
+    EntryNotInPlaylist,
+    ExistingVideoReached,
+    ExtractorError,
+    GeoRestrictedError,
+    HEADRequest,
+    InAdvancePagedList,
+    ISO3166Utils,
+    LazyList,
+    MaxDownloadsReached,
+    Namespace,
+    PagedList,
+    PerRequestProxyHandler,
+    Popen,
+    PostProcessingError,
+    ReExtractInfo,
+    RejectedVideoReached,
+    SameFileError,
+    UnavailableVideoError,
+    YoutubeDLCookieProcessor,
+    YoutubeDLHandler,
+    YoutubeDLRedirectHandler,
      age_restricted,
      args_to_str,
-    ContentTooShortError,
      date_from_str,
-    DateRange,
-    DEFAULT_OUTTMPL,
      determine_ext,
      determine_protocol,
-    DownloadCancelled,
-    DownloadError,
      encode_compat_str,
      encodeFilename,
-    EntryNotInPlaylist,
      error_to_compat_str,
-    ExistingVideoReached,
      expand_path,
-    ExtractorError,
      filter_dict,
      float_or_none,
      format_bytes,
-    format_field,
      format_decimal_suffix,
+    format_field,
      formatSeconds,
-    GeoRestrictedError,
      get_domain,
-    has_certifi,
-    HEADRequest,
-    InAdvancePagedList,
      int_or_none,
      iri_to_uri,
-    ISO3166Utils,
      join_nonempty,
-    LazyList,
-    LINK_TEMPLATES,
      locked_file,
      make_dir,
      make_HTTPS_handler,
-    MaxDownloadsReached,
      merge_headers,
      network_exceptions,
-    NO_DEFAULT,
      number_of_digits,
      orderedSet,
-    OUTTMPL_TYPES,
-    PagedList,
      parse_filesize,
-    PerRequestProxyHandler,
      platform_name,
-    Popen,
-    POSTPROCESS_WHEN,
-    PostProcessingError,
      preferredencoding,
      prepend_extension,
-    ReExtractInfo,
      register_socks_protocols,
-    RejectedVideoReached,
      remove_terminal_sequences,
      render_table,
      replace_extension,
-    SameFileError,
      sanitize_filename,
      sanitize_path,
      sanitize_url,
      sanitized_Request,
      std_headers,
-    STR_FORMAT_RE_TMPL,
-    STR_FORMAT_TYPES,
      str_or_none,
      strftime_or_none,
      subtitles_filename,
@@ -115,47 +139,13 @@
      to_high_limit_path,
      traverse_obj,
      try_get,
-    UnavailableVideoError,
      url_basename,
      variadic,
      version_tuple,
      write_json_file,
      write_string,
-    YoutubeDLCookieProcessor,
-    YoutubeDLHandler,
-    YoutubeDLRedirectHandler,
  )
-from .cache import Cache
-from .minicurses import format_text
-from .extractor import (
-    gen_extractor_classes,
-    get_info_extractor,
-    _LAZY_LOADER,
-    _PLUGIN_CLASSES as plugin_extractors
-)
-from .extractor.openload import PhantomJSwrapper
-from .downloader import (
-    FFmpegFD,
-    get_suitable_downloader,
-    shorten_protocol_name
-)
-from .downloader.rtmp import rtmpdump_version
-from .postprocessor import (
-    get_postprocessor,
-    EmbedThumbnailPP,
-    FFmpegFixupDuplicateMoovPP,
-    FFmpegFixupDurationPP,
-    FFmpegFixupM3u8PP,
-    FFmpegFixupM4aPP,
-    FFmpegFixupStretchedPP,
-    FFmpegFixupTimestampPP,
-    FFmpegMergerPP,
-    FFmpegPostProcessor,
-    MoveFilesAfterDownloadPP,
-    _PLUGIN_CLASSES as plugin_postprocessors
-)
-from .update import detect_variant
-from .version import __version__, RELEASE_GIT_HEAD
+from .version import RELEASE_GIT_HEAD, __version__
  
  if compat_os_name == 'nt':
      import ctypes
@@ -329,6 +319,10 @@ class YoutubeDL:
      legacyserverconnect: Explicitly allow HTTPS connection to servers that do not
                         support RFC 5746 secure renegotiation
      nocheckcertificate:  Do not verify SSL certificates
+    client_certificate:  Path to client certificate file in PEM format. May include the private key
+    client_certificate_key:  Path to private key file for client certificate
+    client_certificate_password:  Password for client certificate private key, if encrypted.
+                        If not provided and the key is encrypted, yt-dlp will ask interactively
      prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
                         At the moment, this is only supported by YouTube.
      http_headers:      A dictionary of custom headers to be used for all requests
@@ -420,10 +414,14 @@ class YoutubeDL:
      sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
      listformats:       Print an overview of available video formats and exit.
      list_thumbnails:   Print a table of all thumbnails and exit.
-    match_filter:      A function that gets called with the info_dict of
-                       every video.
-                       If it returns a message, the video is ignored.
-                       If it returns None, the video is downloaded.
+    match_filter:      A function that gets called for every video with the signature
+                       (info_dict, *, incomplete: bool) -> Optional[str]
+                       For backward compatibility with youtube-dl, the signature
+                       (info_dict) -> Optional[str] is also allowed.
+                       - If it returns a message, the video is ignored.
+                       - If it returns None, the video is downloaded.
+                       - If it returns utils.NO_DEFAULT, the user is interactively
+                         asked whether to download the video.
                         match_filter_func in utils.py is one example for this.
      no_color:          Do not emit color codes in output.
      geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
@@ -886,16 +884,19 @@ def trouble(self, message=None, tb=None, is_error=True):
              raise DownloadError(message, exc_info)
          self._download_retcode = 1
  
-    class Styles(Enum):
-        HEADERS = 'yellow'
-        EMPHASIS = 'light blue'
-        ID = 'green'
-        DELIM = 'blue'
-        ERROR = 'red'
-        WARNING = 'yellow'
-        SUPPRESS = 'light black'
+    Styles = Namespace(
+        HEADERS='yellow',
+        EMPHASIS='light blue',
+        FILENAME='green',
+        ID='green',
+        DELIM='blue',
+        ERROR='red',
+        WARNING='yellow',
+        SUPPRESS='light black',
+    )
  
      def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
+        text = str(text)
          if test_encoding:
              original_text = text
              # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
@@ -903,8 +904,6 @@ def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_enc
              text = text.encode(encoding, 'ignore').decode(encoding)
              if fallback is not None and text != original_text:
                  text = fallback
-        if isinstance(f, self.Styles):
-            f = f.value
          return format_text(text, f) if allow_colors else text if fallback is None else fallback
  
      def _format_screen(self, *args, **kwargs):
@@ -965,7 +964,7 @@ def report_file_delete(self, file_name):
              self.to_screen('Deleting existing file')
  
      def raise_no_formats(self, info, forced=False, *, msg=None):
-        has_drm = info.get('__has_drm')
+        has_drm = info.get('_has_drm')
          ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg)
          msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!'
          if forced or not ignored:
@@ -1055,7 +1054,7 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
              formatSeconds(info_dict['duration'], '-' if sanitize else ':')
              if info_dict.get('duration', None) is not None
              else None)
-        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
+        info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads)
          info_dict['video_autonumber'] = self._num_videos
          if info_dict.get('resolution') is None:
              info_dict['resolution'] = self.format_resolution(info_dict, default=None)
@@ -1063,7 +1062,7 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
          # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences
          # of %(field)s to %(field)0Nd for backward compatibility
          field_size_compat_map = {
-            'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0),
+            'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0),
              'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0),
              'autonumber': self.params.get('autonumber_size') or 5,
          }
@@ -1077,18 +1076,18 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
          # Field is of the form key1.key2...
          # where keys (except first) can be string, int or slice
          FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
-        MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
+        MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
          MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
-        INTERNAL_FORMAT_RE = re.compile(r'''(?x)
+        INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
              (?P<negate>-)?
-            (?P<fields>{field})
-            (?P<maths>(?:{math_op}{math_field})*)
+            (?P<fields>{FIELD_RE})
+            (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*)
              (?:>(?P<strf_format>.+?))?
              (?P<remaining>
                  (?P<alternate>(?<!\\),[^|&)]+)?
                  (?:&(?P<replacement>.*?))?
                  (?:\|(?P<default>.*?))?
-            )$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
+            )$''')
  
          def _traverse_infodict(k):
              k = k.split('.')
@@ -1314,7 +1313,17 @@ def check_filter():
                  except TypeError:
                      # For backward compatibility
                      ret = None if incomplete else match_filter(info_dict)
-                if ret is not None:
+                if ret is NO_DEFAULT:
+                    while True:
+                        filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME)
+                        reply = input(self._format_screen(
+                            f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip()
+                        if reply in {'y', ''}:
+                            return None
+                        elif reply == 'n':
+                            return f'Skipping {video_title}'
+                    return True
+                elif ret is not None:
                      return ret
              return None
  
@@ -1716,6 +1725,7 @@ def get_entry(i):
              entries.append(entry)
              try:
                  if entry is not None:
+                    # TODO: Add auto-generated fields
                      self._match_entry(entry, incomplete=True, silent=True)
              except (ExistingVideoReached, RejectedVideoReached):
                  broken = True
@@ -1766,14 +1776,15 @@ def get_entry(i):
              playlist_index, entry = entry_tuple
              if 'playlist-index' in self.params.get('compat_opts', []):
                  playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1
-            self.to_screen(f'[download] Downloading video {i} of {n_entries}')
+            self.to_screen('[download] Downloading video %s of %s' % (
+                self._format_screen(i, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
              # This __x_forwarded_for_ip thing is a bit ugly but requires
              # minimal changes
              if x_forwarded_for:
                  entry['__x_forwarded_for_ip'] = x_forwarded_for
              extra = {
                  'n_entries': n_entries,
-                '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
+                '__last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
                  'playlist_count': ie_result.get('playlist_count'),
                  'playlist_index': playlist_index,
                  'playlist_autonumber': i,
@@ -2330,7 +2341,7 @@ def _fill_common_fields(self, info_dict, is_video=True):
                                       video_id=info_dict['id'], ie=info_dict['extractor'])
              elif not info_dict.get('title'):
                  self.report_warning('Extractor failed to obtain "title". Creating a generic title instead')
-                info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}'
+                info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}'
  
          if info_dict.get('duration') is not None:
              info_dict['duration_string'] = formatSeconds(info_dict['duration'])
@@ -2343,11 +2354,9 @@ def _fill_common_fields(self, info_dict, is_video=True):
              if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                  # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                  # see http://bugs.python.org/issue1646728)
-                try:
+                with contextlib.suppress(ValueError, OverflowError, OSError):
                      upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                      info_dict[date_key] = upload_date.strftime('%Y%m%d')
-                except (ValueError, OverflowError, OSError):
-                    pass
  
          live_keys = ('is_live', 'was_live')
          live_status = info_dict.get('live_status')
@@ -2447,10 +2456,11 @@ def sanitize_numeric_fields(info):
          else:
              formats = info_dict['formats']
  
-        info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
+        # "or None" turns False into None, which ensures --clean-infojson removes the field
+        info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
          if not self.params.get('allow_unplayable_formats'):
              formats = [f for f in formats if not f.get('has_drm')]
-            if info_dict['__has_drm'] and all(
+            if info_dict['_has_drm'] and all(
                      f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
                  self.report_warning(
                      'This video is DRM protected and only images are available for download. '
@@ -3145,16 +3155,16 @@ def fixup():
                      if fixup_policy in ('ignore', 'never'):
                          return
                      elif fixup_policy == 'warn':
-                        do_fixup = False
+                        do_fixup = 'warn'
                      elif fixup_policy != 'force':
                          assert fixup_policy in ('detect_or_warn', None)
                          if not info_dict.get('__real_download'):
                              do_fixup = False
  
                      def ffmpeg_fixup(cndn, msg, cls):
-                        if not cndn:
+                        if not (do_fixup and cndn):
                              return
-                        if not do_fixup:
+                        elif do_fixup == 'warn':
                              self.report_warning(f'{vid}: {msg}')
                              return
                          pp = cls(self)
@@ -3277,9 +3287,9 @@ def sanitize_info(info_dict, remove_private_keys=False):
          info_dict.setdefault('_type', 'video')
  
          if remove_private_keys:
-            reject = lambda k, v: v is None or (k.startswith('_') and k != '_type') or k in {
+            reject = lambda k, v: v is None or k.startswith('__') or k in {
                  'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries',
-                'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
+                'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber',
              }
          else:
              reject = lambda k, v: False
@@ -3301,6 +3311,17 @@ def filter_requested_info(info_dict, actually_filter=True):
          ''' Alias of sanitize_info for backward compatibility '''
          return YoutubeDL.sanitize_info(info_dict, actually_filter)
  
+    def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None):
+        for filename in set(filter(None, files_to_delete)):
+            if msg:
+                self.to_screen(msg % filename)
+            try:
+                os.remove(filename)
+            except OSError:
+                self.report_warning(f'Unable to delete file {filename}')
+            if filename in info.get('__files_to_move', []):  # NB: Delete even if None
+                del info['__files_to_move'][filename]
+
      @staticmethod
      def post_extract(info_dict):
          def actual_post_extract(info_dict):
@@ -3333,14 +3354,8 @@ def run_pp(self, pp, infodict):
              for f in files_to_delete:
                  infodict['__files_to_move'].setdefault(f, '')
          else:
-            for old_filename in set(files_to_delete):
-                self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
-                try:
-                    os.remove(encodeFilename(old_filename))
-                except OSError:
-                    self.report_warning('Unable to remove downloaded original file')
-                if old_filename in infodict['__files_to_move']:
-                    del infodict['__files_to_move'][old_filename]
+            self._delete_downloaded_files(
+                *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)')
          return infodict
  
      def run_all_pps(self, key, info, *, additional_pps=None):
@@ -3590,7 +3605,7 @@ def print_debug_header(self):
          def get_encoding(stream):
              ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__))
              if not supports_terminal_sequences(stream):
-                from .compat import WINDOWS_VT_MODE
+                from .compat import WINDOWS_VT_MODE  # Must be imported locally
                  ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
              return ret
  
@@ -3637,10 +3652,8 @@ def get_encoding(stream):
                  if re.match('[0-9a-f]+', out):
                      write_debug('Git HEAD: %s' % out)
              except Exception:
-                try:
+                with contextlib.suppress(Exception):
                      sys.exc_clear()
-                except Exception:
-                    pass
  
          def python_implementation():
              impl_name = platform.python_implementation()
@@ -3657,7 +3670,7 @@ def python_implementation():
          exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
          ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
          if ffmpeg_features:
-            exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
+            exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features))
  
          exe_versions['rtmpdump'] = rtmpdump_version()
          exe_versions['phantomjs'] = PhantomJSwrapper._version()
@@ -3666,20 +3679,12 @@ def python_implementation():
          ) or 'none'
          write_debug('exe versions: %s' % exe_str)
  
-        from .downloader.websocket import has_websockets
-        from .postprocessor.embedthumbnail import has_mutagen
-        from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE
-
-        lib_str = join_nonempty(
-            compat_brotli and compat_brotli.__name__,
-            has_certifi and 'certifi',
-            compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
-            SECRETSTORAGE_AVAILABLE and 'secretstorage',
-            has_mutagen and 'mutagen',
-            SQLITE_AVAILABLE and 'sqlite',
-            has_websockets and 'websockets',
-            delim=', ') or 'none'
-        write_debug('Optional libraries: %s' % lib_str)
+        from .compat.compat_utils import get_package_info
+        from .dependencies import available_dependencies
+
+        write_debug('Optional libraries: %s' % (', '.join(sorted({
+            join_nonempty(*get_package_info(m)) for m in available_dependencies.values()
+        })) or 'none'))
  
          self._setup_opener()
          proxy_map = {}